/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.frontend.icache._
import xiangshan.backend.CtrlToFtqIO
import xiangshan.backend.decode.ImmUnion
import utility.ChiselDB

class FtqDebugBundle extends Bundle {
  val pc = UInt(39.W)
  val target = UInt(39.W)
  val isBr = Bool()
  val isJmp = Bool()
  val isCall = Bool()
  val isRet = Bool()
  val misPred = Bool()
  val isTaken = Bool()
  val predStage = UInt(2.W)
}

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
}
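// Added note: FtqPtr is a wrapping circular-queue pointer (a wrap flag plus an
// index). A sketch of the semantics, assuming FtqSize = 64: incrementing
// FtqPtr(false.B, 63.U) yields FtqPtr(true.B, 0.U); equal values with opposite
// flags are exactly one full queue apart, which is what isFull / isBefore /
// distanceBetween rely on. The companion object below builds a pointer from a
// flag and a value, and `inverse` names the same slot one wrap away.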

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

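// Added note: FtqNRSRAM builds an N-read/1-write memory out of numRead
// single-read SRAMs. Every write goes to all copies, so the copies always hold
// identical contents, and each read port gets a private copy; e.g. with two
// read ports, raddr(0) and raddr(1) are served in the same cycle without a
// true multi-ported array.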
class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  val startAddr = UInt(VAddrBits.W)
  val nextLineAddr = UInt(VAddrBits.W)
  val isNextMask = Vec(PredictWidth, Bool())
  val fallThruError = Bool()
  // val carry = Bool()
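  // Added sketch: getPc rebuilds the PC of instruction slot `offset` from
  // startAddr, roughly switching the upper bits to nextLineAddr for slots that
  // isNextMask marks as spilling into the next fetch line. E.g. if startAddr
  // sits a few slots before a line boundary, a large enough getPc(offset)
  // concatenates the higher bits of nextLineAddr with the low offset bits of
  // startAddr + offset, padded with instruction-alignment zeros.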
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
    this.startAddr := resp.pc(3)
    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.fallThruError := resp.fallThruError(3)
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}"
  }
}

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
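  // Added encoding note: jmpInfo.bits describes the (at most one) jump of the
  // fetch block as a 3-bit vector: bits(0) = isJalr, bits(1) = isCall,
  // bits(2) = isRet, which is what the hasJal/hasJalr/hasCall/hasRet helpers
  // below decode.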
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
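    // Added note (hedged): Cat(isJmp, isJalr || isBr) appears to follow the
    // usual XiangShan BrType encoding: b01 = conditional branch, b10 = jal,
    // b11 = jalr; see PreDecodeInfo/BrType for the authoritative definition.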
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}


class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new BranchPredictionRedirect)
  val topdown_redirect = Valid(new BranchPredictionRedirect)
  val flushFromBpu = new Bundle {
    // when the IFU pipeline is not stalled,
    // a packet redirected by BPU s3 can have advanced at most to stage f1
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}
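    // Added example: shouldFlushBy(src, idx) holds when the redirecting entry
    // is not after idx, i.e. every in-flight request at or beyond that entry
    // is stale. E.g. if s2 redirects at FTQ entry 5, pending requests with
    // ftqIdx 5 or 6 are flushed, while a request at ftqIdx 4 survives.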
  }
}

class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  // NOTE: req.bits must be prepared in cycle T,
  // while req.valid is set true in cycle T + 1
  val req = Decoupled(new FtqToICacheRequestBundle)
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirectPcRead = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  // write to backend pc mem
  val pc_mem_wen = Output(Bool())
  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
  val pc_mem_wdata = Output(new Ftq_RF_Components)
  // newest target
  val newest_entry_target = Output(UInt(VAddrBits.W))
  val newest_entry_ptr = Output(new FtqPtr)
}


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val jmp_taken = Output(Bool())
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

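  // Added worked example: jmpPft is the fall-through offset just past the jump.
  // Assuming PredictWidth = 16 and 2-byte instruction granules, a 4-byte (RVI)
  // jal at slot 14 gives jmpPft = lower(start_addr) + 14 + 2; the widening +&
  // preserves the overflow bit, which becomes init_entry.carry when the
  // fall-through crosses into the next fetch region.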
  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  // if the last slot holds an RVI jump, the fall-through address points to the middle of that instruction
  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)

  // if hit, check whether a new cfi (only a br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })
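  // Added worked example, assuming numBr = 2: for a new br at offset 3,
  // old slots at offsets {1, 5} give onehot = b10 (insert before old slot 1);
  // a single old slot at {5} gives b01 (insert before slot 0); old slots
  // {1, 2} give b00, i.e. no free position, which is handled by the
  // replacement logic below (may_have_to_replace).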

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.last_may_be_rvi_call := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
  val io = IO(new Bundle {
    val ifuPtr_w       = Input(new FtqPtr)
    val ifuPtrPlus1_w  = Input(new FtqPtr)
    val ifuPtrPlus2_w  = Input(new FtqPtr)
    val commPtr_w      = Input(new FtqPtr)
    val commPtrPlus1_w = Input(new FtqPtr)
    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
    val commPtr_rdata      = Output(new Ftq_RF_Components)
    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)

    val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W)))
    val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components))

    val wen = Input(Bool())
    val waddr = Input(UInt(log2Ceil(FtqSize).W))
    val wdata = Input(new Ftq_RF_Components)
  })

  val num_pc_read = numOtherReads + 5
  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
    num_pc_read, 1, "FtqPC"))
  mem.io.wen(0)   := io.wen
  mem.io.waddr(0) := io.waddr
  mem.io.wdata(0) := io.wdata

  // read one cycle ahead for ftq local reads
  val raddr_vec = VecInit(io.other_raddrs ++
    Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value))

  mem.io.raddr := raddr_vec

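  // Added note: the read ports of `mem` are laid out as
  //   [ other_raddrs ++ ifuPtr, ifuPtrPlus1, ifuPtrPlus2, commPtrPlus1, commPtr ],
  // so the dropRight(...).last chain below peels the five FTQ-local ports off
  // the tail of mem.io.rdata in exactly that order.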
  io.other_rdatas       := mem.io.rdata.dropRight(5)
  io.ifuPtr_rdata       := mem.io.rdata.dropRight(4).last
  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(3).last
  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(2).last
  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
  io.commPtr_rdata      := mem.io.rdata.last
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
  with HasICacheParameters{
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toICache = new FtqToICacheIO
    val toBackend = new FtqToCtrlIO

    val toPrefetch = new FtqPrefechBundle

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }

    val mmioCommitRead = Flipped(new mmioCommitRead)

    // for perf
    val ControlBTBMissBubble = Output(Bool())
    val TAGEMissBubble = Output(Bool())
    val SCMissBubble = Output(Bool())
    val ITTAGEMissBubble = Output(Bool())
    val RASMissBubble = Output(Bool())
  })
  io.bpuInfo := DontCare

  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  dontTouch(topdown_stage)
  // only driven by clock, not valid-ready
  topdown_stage := io.fromBpu.resp.bits.topdown_info
  io.toIfu.req.bits.topdown_info := topdown_stage

  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))

  val backendRedirect = Wire(Valid(new BranchPredictionRedirect))
  val backendRedirectReg = RegNext(backendRedirect)

  val stage2Flush = backendRedirect.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid

  def copyNum = 5
  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
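  // Added note on pointer roles: bpuPtr is the enqueue head (advanced by BPU),
  // ifuPtr is the next entry to be fetched, ifuWbPtr the next predecode
  // writeback, and commPtr the next entry to commit and update the BPU with.
  // The Plus1/Plus2 shadows let the synchronous pc mem be addressed one cycle
  // ahead, and the copied_* pointers are duplicates that reduce fanout.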
  require(FtqSize >= 4)
  val ifuPtr_write       = WireInit(ifuPtr)
  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
  val ifuWbPtr_write     = WireInit(ifuWbPtr)
  val commPtr_write      = WireInit(commPtr)
  val commPtrPlus1_write = WireInit(commPtrPlus1)
  ifuPtr       := ifuPtr_write
  ifuPtrPlus1  := ifuPtrPlus1_write
  ifuPtrPlus2  := ifuPtrPlus2_write
  ifuWbPtr     := ifuWbPtr_write
  commPtr      := commPtr_write
  commPtrPlus1 := commPtrPlus1_write
  copied_ifu_ptr.map{ptr =>
    ptr := ifuPtr_write
    dontTouch(ptr)
  }
  val validEntries = distanceBetween(bpuPtr, commPtr)
  val canCommit = Wire(Bool())

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U || canCommit
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdxForFtq
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:      prefetchReq ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
  val ftq_pc_mem = Module(new FtqPcMemWrapper(1))
  // resp from uBTB
  ftq_pc_mem.io.wen := bpu_in_fire
  ftq_pc_mem.io.waddr := bpu_in_resp_idx
  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info
  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid(3)
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
  val newest_entry_target = Reg(UInt(VAddrBits.W))
  val newest_entry_ptr = Reg(new FtqPtr)
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

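  // Added note: commitStateQueue tracks each instruction slot of each FTQ
  // entry through a small state machine: c_invalid (not fetched or flushed)
  // -> c_valid (predecoded, in flight) -> c_commited (retired by the ROB).
  // An entry may commit only when every slot is c_invalid or c_commited
  // (see canCommit below).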
  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))

  // modify registers one cycle later to cut critical path
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
  val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget(3))
  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex(3))
  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)

  def extra_copyNum_for_commitStateQueue = 2
  val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
  val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr)))

  when (last_cycle_bpu_in) {
    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage

    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
    newest_entry_target := last_cycle_bpu_target
    newest_entry_ptr := last_cycle_bpu_in_ptr
  }

  // reduce fanout by delaying the write for a cycle
  when (RegNext(last_cycle_bpu_in)) {
    mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
  }

  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
    case ((in, ptr), i) =>
      when (in) {
        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
        for (j <- 0 until perSetEntries) {
          when (ptr.value === (i*perSetEntries+j).U) {
            commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid))
          }
        }
      }
  }

  // num cycle is fixed
  io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr)
  io.toBackend.newest_entry_target := RegNext(newest_entry_target)


  bpuPtr := bpuPtr + enq_fire
  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
  when (io.toIfu.req.fire && allowToIfu) {
    ifuPtr_write := ifuPtrPlus1
    ifuPtrPlus1_write := ifuPtrPlus2
    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
  }

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid(3)) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_redirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr_write := bpu_s2_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_redirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr_write := bpu_s3_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
    }
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")

  (0 until copyNum).map{i =>
    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
  }

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  // 0 for ifu, and 1-4 for ICache
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, enable=bpu_in_fire)
  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, enable=bpu_in_fire)))
  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr)))
  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
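
  // Added note: ftq_pc_mem is a synchronous memory, so an entry written this
  // cycle cannot be read back until the next one. bpu_in_bypass_buf therefore
  // snapshots the freshly written Ftq_RF_Components so that a request for the
  // entry BPU produced last cycle can be forwarded to IFU/ICache without
  // waiting for the memory read (the `bpu_in_bypass_ptr === ifuPtr` cases below).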

  // read pc and target
  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
  ftq_pc_mem.io.commPtr_w      := commPtr_write
  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write


  io.toIfu.req.bits.ftqIdx := ifuPtr

  val toICachePcBundle = Wire(Vec(copyNum, new Ftq_RF_Components))
  val toICacheEntryToSend = Wire(Vec(copyNum, Bool()))
  val toIfuPcBundle = Wire(new Ftq_RF_Components)
  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
  val entry_next_addr  = Wire(UInt(VAddrBits.W))

  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this

  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))

  for(i <- 0 until copyNum){
    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
      toICachePcBundle(i) := copied_bpu_in_bypass_buf(i)
      toICacheEntryToSend(i)   := true.B
    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
      toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i)
      toICacheEntryToSend(i)   := copied_ifu_plus1_to_send(i)
    }.otherwise{
      toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i)
      toICacheEntryToSend(i)   := copied_ifu_ptr_to_send(i)
    }
  }

  // TODO: reconsider target address bypass logic
  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
    entry_is_to_send := true.B
    entry_next_addr := last_cycle_bpu_target
    entry_ftq_offset := last_cycle_cfiIndex
    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
                          bpu_in_bypass_buf_for_ifu.startAddr,
                          Mux(ifuPtr === newest_entry_ptr,
                            newest_entry_target,
                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
  }.otherwise {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
                          bpu_in_bypass_buf_for_ifu.startAddr,
                          Mux(ifuPtr === newest_entry_ptr,
                            newest_entry_target,
                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
  }

  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.nextStartAddr := entry_next_addr
  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)

  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
  io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy, i) => copy.fromFtqPcBundle(toICachePcBundle(i))}
  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrite, i) =>
  //   bypassWrite.startAddr := bpu_in_bypass_buf.tail(i).startAddr
  //   bypassWrite.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
  // }

  // TODO: remove this
  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")

  // when fall through is smaller in value than start address, there must be a false hit
  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
  }

  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }

  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  when (ifu_wb_valid) {
    ifuWbPtr_write := ifuWbPtr + 1.U
  }

  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // ***************************** to backend *****************************
  // **********************************************************************
  // to backend pc mem / target
  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
  io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx)
  io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu)

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read of cfiInfo, coupled to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := backendRedirect.valid
  ftq_redirect_sram.io.raddr.init.last := backendRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := backendRedirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(backendRedirectReg)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)


  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(r_ftb_entry.brSlots(0).offset === r_ftqOffset,
      r_ftb_entry.brSlots(0).sc, r_ftb_entry.tailSlot.sc)

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
  fromIfuRedirect.bits.BTBMissBubble := true.B
  fromIfuRedirect.bits.debugIsMemVio := false.B
  fromIfuRedirect.bits.debugIsCtrl := false.B

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  when (ifuRedirectReg.valid) {
    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
  } .elsewhen(RegNext(pdWb.valid)) {
    // if pdWb arrives and there is no redirect, clear the flag
    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  backendRedirect.valid := io.fromBackend.redirect.valid
  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
  backendRedirect.bits.BTBMissBubble := false.B


  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqPtr = wb.bits.ftqIdx
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

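  // Added note on updateCfiInfo below: cfiIndex_vec(idx) records the first
  // taken CFI of an entry. A redirect that reports a taken CFI at an earlier
  // offset overwrites the recorded offset; one at the recorded offset just
  // refreshes the valid bit with its taken flag; a not-taken redirect at a
  // different offset invalidates the recorded CFI.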
  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val r_idx = r_ptr.value
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
      cfiIndex_vec(r_idx).valid := false.B
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    newest_entry_target := redirect.bits.cfiUpdate.target
    newest_entry_ptr := r_ptr
    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }

  when(backendRedirectReg.valid) {
    updateCfiInfo(backendRedirectReg)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  when (backendRedirectReg.valid) {
    when (backendRedirectReg.bits.ControlRedirectBubble) {
      when (fromBackendRedirect.bits.ControlBTBMissBubble) {
        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.TAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.SCMissBubble) {
        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.ITTAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.RASMissBubble) {
        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
      }


    } .elsewhen (backendRedirectReg.bits.MemVioRedirectBubble) {
      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
    } .otherwise {
      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
    }
  } .elsewhen (ifuRedirectReg.valid) {
    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
  }

  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
  io.TAGEMissBubble := fromBackendRedirect.bits.TAGEMissBubble
  io.SCMissBubble := fromBackendRedirect.bits.SCMissBubble
  io.ITTAGEMissBubble := fromBackendRedirect.bits.ITTAGEMissBubble
  io.RASMissBubble := fromBackendRedirect.bits.RASMissBubble

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
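
  // Added note: backendRedirect is listed first, so the PriorityMux below
  // lets an execution-time (backend) redirect take precedence over a
  // predecode-time (IFU) redirect in the same cycle.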

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    copied_bpu_ptr.map(_ := next)
    ifuPtr_write := next
    ifuWbPtr_write := next
    ifuPtrPlus1_write := idx + 2.U
    ifuPtrPlus2_write := idx + 3.U

  }
  when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    when (RegNext(notIfu)) {
      commitStateQueue(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > RegNext(offset) || i.U === RegNext(offset) && RegNext(flushItSelf)){
          s := c_invalid
        }
      })
    }
  }


  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := backendRedirect.bits
  io.toIfu.redirect.valid   := stage2Flush
  io.toIfu.topdown_redirect := fromBackendRedirect

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay")

  val may_have_stall_from_bpu = Wire(Bool())
  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
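  // Added note (an inference from the stall counter below): when a committed
  // entry was not an FTB hit, the BPU needs extra cycles to generate and write
  // a new FTB entry before the next update can be consumed, so commits are
  // stalled while bpu_ftb_update_stall counts down 2 -> 1 -> 0.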
1116  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1117    Cat(commitStateQueue(commPtr.value).map(s => {
1118      s === c_invalid || s === c_commited
1119    })).andR()
1120
1121  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1122  val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr,mmioReadPtr)  ||  mmioReadPtr ===   ifuPtr) &&
1123                       Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited})).andR()
1124  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1125
1126  // commit reads
1127  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1128  val commit_target =
1129    Mux(RegNext(commPtr === newest_entry_ptr),
1130      RegNext(newest_entry_target),
1131      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
1132  ftq_pd_mem.io.raddr.last := commPtr.value
1133  val commit_pd = ftq_pd_mem.io.rdata.last
1134  ftq_redirect_sram.io.ren.last := canCommit
1135  ftq_redirect_sram.io.raddr.last := commPtr.value
1136  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
1137  ftq_meta_1r_sram.io.ren(0) := canCommit
1138  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1139  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
1140  ftb_entry_mem.io.raddr.last := commPtr.value
1141  val commit_ftb_entry = ftb_entry_mem.io.rdata.last
1142
1143  // need one cycle to read mem and srams
1144  val do_commit_ptr = RegNext(commPtr)
1145  val do_commit = RegNext(canCommit, init=false.B)
1146  when (canCommit) {
1147    commPtr_write := commPtrPlus1
1148    commPtrPlus1_write := commPtrPlus1 + 1.U
1149  }
1150  val commit_state = RegNext(commitStateQueue(commPtr.value))
1151  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1152  val do_commit_cfi = WireInit(cfiIndex_vec(do_commit_ptr.value))
1153  //
1154  //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1155  //  can_commit_cfi.valid := false.B
1156  //}
1157  val commit_cfi = RegNext(can_commit_cfi)
1158  val debug_cfi = commitStateQueue(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_commited && do_commit_cfi.valid
1159
1160  val commit_mispredict  : Vec[Bool] = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
1161    case (mis, state) => mis && state === c_commited
1162  })
1163  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_commited)) // [PredictWidth]
1164  val can_commit_hit                 = entry_hit_status(commPtr.value)
1165  val commit_hit                     = RegNext(can_commit_hit)
1166  val diff_commit_target             = RegNext(update_target(commPtr.value)) // TODO: remove this
1167  val commit_stage                   = RegNext(pred_stage(commPtr.value))
1168  val commit_valid                   = commit_hit === h_hit || commit_cfi.valid // hit or taken
1169
1170  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
1171  switch (bpu_ftb_update_stall) {
1172    is (0.U) {
1173      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1174        bpu_ftb_update_stall := 2.U // 2-cycle stall
1175      }
1176    }
1177    is (2.U) {
1178      bpu_ftb_update_stall := 1.U
1179    }
1180    is (1.U) {
1181      bpu_ftb_update_stall := 0.U
1182    }
1183    is (3.U) {
1184      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1185    }
1186  }
1187
1188  // TODO: remove this
1189  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1190
1191  io.toBpu.update := DontCare
1192  io.toBpu.update.valid := commit_valid && do_commit
1193  val update = io.toBpu.update.bits
1194  update.false_hit   := commit_hit === h_false_hit
1195  update.pc          := commit_pc_bundle.startAddr
1196  update.meta        := commit_meta.meta
1197  update.cfi_idx     := commit_cfi
1198  update.full_target := commit_target
1199  update.from_stage  := commit_stage
1200  update.spec_info   := commit_spec_meta
1201  XSError(commit_valid && do_commit && debug_cfi, "\ncommit cfi can be non c_commited\n")
1202
1203  val commit_real_hit = commit_hit === h_hit
1204  val update_ftb_entry = update.ftb_entry
1205
1206  val ftbEntryGen = Module(new FTBEntryGen).io
1207  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1208  ftbEntryGen.old_entry      := commit_ftb_entry
1209  ftbEntryGen.pd             := commit_pd
1210  ftbEntryGen.cfiIndex       := commit_cfi
1211  ftbEntryGen.target         := commit_target
1212  ftbEntryGen.hit            := commit_real_hit
1213  ftbEntryGen.mispredict_vec := commit_mispredict
1214
1215  update_ftb_entry         := ftbEntryGen.new_entry
1216  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1217  update.mispred_mask      := ftbEntryGen.mispred_mask
1218  update.old_entry         := ftbEntryGen.is_old_entry
1219  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1220  update.br_taken_mask     := ftbEntryGen.taken_mask
1221  update.br_committed      := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1222    case (valid, offset) => valid && commit_instCommited(offset)
1223  }
1224  update.jmp_taken         := ftbEntryGen.jmp_taken
1225
1226  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1227  // update.full_pred.jalr_target := commit_target
1228  // update.full_pred.hit := true.B
1229  // when (update.full_pred.is_jalr) {
1230  //   update.full_pred.targets.last := commit_target
1231  // }
1232
1233  // ****************************************************************
1234  // *********************** to prefetch ****************************
1235  // ****************************************************************
1236
1237  ftq_pc_mem.io.other_raddrs(0) := DontCare
1238  if(cacheParams.hasPrefetch){
1239    val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
1240    val diff_prefetch_addr = WireInit(update_target(prefetchPtr.value)) // TODO: remove this
1241    // TODO: must be wider
1242    prefetchPtr := prefetchPtr + io.toPrefetch.req.fire()
1243
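        // if prefetchPtr is at or before ifuPtr, prefetching this entry can no longer
        // help; skip ahead to bpuPtr - 1, or fall back to ifuPtr when prefetchPtr has
        // already caught up with bpuPtr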
1244    val prefetch_too_late = (isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr)) || (prefetchPtr === ifuPtr)
1245    when(prefetch_too_late){
1246      when(prefetchPtr =/= bpuPtr){
1247        prefetchPtr := bpuPtr - 1.U
1248      }.otherwise{
1249        prefetchPtr := ifuPtr
1250      }
1251    }
1252
1253    ftq_pc_mem.io.other_raddrs(0) := prefetchPtr.value
1254
1255    when (bpu_s2_redirect && !isBefore(prefetchPtr, bpu_s2_resp.ftq_idx)) {
1256      prefetchPtr := bpu_s2_resp.ftq_idx
1257    }
1258
1259    when (bpu_s3_redirect && !isBefore(prefetchPtr, bpu_s3_resp.ftq_idx)) {
1260      prefetchPtr := bpu_s3_resp.ftq_idx
1261      // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
1262    }
1263
1264
1265    val prefetch_is_to_send = WireInit(entry_fetch_status(prefetchPtr.value) === f_to_send)
1266    val prefetch_addr = Wire(UInt(VAddrBits.W))
1267
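        // bypass last cycle's BPU enqueue: the pc mem read issued last cycle cannot see
        // that write yet, so forward last_cycle_bpu_target directly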
1268    when (last_cycle_bpu_in && bpu_in_bypass_ptr === prefetchPtr) {
1269      prefetch_is_to_send := true.B
1270      prefetch_addr := last_cycle_bpu_target
1271      diff_prefetch_addr := last_cycle_bpu_target // TODO: remove this
1272    }.otherwise{
1273      prefetch_addr := RegNext(ftq_pc_mem.io.other_rdatas(0).startAddr)
1274    }
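        // only issue a prefetch request when the entry is ready to send and prefetchPtr
        // has not caught up with bpuPtr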
1275    io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && prefetch_is_to_send
1276    io.toPrefetch.req.bits.target := prefetch_addr
1277
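        // on any redirect, resume prefetching from the entry right after the redirected one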
1278    when(redirectVec.map(r => r.valid).reduce(_||_)){
1279      val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1280      val next = r.ftqIdx + 1.U
1281      prefetchPtr := next
1282    }
1283
1284    // TODO: remove this
1285    // XSError(io.toPrefetch.req.valid && diff_prefetch_addr =/= prefetch_addr,
1286    //         f"\nprefetch_req_target wrong! prefetchPtr: ${prefetchPtr}, prefetch_addr: ${Hexadecimal(prefetch_addr)} diff_prefetch_addr: ${Hexadecimal(diff_prefetch_addr)}\n")
1287
1288
1289    XSError(isBefore(bpuPtr, prefetchPtr) && !isFull(bpuPtr, prefetchPtr), "\nprefetchPtr is before bpuPtr!\n")
1290//    XSError(isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr), "\nifuPtr is before prefetchPtr!\n")
1291  }
1292  else {
1293    io.toPrefetch.req <> DontCare
1294  }
1295
1296  // ******************************************************************************
1297  // **************************** commit perf counters ****************************
1298  // ******************************************************************************
1299
1300  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
1301  val commit_mispred_mask = commit_mispredict.asUInt
1302  val commit_not_mispred_mask = ~commit_mispred_mask
1303
1304  val commit_br_mask = commit_pd.brMask.asUInt
1305  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1306  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
1307
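      // committed control-flow instructions, split below into correctly and incorrectly
      // predicted ones by the mispredict mask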
1308  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1309
1310  val mbpRights = mbpInstrs & commit_not_mispred_mask
1311  val mbpWrongs = mbpInstrs & commit_mispred_mask
1312
1313  io.bpuInfo.bpRight := PopCount(mbpRights)
1314  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1315
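      // isWriteFTQTable is a runtime-configurable Constantin knob that gates the
      // per-commit ChiselDB trace writes below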
1316  val isWriteFTQTable = WireInit(Constantin.createRecord("isWriteFTQTable" + p(XSCoreParamsKey).HartId.toString))
1317  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
1318  // Cfi Info
1319  for (i <- 0 until PredictWidth) {
1320    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1321    val v = commit_state(i) === c_commited
1322    val isBr = commit_pd.brMask(i)
1323    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1324    val isCfi = isBr || isJmp
1325    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1326    val misPred = commit_mispredict(i)
1327    // val ghist = commit_spec_meta.ghist.predHist
1328    val histPtr = commit_spec_meta.histPtr
1329    val predCycle = commit_meta.meta(63, 0)
1330    val target = commit_target
1331
1332    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1333    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
1334    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || (commit_hit =/= h_hit && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1335    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1336    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1337    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1338    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1339
1340    val logbundle = Wire(new FtqDebugBundle)
1341    logbundle.pc := pc
1342    logbundle.target := target
1343    logbundle.isBr := isBr
1344    logbundle.isJmp := isJmp
1345    logbundle.isCall := isJmp && commit_pd.hasCall
1346    logbundle.isRet := isJmp && commit_pd.hasRet
1347    logbundle.misPred := misPred
1348    logbundle.isTaken := isTaken
1349    logbundle.predStage := commit_stage
1350
1351    ftqBranchTraceDB.log(
1352      data = logbundle, // hardware of type FtqDebugBundle
1353      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1354      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1355      clock = clock,
1356      reset = reset
1357    )
1358  }
1359
1360  val enq = io.fromBpu.resp
1361  val perf_redirect = backendRedirect
1362
1363  XSPerfAccumulate("entry", validEntries)
1364  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1365  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1366  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1367  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1368
1369  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1370
1371  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1372  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1373  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1374
1375  val from_bpu = io.fromBpu.resp.bits
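      // histogram of last-stage FTB entry lengths, measured in instructions from the
      // start pc to the fall-through address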
1376  def in_entry_len_map_gen(resp: BpuToFtqBundle)(stage: String) = {
1377    val entry_len = (resp.last_stage_ftb_entry.getFallThrough(resp.s3.pc(3)) - resp.s3.pc(3)) >> instOffsetBits
1378    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
1379    val entry_len_map = (1 to PredictWidth+1).map(i =>
1380      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.s3.valid(3))
1381    ).foldLeft(Map[String, UInt]())(_+_)
1382    entry_len_map
1383  }
1384  val s3_entry_len_map = in_entry_len_map_gen(from_bpu)("s3")
1385
1386  val to_ifu = io.toIfu.req.bits
1387
1388
1389
1390  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
1391  val commit_num_inst_map = (1 to PredictWidth).map(i =>
1392    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
1393  ).foldLeft(Map[String, UInt]())(_+_)
1394
1395
1396
1397  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1398  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1399  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1400  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1401
1402
1403  val mbpBRights = mbpRights & commit_br_mask
1404  val mbpJRights = mbpRights & commit_jal_mask
1405  val mbpIRights = mbpRights & commit_jalr_mask
1406  val mbpCRights = mbpRights & commit_call_mask
1407  val mbpRRights = mbpRights & commit_ret_mask
1408
1409  val mbpBWrongs = mbpWrongs & commit_br_mask
1410  val mbpJWrongs = mbpWrongs & commit_jal_mask
1411  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1412  val mbpCWrongs = mbpWrongs & commit_call_mask
1413  val mbpRWrongs = mbpWrongs & commit_ret_mask
1414
1415  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1416
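      // bucket a (mis)prediction mask by the BPU stage that produced the final prediction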
1417  def pred_stage_map(src: UInt, name: String) = {
1418    (0 until numBpStages).map(i =>
1419      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1420    ).foldLeft(Map[String, UInt]())(_+_)
1421  }
1422
1423  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1424  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1425  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1426  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1427  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1428  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1429
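      // classify each BPU update: FTB hit / false hit, and whether the written-back
      // entry is brand new, unchanged (old), or a modified version of the old one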
1430  val update_valid = io.toBpu.update.valid
1431  def u(cond: Bool) = update_valid && cond
1432  val ftb_false_hit = u(update.false_hit)
1433  // assert(!ftb_false_hit)
1434  val ftb_hit = u(commit_hit === h_hit)
1435
1436  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1437  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1438  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1439  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1440
1441  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1442
1443  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1444  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1445  val ftb_modified_entry_ifu_redirected = u(ifuRedirected(do_commit_ptr.value))
1446  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1447  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1448  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1449
1450  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
1451  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
1452  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
1453    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
1454  ).foldLeft(Map[String, UInt]())(_+_)
1455  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
1456    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
1457  ).foldLeft(Map[String, UInt]())(_+_)
1458
1459  val ftq_occupancy_map = (0 to FtqSize).map(i =>
1460    f"ftq_has_entry_$i" ->( validEntries === i.U)
1461  ).foldLeft(Map[String, UInt]())(_+_)
1462
1463  val perfCountsMap = Map(
1464    "BpInstr" -> PopCount(mbpInstrs),
1465    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1466    "BpRight"  -> PopCount(mbpRights),
1467    "BpWrong"  -> PopCount(mbpWrongs),
1468    "BpBRight" -> PopCount(mbpBRights),
1469    "BpBWrong" -> PopCount(mbpBWrongs),
1470    "BpJRight" -> PopCount(mbpJRights),
1471    "BpJWrong" -> PopCount(mbpJWrongs),
1472    "BpIRight" -> PopCount(mbpIRights),
1473    "BpIWrong" -> PopCount(mbpIWrongs),
1474    "BpCRight" -> PopCount(mbpCRights),
1475    "BpCWrong" -> PopCount(mbpCWrongs),
1476    "BpRRight" -> PopCount(mbpRRights),
1477    "BpRWrong" -> PopCount(mbpRWrongs),
1478
1479    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1480    "ftb_hit"                      -> PopCount(ftb_hit),
1481    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1482    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1483    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1484    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1485    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1486    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1487    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1488    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1489    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1490    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1491  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++
1492  s3_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
1493  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1494  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1495
1496  for((key, value) <- perfCountsMap) {
1497    XSPerfAccumulate(key, value)
1498  }
1499
1500  // --------------------------- Debug --------------------------------
1501  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1502  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1503  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1504  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr, [commPtr] $commPtr\n")
1505  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1506    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1507  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1508
1509  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1510  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1511  //       case (((valid, pd), ans), taken) =>
1512  //       Mux(valid && pd.isBr,
1513  //         isWrong ^ Mux(ans.hit.asBool,
1514  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1515  //           !taken),
1516  //         !taken),
1517  //       false.B)
1518  //     }
1519  //   }
1520
1521  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1522  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1523  //       case (((valid, pd), ans), taken) =>
1524  //       Mux(valid && pd.isBr,
1525  //         isWrong ^ Mux(ans.hit.asBool,
1526  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1527  //           !taken),
1528  //         !taken),
1529  //       false.B)
1530  //     }
1531  //   }
1532
1533  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1534  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1535  //       case (((valid, pd), ans), taken) =>
1536  //       Mux(valid && pd.isBr,
1537  //         isWrong ^ (ans.taken.asBool === taken),
1538  //       false.B)
1539  //     }
1540  //   }
1541
1542  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1543  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1544  //       case (((valid, pd), ans), taken) =>
1545  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1546  //         isWrong ^ (!taken),
1547  //           false.B)
1548  //     }
1549  //   }
1550
1551  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1552  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1553  //       case (((valid, pd), ans), taken) =>
1554  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1555  //         isWrong ^ (ans.target === commitEntry.target),
1556  //           false.B)
1557  //     }
1558  //   }
1559
1560  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1561  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1562  //   // btb and ubtb pred jal and jalr as well
1563  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1564  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1565  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1566  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1567
1568  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1569  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1570
1571  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1572  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1573
1574  val perfEvents = Seq(
1575    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1576    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1577    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
1578    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1579    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1580    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1581    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1582    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1583    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1584    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1585    ("BpRight                ", PopCount(mbpRights)                                                         ),
1586    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1587    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1588    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1589    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1590    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1591    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1592    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1593    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1594    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1595    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1596    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1597    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1598    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1599  )
1600  generatePerfEvent()
1601}