xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 8891a219bbc84f568e1d134854d8d5ed86d6d560)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.frontend.icache._
26import xiangshan.backend.CtrlToFtqIO
27import xiangshan.backend.decode.ImmUnion
28import utility.ChiselDB
29
30class FtqDebugBundle extends Bundle {
31  val pc = UInt(39.W)
32  val target = UInt(39.W)
33  val isBr = Bool()
34  val isJmp = Bool()
35  val isCall = Bool()
36  val isRet = Bool()
37  val misPred = Bool()
38  val isTaken = Bool()
39  val predStage = UInt(2.W)
40}
41
42class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
43  p => p(XSCoreParamsKey).FtqSize
44){
45}
46
47object FtqPtr {
48  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
49    val ptr = Wire(new FtqPtr)
50    ptr.flag := f
51    ptr.value := v
52    ptr
53  }
54  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
55    apply(!ptr.flag, ptr.value)
56  }
57}
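// Illustrative sketch (not part of the original design): the extra wrap flag
// of a CircularQueuePtr is what lets two FtqPtrs holding the same value be
// told apart as "full" vs. "empty". Assuming the helpers from
// HasCircularQueuePtrHelper are in scope (as they are in the Ftq module below):
//   val enqPtr = FtqPtr(true.B,  0.U)    // has wrapped around once
//   val deqPtr = FtqPtr(false.B, 0.U)    // has not wrapped yet
//   val full   = isFull(enqPtr, deqPtr)  // same value, different flag => full
//   val empty  = enqPtr === deqPtr       // false here, because the flags differ
//   val ahead  = isAfter(enqPtr, deqPtr) // enqPtr is logically ahead of deqPtr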
58
59class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
60
61  val io = IO(new Bundle() {
62    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
63    val ren = Input(Vec(numRead, Bool()))
64    val rdata = Output(Vec(numRead, gen))
65    val waddr = Input(UInt(log2Up(FtqSize).W))
66    val wen = Input(Bool())
67    val wdata = Input(gen)
68  })
69
70  for(i <- 0 until numRead){
71    val sram = Module(new SRAMTemplate(gen, FtqSize))
72    sram.io.r.req.valid := io.ren(i)
73    sram.io.r.req.bits.setIdx := io.raddr(i)
74    io.rdata(i) := sram.io.r.resp.data(0)
75    sram.io.w.req.valid := io.wen
76    sram.io.w.req.bits.setIdx := io.waddr
77    sram.io.w.req.bits.data := VecInit(io.wdata)
78  }
79
80}
81
82class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
83  val startAddr = UInt(VAddrBits.W)
84  val nextLineAddr = UInt(VAddrBits.W)
85  val isNextMask = Vec(PredictWidth, Bool())
86  val fallThruError = Bool()
87  // val carry = Bool()
88  def getPc(offset: UInt) = {
89    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
90    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
91    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
92        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
93  }
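  // Worked example for getPc (illustrative, assuming a default-style config
  // with PredictWidth = 16, instOffsetBits = 1 and FetchWidth = 8, so that
  // fromBranchPrediction below sets nextLineAddr = startAddr + 64):
  // let startAddr = 0x803C, i.e. getOffset(startAddr) = 30 and startAddr(5) = 1.
  //   getPc(0.U) = 0x803C   // higher bits taken from startAddr
  //   getPc(3.U) = 0x8042   // = startAddr + 3 * 2 bytes:
  //     isNextMask(3) && startAddr(5) holds, so the higher bits come from
  //     nextLineAddr (0x807C >> 6), while the 5-bit offset sum 30 + 3 wraps to 1.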
94  def fromBranchPrediction(resp: BranchPredictionBundle) = {
95    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
96    this.startAddr := resp.pc(3)
97    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
98    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
99      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
100    ))
101    this.fallThruError := resp.fallThruError(3)
102    this
103  }
104  override def toPrintable: Printable = {
105    p"startAddr:${Hexadecimal(startAddr)}"
106  }
107}
108
109class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
110  val brMask = Vec(PredictWidth, Bool())
111  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
112  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
113  val jalTarget = UInt(VAddrBits.W)
114  val rvcMask = Vec(PredictWidth, Bool())
115  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
116  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
117  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
118  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)
119
120  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
121    val pds = pdWb.pd
122    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
123    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
124    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
125                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
126    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
127    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
128    this.jalTarget := pdWb.jalTarget
129  }
130
131  def toPd(offset: UInt) = {
132    require(offset.getWidth == log2Ceil(PredictWidth))
133    val pd = Wire(new PreDecodeInfo)
134    pd.valid := true.B
135    pd.isRVC := rvcMask(offset)
136    val isBr = brMask(offset)
137    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
138    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
139    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
140    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
141    pd
142  }
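  // Decoding note for toPd (illustrative, assuming the usual BrType encoding
  // of 00 = not a CFI, 01 = branch, 10 = jal, 11 = jalr): the Cat above forms
  // brType as { isJmpAtThisOffset, isJalr || isBr }, so
  //   a conditional branch at `offset`            -> brType = b01
  //   the recorded jmp at `offset`, jal variant   -> brType = b10
  //   the recorded jmp at `offset`, jalr variant  -> brType = b11
  //   any other instruction                       -> brType = b00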
143}
144
145
146
147class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {}
148
149class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
150  val meta = UInt(MaxMetaLength.W)
151}
152
153class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
154  val target = UInt(VAddrBits.W)
155  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
156}
157
158
159class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
160  val ptr = Output(new FtqPtr)
161  val offset = Output(UInt(log2Ceil(PredictWidth).W))
162  val data = Input(gen)
163  def apply(ptr: FtqPtr, offset: UInt) = {
164    this.ptr := ptr
165    this.offset := offset
166    this.data
167  }
168}
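// Usage sketch (illustrative; `pcRead`, `ftqIdx` and `ftqOffset` are
// hypothetical names): a consumer declares
//   val pcRead = new FtqRead(UInt(VAddrBits.W))
// in its IO, and `val pc = pcRead(ftqIdx, ftqOffset)` drives ptr/offset while
// returning whatever the FTQ supplies on `data`, subject to the FTQ's own
// read latency for that port.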
169
170
171class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
172  val redirect = Valid(new BranchPredictionRedirect)
173  val update = Valid(new BranchPredictionUpdate)
174  val enq_ptr = Output(new FtqPtr)
175}
176
177class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
178  val req = Decoupled(new FetchRequestBundle)
179  val redirect = Valid(new BranchPredictionRedirect)
180  val topdown_redirect = Valid(new BranchPredictionRedirect)
181  val flushFromBpu = new Bundle {
182    // when the ifu pipeline is not stalled,
183    // a packet from bpu s3 can have reached at most the f1 stage
184    val s2 = Valid(new FtqPtr)
185    val s3 = Valid(new FtqPtr)
186    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
187      src.valid && !isAfter(src.bits, idx_to_flush)
188    }
189    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
190    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
191  }
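  // Illustrative reading of shouldFlushBy (not from the original comments):
  // a pending request is flushed when its ftqIdx is at or after the entry the
  // overriding BPU stage redirects from. E.g. with s2.valid and s2.bits
  // pointing at entry 5:
  //   shouldFlushByStage2(entry 5) -> true   (the overridden entry itself)
  //   shouldFlushByStage2(entry 7) -> true   (fetched past the override point)
  //   shouldFlushByStage2(entry 4) -> false  (older entries are unaffected)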
192}
193
194class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
195  //NOTE: req.bits must be prepared in cycle T,
196  // while req.valid is set true in cycle T + 1
197  val req = Decoupled(new FtqToICacheRequestBundle)
198}
199
200trait HasBackendRedirectInfo extends HasXSParameter {
201  def numRedirectPcRead = exuParameters.JmpCnt + exuParameters.AluCnt + 1
202  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
203}
204
205class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
206  // write to backend pc mem
207  val pc_mem_wen = Output(Bool())
208  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
209  val pc_mem_wdata = Output(new Ftq_RF_Components)
210  // newest target
211  val newest_entry_target = Output(UInt(VAddrBits.W))
212  val newest_entry_ptr = Output(new FtqPtr)
213}
214
215
216class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
217  val io = IO(new Bundle {
218    val start_addr = Input(UInt(VAddrBits.W))
219    val old_entry = Input(new FTBEntry)
220    val pd = Input(new Ftq_pd_Entry)
221    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
222    val target = Input(UInt(VAddrBits.W))
223    val hit = Input(Bool())
224    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
225
226    val new_entry = Output(new FTBEntry)
227    val new_br_insert_pos = Output(Vec(numBr, Bool()))
228    val taken_mask = Output(Vec(numBr, Bool()))
229    val jmp_taken = Output(Bool())
230    val mispred_mask = Output(Vec(numBr+1, Bool()))
231
232    // for perf counters
233    val is_init_entry = Output(Bool())
234    val is_old_entry = Output(Bool())
235    val is_new_br = Output(Bool())
236    val is_jalr_target_modified = Output(Bool())
237    val is_always_taken_modified = Output(Bool())
238    val is_br_full = Output(Bool())
239  })
240
241  // no mispredictions detected at predecode
242  val hit = io.hit
243  val pd = io.pd
244
245  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
246
247
248  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
249  val entry_has_jmp = pd.jmpInfo.valid
250  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
251  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
252  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
253  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
254  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
255  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
256
257  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
258  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
259
260  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
261  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
262  // if not hit, establish a new entry
263  init_entry.valid := true.B
264  // tag is left for ftb to assign
265
266  // case br
267  val init_br_slot = init_entry.getSlotForBr(0)
268  when (cfi_is_br) {
269    init_br_slot.valid := true.B
270    init_br_slot.offset := io.cfiIndex.bits
271    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
272    init_entry.always_taken(0) := true.B // set to always taken on init
273  }
274
275  // case jmp
276  when (entry_has_jmp) {
277    init_entry.tailSlot.offset := pd.jmpOffset
278    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
279    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
280  }
281
282  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
283  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
284  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
285  init_entry.isJalr := new_jmp_is_jalr
286  init_entry.isCall := new_jmp_is_call
287  init_entry.isRet  := new_jmp_is_ret
288  // if so, the fall-through address points into the middle of an instruction
289  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)
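  // Worked example for pftAddr/carry (illustrative, assuming PredictWidth = 16
  // and instOffsetBits = 1, so carryPos = 5 and getLower returns the 4-bit
  // slot index inside a 32-byte fetch region): start_addr = 0x8018 (slot 12)
  // with a non-RVC jmp at jmpOffset = 3 gives
  //   jmpPft  = 12 + 3 + 2 = 17 = b1_0001
  //   pftAddr = b0001 (low 4 bits), carry = 1
  // i.e. the fall-through address 0x8022 lies in the next 32-byte region.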
290
291  // if hit, check whether a new cfi (only a br is possible) is detected
292  val oe = io.old_entry
293  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
294  val br_recorded = br_recorded_vec.asUInt.orR
295  val is_new_br = cfi_is_br && !br_recorded
296  val new_br_offset = io.cfiIndex.bits
297  // vec(i) means new br will be inserted BEFORE old br(i)
298  val allBrSlotsVec = oe.allSlotsForBr
299  val new_br_insert_onehot = VecInit((0 until numBr).map{
300    i => i match {
301      case 0 =>
302        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
303      case idx =>
304        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
305        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
306    }
307  })
308
309  val old_entry_modified = WireInit(io.old_entry)
310  for (i <- 0 until numBr) {
311    val slot = old_entry_modified.allSlotsForBr(i)
312    when (new_br_insert_onehot(i)) {
313      slot.valid := true.B
314      slot.offset := new_br_offset
315      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
316      old_entry_modified.always_taken(i) := true.B
317    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
318      old_entry_modified.always_taken(i) := false.B
319      // all other fields remain unchanged
320    }.otherwise {
321      // case i == 0, remain unchanged
322      if (i != 0) {
323        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
324        when (!noNeedToMoveFromFormerSlot) {
325          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
326          old_entry_modified.always_taken(i) := oe.always_taken(i)
327        }
328      }
329    }
330  }
331
332  // two circumstances:
333  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
334  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
335  //        the previous last br or the new br
336  val may_have_to_replace = oe.noEmptySlotForNewBr
337  val pft_need_to_change = is_new_br && may_have_to_replace
338  // it should either be the given last br or the new br
339  when (pft_need_to_change) {
340    val new_pft_offset =
341      Mux(!new_br_insert_onehot.asUInt.orR,
342        new_br_offset, oe.allSlotsForBr.last.offset)
343
344    // set jmp to invalid
345    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
346    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
347    old_entry_modified.last_may_be_rvi_call := false.B
348    old_entry_modified.isCall := false.B
349    old_entry_modified.isRet := false.B
350    old_entry_modified.isJalr := false.B
351  }
352
353  val old_entry_jmp_target_modified = WireInit(oe)
354  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
355  val old_tail_is_jmp = !oe.tailSlot.sharing
356  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
357  when (jalr_target_modified) {
358    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
359    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
360  }
361
362  val old_entry_always_taken = WireInit(oe)
363  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
364  for (i <- 0 until numBr) {
365    old_entry_always_taken.always_taken(i) :=
366      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
367    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
368  }
369  val always_taken_modified = always_taken_modified_vec.reduce(_||_)
370
371
372
373  val derived_from_old_entry =
374    Mux(is_new_br, old_entry_modified,
375      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))
376
377
378  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
379
380  io.new_br_insert_pos := new_br_insert_onehot
381  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
382    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
383  })
384  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
385  for (i <- 0 until numBr) {
386    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
387  }
388  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
389
390  // for perf counters
391  io.is_init_entry := !hit
392  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
393  io.is_new_br := hit && is_new_br
394  io.is_jalr_target_modified := hit && jalr_target_modified
395  io.is_always_taken_modified := hit && always_taken_modified
396  io.is_br_full := hit && is_new_br && may_have_to_replace
397}
398
399class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
400  val io = IO(new Bundle {
401    val ifuPtr_w       = Input(new FtqPtr)
402    val ifuPtrPlus1_w  = Input(new FtqPtr)
403    val ifuPtrPlus2_w  = Input(new FtqPtr)
404    val commPtr_w      = Input(new FtqPtr)
405    val commPtrPlus1_w = Input(new FtqPtr)
406    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
407    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
408    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
409    val commPtr_rdata      = Output(new Ftq_RF_Components)
410    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
411
412    val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W)))
413    val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components))
414
415    val wen = Input(Bool())
416    val waddr = Input(UInt(log2Ceil(FtqSize).W))
417    val wdata = Input(new Ftq_RF_Components)
418  })
419
420  val num_pc_read = numOtherReads + 5
421  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
422    num_pc_read, 1, "FtqPC"))
423  mem.io.wen(0)   := io.wen
424  mem.io.waddr(0) := io.waddr
425  mem.io.wdata(0) := io.wdata
426
427  // read one cycle ahead for ftq local reads
428  val raddr_vec = VecInit(io.other_raddrs ++
429    Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value))
430
431  mem.io.raddr := raddr_vec
432
433  io.other_rdatas       := mem.io.rdata.dropRight(5)
434  io.ifuPtr_rdata       := mem.io.rdata.dropRight(4).last
435  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(3).last
436  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(2).last
437  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
438  io.commPtr_rdata      := mem.io.rdata.last
439}
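// Illustrative port mapping (not part of the original source) for the single
// instantiation below with numOtherReads = 1, where the extra read port is
// hooked to the prefetch pointer: raddr_vec lines up with mem.io.rdata as
//   rdata(0) -> other_rdatas(0)       (prefetch)
//   rdata(1) -> ifuPtr_rdata
//   rdata(2) -> ifuPtrPlus1_rdata
//   rdata(3) -> ifuPtrPlus2_rdata
//   rdata(4) -> commPtrPlus1_rdata
//   rdata(5) -> commPtr_rdata
// which is exactly what the dropRight(n).last chain above selects.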
440
441class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
442  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
443  with HasICacheParameters{
444  val io = IO(new Bundle {
445    val fromBpu = Flipped(new BpuToFtqIO)
446    val fromIfu = Flipped(new IfuToFtqIO)
447    val fromBackend = Flipped(new CtrlToFtqIO)
448
449    val toBpu = new FtqToBpuIO
450    val toIfu = new FtqToIfuIO
451    val toICache = new FtqToICacheIO
452    val toBackend = new FtqToCtrlIO
453
454    val toPrefetch = new FtqPrefechBundle
455
456    val bpuInfo = new Bundle {
457      val bpRight = Output(UInt(XLEN.W))
458      val bpWrong = Output(UInt(XLEN.W))
459    }
460
461    val mmioCommitRead = Flipped(new mmioCommitRead)
462
463    // for perf
464    val ControlBTBMissBubble = Output(Bool())
465    val TAGEMissBubble = Output(Bool())
466    val SCMissBubble = Output(Bool())
467    val ITTAGEMissBubble = Output(Bool())
468    val RASMissBubble = Output(Bool())
469  })
470  io.bpuInfo := DontCare
471
472  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
473  dontTouch(topdown_stage)
474  // updated every cycle, not gated by a valid-ready handshake
475  topdown_stage := io.fromBpu.resp.bits.topdown_info
476  io.toIfu.req.bits.topdown_info := topdown_stage
477
478  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))
479
480  val backendRedirect = Wire(Valid(new BranchPredictionRedirect))
481  val backendRedirectReg = RegNext(backendRedirect)
482
483  val stage2Flush = backendRedirect.valid
484  val backendFlush = stage2Flush || RegNext(stage2Flush)
485  val ifuFlush = Wire(Bool())
486
487  val flush = stage2Flush || RegNext(stage2Flush)
488
489  val allowBpuIn, allowToIfu = WireInit(false.B)
490  val flushToIfu = !allowToIfu
491  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
492  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
493
494  def copyNum = 5
495  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
496  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
497  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
498  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
499  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
500  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
501  require(FtqSize >= 4)
502  val ifuPtr_write       = WireInit(ifuPtr)
503  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
504  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
505  val ifuWbPtr_write     = WireInit(ifuWbPtr)
506  val commPtr_write      = WireInit(commPtr)
507  val commPtrPlus1_write = WireInit(commPtrPlus1)
508  ifuPtr       := ifuPtr_write
509  ifuPtrPlus1  := ifuPtrPlus1_write
510  ifuPtrPlus2  := ifuPtrPlus2_write
511  ifuWbPtr     := ifuWbPtr_write
512  commPtr      := commPtr_write
513  commPtrPlus1 := commPtrPlus1_write
514  copied_ifu_ptr.map{ptr =>
515    ptr := ifuPtr_write
516    dontTouch(ptr)
517  }
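  // Update pattern for the pointers above (illustrative summary, no new
  // logic): every pointer register is fed from a single next-value wire that
  // defaults to holding the current value, e.g.
  //   val ifuPtr_write = WireInit(ifuPtr)   // default: keep the current value
  //   ifuPtr := ifuPtr_write                // one registered update per cycle
  // The scattered `when` blocks below (advance on a toIfu fire, roll back on a
  // BPU s2/s3 override, reset on a redirect) only override the wire, and the
  // last connection in source order takes priority.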
518  val validEntries = distanceBetween(bpuPtr, commPtr)
519  val canCommit = Wire(Bool())
520
521  // **********************************************************************
522  // **************************** enq from bpu ****************************
523  // **********************************************************************
524  val new_entry_ready = validEntries < FtqSize.U || canCommit
525  io.fromBpu.resp.ready := new_entry_ready
526
527  val bpu_s2_resp = io.fromBpu.resp.bits.s2
528  val bpu_s3_resp = io.fromBpu.resp.bits.s3
529  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
530  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)
531
532  io.toBpu.enq_ptr := bpuPtr
533  val enq_fire = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
534  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
535
536  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
537  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdxForFtq
538  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
539  val bpu_in_resp_idx = bpu_in_resp_ptr.value
540
541  // read ports:      prefetchReq ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
542  val ftq_pc_mem = Module(new FtqPcMemWrapper(1))
543  // resp from uBTB
544  ftq_pc_mem.io.wen := bpu_in_fire
545  ftq_pc_mem.io.waddr := bpu_in_resp_idx
546  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
547
548  //                                                            ifuRedirect + backendRedirect + commit
549  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
550  // this info is intended to be enqueued at the last stage of bpu
551  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
552  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
553  ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info
554  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
555  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")
556
557  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
558  // this info is intended to be enqueued at the last stage of bpu
559  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
560  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
561  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
562  //                                                            ifuRedirect + backendRedirect + commit
563  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
564  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid(3)
565  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
566  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
567
568
569  // multi-write
570  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
571  val newest_entry_target = Reg(UInt(VAddrBits.W))
572  val newest_entry_ptr = Reg(new FtqPtr)
573  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
574  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
575  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))
576  val pred_s1_cycle = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None
577
578  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
579  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
580    VecInit(Seq.fill(PredictWidth)(c_invalid))
581  }))
582
583  val f_to_send :: f_sent :: Nil = Enum(2)
584  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
585
586  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
587  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
588
589  // modify registers one cycle later to cut critical path
590  val last_cycle_bpu_in = RegNext(bpu_in_fire)
591  val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr)
592  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
593  val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget(3))
594  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex(3))
595  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)
596
597  def extra_copyNum_for_commitStateQueue = 2
598  val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
599  val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr)))
600
601  when (last_cycle_bpu_in) {
602    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
603    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
604    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage
605
606    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
607    newest_entry_target := last_cycle_bpu_target
608    newest_entry_ptr := last_cycle_bpu_in_ptr
609  }
610
611  // reduce fanout by delaying the write for a cycle
612  when (RegNext(last_cycle_bpu_in)) {
613    mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
614  }
615
616  // record s1 pred cycles
617  pred_s1_cycle.map(vec => {
618    when (bpu_in_fire && (bpu_in_stage === BP_S1)) {
619      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
620    }
621  })
622
623  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
624  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
625  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
626    case ((in, ptr), i) =>
627      when (in) {
628        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
629        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
630        for (j <- 0 until perSetEntries) {
631          when (ptr.value === (i*perSetEntries+j).U) {
632            commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid))
633          }
634        }
635      }
636  }
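  // Worked example of the fanout reduction above (illustrative, assuming
  // FtqSize = 64 as the "// 32" comment implies): with two extra copies,
  // perSetEntries = 32, so copy 0 of the delayed (valid, ptr) pair only ever
  // clears commitStateQueue entries 0..31 and copy 1 only clears entries
  // 32..63; each copied register therefore drives half the fanout that a
  // single shared enable would.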
637
638  // the delay to the backend is a fixed number of cycles
639  io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr)
640  io.toBackend.newest_entry_target := RegNext(newest_entry_target)
641
642
643  bpuPtr := bpuPtr + enq_fire
644  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
645  when (io.toIfu.req.fire && allowToIfu) {
646    ifuPtr_write := ifuPtrPlus1
647    ifuPtrPlus1_write := ifuPtrPlus2
648    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
649  }
650
651  // only use ftb result to assign hit status
652  when (bpu_s2_resp.valid(3)) {
653    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
654  }
655
656
657  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
658  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
659  when (bpu_s2_redirect) {
660    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
661    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
662    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
663    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
664      ifuPtr_write := bpu_s2_resp.ftq_idx
665      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
666      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
667    }
668  }
669
670  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
671  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
672  when (bpu_s3_redirect) {
673    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
674    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
675    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
676    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
677      ifuPtr_write := bpu_s3_resp.ftq_idx
678      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
679      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
680    }
681  }
682
683  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
684  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
685
686  (0 until copyNum).map{i =>
687    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
688  }
689
690  // ****************************************************************
691  // **************************** to ifu ****************************
692  // ****************************************************************
693  // 0  for ifu, and 1-4 for ICache
694  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
695  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
696  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
697  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
698  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)
699
700  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr)))
701  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
702
703  // read pc and target
704  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
705  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
706  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
707  ftq_pc_mem.io.commPtr_w      := commPtr_write
708  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
709
710
711  io.toIfu.req.bits.ftqIdx := ifuPtr
712
713  val toICachePcBundle = Wire(Vec(copyNum,new Ftq_RF_Components))
714  val toICacheEntryToSend = Wire(Vec(copyNum,Bool()))
715  val toIfuPcBundle = Wire(new Ftq_RF_Components)
716  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
717  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
718  val entry_next_addr  = Wire(UInt(VAddrBits.W))
719
720  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
721  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
722  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this
723
724  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
725  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
726
727  for(i <- 0 until copyNum){
728    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
729      toICachePcBundle(i) := copied_bpu_in_bypass_buf(i)
730      toICacheEntryToSend(i)   := true.B
731    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
732      toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i)
733      toICacheEntryToSend(i)   := copied_ifu_plus1_to_send(i)
734    }.otherwise{
735      toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i)
736      toICacheEntryToSend(i)   := copied_ifu_ptr_to_send(i)
737    }
738  }
739
740  // TODO: reconsider target address bypass logic
741  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
742    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
743    entry_is_to_send := true.B
744    entry_next_addr := last_cycle_bpu_target
745    entry_ftq_offset := last_cycle_cfiIndex
746    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
747  }.elsewhen (last_cycle_to_ifu_fire) {
748    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
749    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
750                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
751    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
752                          bpu_in_bypass_buf_for_ifu.startAddr,
753                          Mux(ifuPtr === newest_entry_ptr,
754                            newest_entry_target,
755                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
756  }.otherwise {
757    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
758    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
759                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
760    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
761                          bpu_in_bypass_buf_for_ifu.startAddr,
762                          Mux(ifuPtr === newest_entry_ptr,
763                            newest_entry_target,
764                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
765  }
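  // Timing sketch for the selection above (illustrative, no new logic):
  // ftq_pc_mem is addressed with the *_write (next-value) pointers, so after
  // the extra RegNext each candidate already describes the entry the current
  // ifuPtr points at:
  //   - the request fired last cycle -> ifuPtr has advanced, so the data
  //     pre-read at ifuPtrPlus1 is the right entry (no bubble);
  //   - otherwise -> the data pre-read at ifuPtr is used;
  //   - if the BPU enqueued that very entry only last cycle, the pre-read data
  //     would be stale, so the bypass buffer written at enqueue wins instead.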
766
767  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
768  io.toIfu.req.bits.nextStartAddr := entry_next_addr
769  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
770  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
771
772  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
773  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
774  io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy,i) => copy.fromFtqPcBundle(toICachePcBundle(i))}
775  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
776  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrite, i) =>
777  //   bypassWrite.startAddr := bpu_in_bypass_buf.tail(i).startAddr
778  //   bypassWrite.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
779  // }
780
781  // TODO: remove this
782  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
783          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")
784
785  // when fall through is smaller in value than start address, there must be a false hit
786  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
787    when (io.toIfu.req.fire &&
788      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
789      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
790    ) {
791      entry_hit_status(ifuPtr.value) := h_false_hit
792      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
793    }
794    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
795  }
796
797  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
798    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))
799
800  val ifu_req_should_be_flushed =
801    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
802    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
803
804  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
805    entry_fetch_status(ifuPtr.value) := f_sent
806  }
807
808  // *********************************************************************
809  // **************************** wb from ifu ****************************
810  // *********************************************************************
811  val pdWb = io.fromIfu.pdWb
812  val pds = pdWb.bits.pd
813  val ifu_wb_valid = pdWb.valid
814  val ifu_wb_idx = pdWb.bits.ftqIdx.value
815  // read ports:                                                         commit update
816  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
817  ftq_pd_mem.io.wen(0) := ifu_wb_valid
818  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
819  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
820
821  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
822  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
823  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
824  val pd_reg       = RegEnable(pds,             pdWb.valid)
825  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
826  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)
827
828  when (ifu_wb_valid) {
829    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
830      case (v, inRange) => v && inRange
831    })
832    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
833      case (qe, v) => when (v) { qe := c_valid }
834    }
835  }
836
837  when (ifu_wb_valid) {
838    ifuWbPtr_write := ifuWbPtr + 1.U
839  }
840
841  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
842
843  ftb_entry_mem.io.raddr.head := ifu_wb_idx
844  val has_false_hit = WireInit(false.B)
845  when (RegNext(hit_pd_valid)) {
846    // check for false hit
847    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
848    val brSlots = pred_ftb_entry.brSlots
849    val tailSlot = pred_ftb_entry.tailSlot
850    // we check cfis that bpu predicted
851
852    // bpu predicted branches but denied by predecode
853    val br_false_hit =
854      brSlots.map{
855        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
856      }.reduce(_||_) ||
857      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
858        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
859
860    val jmpOffset = tailSlot.offset
861    val jmp_pd = pd_reg(jmpOffset)
862    val jal_false_hit = pred_ftb_entry.jmpValid &&
863      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
864       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
865       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
866       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
867      )
868
869    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
870    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))
871
872    // assert(!has_false_hit)
873  }
874
875  when (has_false_hit) {
876    entry_hit_status(wb_idx_reg) := h_false_hit
877  }
878
879
880  // **********************************************************************
881  // ***************************** to backend *****************************
882  // **********************************************************************
883  // to backend pc mem / target
884  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
885  io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx)
886  io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu)
887
888  // *******************************************************************************
889  // **************************** redirect from backend ****************************
890  // *******************************************************************************
891
892  // redirect reads cfiInfo, coupled to redirectGen s2
893  ftq_redirect_sram.io.ren.init.last := backendRedirect.valid
894  ftq_redirect_sram.io.raddr.init.last := backendRedirect.bits.ftqIdx.value
895
896  ftb_entry_mem.io.raddr.init.last := backendRedirect.bits.ftqIdx.value
897
898  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
899  val fromBackendRedirect = WireInit(backendRedirectReg)
900  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
901  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
902
903
904  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
905  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
906
907  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
908  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
909  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(r_ftb_entry.brSlots(0).offset === r_ftqOffset,
910      r_ftb_entry.brSlots(0).sc, r_ftb_entry.tailSlot.sc)
911
912  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
913    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
914      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
915      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
916
917    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
918        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
919  }.otherwise {
920    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
921    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
922  }
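  // Illustrative example of the history recovery above (not from the original
  // comments): suppose the redirected entry hit in the FTB and records
  // branches at offsets 2 and 9.
  //   - redirect on the branch at offset 9: both recorded branches fall within
  //     getBrMaskByOffset(9), so shift = 2 and addIntoHist is set;
  //   - redirect on an unrecorded branch at offset 5: only the branch at
  //     offset 2 is counted, plus one more (with addIntoHist set) when the new
  //     branch could still be inserted into the entry.
  // Without an FTB hit, only the redirected branch itself contributes: a taken
  // branch shifts one history bit, and any branch is added into the history.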
923
924
925  // ***************************************************************************
926  // **************************** redirect from ifu ****************************
927  // ***************************************************************************
928  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
929  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
930  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
931  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
932  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
933  fromIfuRedirect.bits.BTBMissBubble := true.B
934  fromIfuRedirect.bits.debugIsMemVio := false.B
935  fromIfuRedirect.bits.debugIsCtrl := false.B
936
937  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
938  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
939  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
940  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
941  ifuRedirectCfiUpdate.target := pdWb.bits.target
942  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
943  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
944
945  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
946  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
947  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
948
949  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
950  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
951
952  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
953
954  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
955  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
956  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
957    toBpuCfi.target := toBpuCfi.topAddr
958  }
959
960  when (ifuRedirectReg.valid) {
961    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
962  } .elsewhen(RegNext(pdWb.valid)) {
963    // if there is a pdWb but no redirect, clear the flag
964    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
965  }
966
967  // *********************************************************************
968  // **************************** wb from exu ****************************
969  // *********************************************************************
970
971  backendRedirect.valid := io.fromBackend.redirect.valid
972  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
973  backendRedirect.bits.BTBMissBubble := false.B
974
975
976  def extractRedirectInfo(wb: Valid[Redirect]) = {
977    val ftqPtr = wb.bits.ftqIdx
978    val ftqOffset = wb.bits.ftqOffset
979    val taken = wb.bits.cfiUpdate.taken
980    val mispred = wb.bits.cfiUpdate.isMisPred
981    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
982  }
983
984  // fix mispredict entry
985  val lastIsMispredict = RegNext(
986    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
987  )
988
989  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
990    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
991    val r_idx = r_ptr.value
992    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
993    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
994    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
995      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
996    } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
997      cfiIndex_vec(r_idx).valid :=false.B
998    }
999    when (cfiIndex_bits_wen) {
1000      cfiIndex_vec(r_idx).bits := r_offset
1001    }
1002    newest_entry_target := redirect.bits.cfiUpdate.target
1003    newest_entry_ptr := r_ptr
1004    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1005    if (isBackend) {
1006      mispredict_vec(r_idx)(r_offset) := r_mispred
1007    }
1008  }
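  // Case sketch for updateCfiInfo (illustrative): suppose entry r_idx
  // currently records a taken cfi at offset 6.
  //   - redirect: taken branch at offset 3 -> cfiIndex moves to offset 3 and
  //     stays valid (an earlier taken cfi shadows the later one);
  //   - redirect: not taken at offset 6    -> the offsets match but r_taken is
  //     false, so the valid bit is cleared;
  //   - redirect: taken at offset 9        -> no condition fires, the recorded
  //     cfi at offset 6 is kept;
  //   - redirect: not taken at offset 9    -> the elsewhen clears the valid
  //     bit as well.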
1009
1010  when(backendRedirectReg.valid) {
1011    updateCfiInfo(backendRedirectReg)
1012  }.elsewhen (ifuRedirectToBpu.valid) {
1013    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
1014  }
1015
1016  when (backendRedirectReg.valid) {
1017    when (backendRedirectReg.bits.ControlRedirectBubble) {
1018      when (fromBackendRedirect.bits.ControlBTBMissBubble) {
1019        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1020        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1021      } .elsewhen (fromBackendRedirect.bits.TAGEMissBubble) {
1022        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1023        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1024      } .elsewhen (fromBackendRedirect.bits.SCMissBubble) {
1025        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
1026        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1027      } .elsewhen (fromBackendRedirect.bits.ITTAGEMissBubble) {
1028        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1029        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1030      } .elsewhen (fromBackendRedirect.bits.RASMissBubble) {
1031        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
1032        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1033      }
1034
1035
1036    } .elsewhen (backendRedirectReg.bits.MemVioRedirectBubble) {
1037      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1038      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1039    } .otherwise {
1040      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1041      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1042    }
1043  } .elsewhen (ifuRedirectReg.valid) {
1044    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1045    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1046  }
1047
1048  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1049  io.TAGEMissBubble := fromBackendRedirect.bits.TAGEMissBubble
1050  io.SCMissBubble := fromBackendRedirect.bits.SCMissBubble
1051  io.ITTAGEMissBubble := fromBackendRedirect.bits.ITTAGEMissBubble
1052  io.RASMissBubble := fromBackendRedirect.bits.RASMissBubble
1053
1054  // ***********************************************************************************
1055  // **************************** flush ptr and state queue ****************************
1056  // ***********************************************************************************
1057
1058  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1059
1060  // on a redirect, we should reset the ptrs and status queues
1061  when(redirectVec.map(r => r.valid).reduce(_||_)){
1062    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1063    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
1064    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1065    val next = idx + 1.U
1066    bpuPtr := next
1067    copied_bpu_ptr.map(_ := next)
1068    ifuPtr_write := next
1069    ifuWbPtr_write := next
1070    ifuPtrPlus1_write := idx + 2.U
1071    ifuPtrPlus2_write := idx + 3.U
1072
1073  }
1074  when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){
1075    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1076    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
1077    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1078    when (RegNext(notIfu)) {
1079      commitStateQueue(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) =>
1080        when(i.U > RegNext(offset) || i.U === RegNext(offset) && RegNext(flushItSelf)){
1081          s := c_invalid
1082        }
1083      })
1084    }
1085  }
1086
1087
1088  // only the valid bit is actually needed
1089  io.toIfu.redirect.bits    := backendRedirect.bits
1090  io.toIfu.redirect.valid   := stage2Flush
1091  io.toIfu.topdown_redirect := fromBackendRedirect
1092
1093  // commit
1094  for (c <- io.fromBackend.rob_commits) {
1095    when(c.valid) {
1096      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
1097      // TODO: remove this
1098      // For instruction fusions, we also update the next instruction
1099      when (c.bits.commitType === 4.U) {
1100        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
1101      }.elsewhen(c.bits.commitType === 5.U) {
1102        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
1103      }.elsewhen(c.bits.commitType === 6.U) {
1104        val index = (c.bits.ftqIdx + 1.U).value
1105        commitStateQueue(index)(0) := c_commited
1106      }.elsewhen(c.bits.commitType === 7.U) {
1107        val index = (c.bits.ftqIdx + 1.U).value
1108        commitStateQueue(index)(1) := c_commited
1109      }
1110    }
1111  }
1112
1113  // ****************************************************************
1114  // **************************** to bpu ****************************
1115  // ****************************************************************
1116
1117  io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1118  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_=>0.U(64.W)))
1119  val redirect_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1120  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1121  XSPerfHistogram("ifu_redirect_latency", redirect_latency, !fromBackendRedirect.valid && ifuRedirectToBpu.valid, 0, 60, 1)
1122
1123  XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay")
1124
1125  val may_have_stall_from_bpu = Wire(Bool())
1126  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1127  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1128  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1129    Cat(commitStateQueue(commPtr.value).map(s => {
1130      s === c_invalid || s === c_commited
1131    })).andR
1132
1133  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1134  val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr, mmioReadPtr) || mmioReadPtr === ifuPtr) &&
1135                       Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited})).andR
1136  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1137
1138  // commit reads
1139  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1140  val commit_target =
1141    Mux(RegNext(commPtr === newest_entry_ptr),
1142      RegNext(newest_entry_target),
1143      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
1144  ftq_pd_mem.io.raddr.last := commPtr.value
1145  val commit_pd = ftq_pd_mem.io.rdata.last
1146  ftq_redirect_sram.io.ren.last := canCommit
1147  ftq_redirect_sram.io.raddr.last := commPtr.value
1148  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
1149  ftq_meta_1r_sram.io.ren(0) := canCommit
1150  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1151  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
1152  ftb_entry_mem.io.raddr.last := commPtr.value
1153  val commit_ftb_entry = ftb_entry_mem.io.rdata.last
1154
1155  // need one cycle to read mem and srams
1156  val do_commit_ptr = RegNext(commPtr)
1157  val do_commit = RegNext(canCommit, init=false.B)
1158  when (canCommit) {
1159    commPtr_write := commPtrPlus1
1160    commPtrPlus1_write := commPtrPlus1 + 1.U
1161  }
1162  val commit_state = RegNext(commitStateQueue(commPtr.value))
1163  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1164  val do_commit_cfi = WireInit(cfiIndex_vec(do_commit_ptr.value))
1165  //
1166  //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1167  //  can_commit_cfi.valid := false.B
1168  //}
1169  val commit_cfi = RegNext(can_commit_cfi)
1170  val debug_cfi = commitStateQueue(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_commited && do_commit_cfi.valid
1171
1172  val commit_mispredict  : Vec[Bool] = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
1173    case (mis, state) => mis && state === c_commited
1174  })
1175  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_commited)) // [PredictWidth]
1176  val can_commit_hit                 = entry_hit_status(commPtr.value)
1177  val commit_hit                     = RegNext(can_commit_hit)
1178  val diff_commit_target             = RegNext(update_target(commPtr.value)) // TODO: remove this
1179  val commit_stage                   = RegNext(pred_stage(commPtr.value))
1180  val commit_valid                   = commit_hit === h_hit || commit_cfi.valid // hit or taken
1181
1182  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
1183  switch (bpu_ftb_update_stall) {
1184    is (0.U) {
1185      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1186        bpu_ftb_update_stall := 2.U // 2-cycle stall
1187      }
1188    }
1189    is (2.U) {
1190      bpu_ftb_update_stall := 1.U
1191    }
1192    is (1.U) {
1193      bpu_ftb_update_stall := 0.U
1194    }
1195    is (3.U) {
1196      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1197    }
1198  }
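  // Timeline of the stall counter above (illustrative): committing an entry
  // whose prediction missed in the FTB stalls further commits for two cycles.
  //   cycle 0: canCommit && can_commit_cfi.valid && !to_bpu_hit -> stall := 2
  //   cycle 1: stall = 2, canCommit is held low                 -> stall := 1
  //   cycle 2: stall = 1, canCommit is held low                 -> stall := 0
  //   cycle 3: stall = 0, commits may resume
  // The value 3 is unreachable and only guarded by the XSError case above.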
1199
1200  // TODO: remove this
1201  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1202
1203  // update latency stats
1204  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1205  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1206
1207  io.toBpu.update := DontCare
1208  io.toBpu.update.valid := commit_valid && do_commit
1209  val update = io.toBpu.update.bits
1210  update.false_hit   := commit_hit === h_false_hit
1211  update.pc          := commit_pc_bundle.startAddr
1212  update.meta        := commit_meta.meta
1213  update.cfi_idx     := commit_cfi
1214  update.full_target := commit_target
1215  update.from_stage  := commit_stage
1216  update.spec_info   := commit_spec_meta
1217  XSError(commit_valid && do_commit && debug_cfi, "\ncommit cfi should not be in a non-c_commited state\n")
1218
1219  val commit_real_hit = commit_hit === h_hit
1220  val update_ftb_entry = update.ftb_entry
1221
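  // FTBEntryGen merges the old FTB entry read at commit time with the committed predecode, cfi index,
  // target and mispredict information; it produces the (possibly new) entry written back to the BPU,
  // plus the flags (new_br_insert_pos, mispred_mask, is_old_entry, taken_mask, jmp_taken) used below.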
1222  val ftbEntryGen = Module(new FTBEntryGen).io
1223  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1224  ftbEntryGen.old_entry      := commit_ftb_entry
1225  ftbEntryGen.pd             := commit_pd
1226  ftbEntryGen.cfiIndex       := commit_cfi
1227  ftbEntryGen.target         := commit_target
1228  ftbEntryGen.hit            := commit_real_hit
1229  ftbEntryGen.mispredict_vec := commit_mispredict
1230
1231  update_ftb_entry         := ftbEntryGen.new_entry
1232  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1233  update.mispred_mask      := ftbEntryGen.mispred_mask
1234  update.old_entry         := ftbEntryGen.is_old_entry
1235  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1236  update.br_taken_mask     := ftbEntryGen.taken_mask
1237  update.br_committed      := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1238    case (valid, offset) => valid && commit_instCommited(offset)
1239  }
1240  update.jmp_taken         := ftbEntryGen.jmp_taken
1241
1242  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1243  // update.full_pred.jalr_target := commit_target
1244  // update.full_pred.hit := true.B
1245  // when (update.full_pred.is_jalr) {
1246  //   update.full_pred.targets.last := commit_target
1247  // }
1248
1249  // ****************************************************************
1250  // *********************** to prefetch ****************************
1251  // ****************************************************************
1252
1253  ftq_pc_mem.io.other_raddrs(0) := DontCare
1254  if(cacheParams.hasPrefetch){
1255    val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
1256    val diff_prefetch_addr = WireInit(update_target(prefetchPtr.value)) // TODO: remove this
1257    // TODO: must be wider
1258    prefetchPtr := prefetchPtr + io.toPrefetch.req.fire
1259
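    // the prefetch pointer normally runs ahead of the IFU pointer; if it has fallen behind or merely
    // caught up with ifuPtr, prefetching that entry again is useless, so jump it forward: to bpuPtr - 1.U
    // when possible, otherwise back to ifuPtr.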
1260    val prefetch_too_late = (isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr)) || (prefetchPtr === ifuPtr)
1261    when(prefetch_too_late){
1262      when(prefetchPtr =/= bpuPtr){
1263        prefetchPtr := bpuPtr - 1.U
1264      }.otherwise{
1265        prefetchPtr := ifuPtr
1266      }
1267    }
1268
1269    ftq_pc_mem.io.other_raddrs(0) := prefetchPtr.value
1270
1271    when (bpu_s2_redirect && !isBefore(prefetchPtr, bpu_s2_resp.ftq_idx)) {
1272      prefetchPtr := bpu_s2_resp.ftq_idx
1273    }
1274
1275    when (bpu_s3_redirect && !isBefore(prefetchPtr, bpu_s3_resp.ftq_idx)) {
1276      prefetchPtr := bpu_s3_resp.ftq_idx
1277      // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
1278    }
1279
1280
1281    val prefetch_is_to_send = WireInit(entry_fetch_status(prefetchPtr.value) === f_to_send)
1282    val prefetch_addr = Wire(UInt(VAddrBits.W))
1283
1284    when (last_cycle_bpu_in && bpu_in_bypass_ptr === prefetchPtr) {
1285      prefetch_is_to_send := true.B
1286      prefetch_addr := last_cycle_bpu_target
1287      diff_prefetch_addr := last_cycle_bpu_target // TODO: remove this
1288    }.otherwise{
1289      prefetch_addr := RegNext(ftq_pc_mem.io.other_rdatas(0).startAddr)
1290    }
1291    io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && prefetch_is_to_send
1292    io.toPrefetch.req.bits.target := prefetch_addr
1293
1294    when(redirectVec.map(r => r.valid).reduce(_||_)){
1295      val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1296      val next = r.ftqIdx + 1.U
1297      prefetchPtr := next
1298    }
1299
1300    // TODO: remove this
1301    // XSError(io.toPrefetch.req.valid && diff_prefetch_addr =/= prefetch_addr,
1302    //         f"\nprefetch_req_target wrong! prefetchPtr: ${prefetchPtr}, prefetch_addr: ${Hexadecimal(prefetch_addr)} diff_prefetch_addr: ${Hexadecimal(diff_prefetch_addr)}\n")
1303
1304
1305    XSError(isBefore(bpuPtr, prefetchPtr) && !isFull(bpuPtr, prefetchPtr), "\nprefetchPtr is before bpuPtr!\n")
1306//    XSError(isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr), "\nifuPtr is before prefetchPtr!\n")
1307  }
1308  else {
1309    io.toPrefetch.req <> DontCare
1310  }
1311
1312  // ******************************************************************************
1313  // **************************** commit perf counters ****************************
1314  // ******************************************************************************
1315
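  // per-slot masks over the committing entry: commit_inst_mask marks the slots actually committed this
  // cycle, commit_cfi_mask marks branch/jump slots, and mbpRights/mbpWrongs split the committed
  // control-flow instructions by whether they were mispredicted.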
1316  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
1317  val commit_mispred_mask = commit_mispredict.asUInt
1318  val commit_not_mispred_mask = ~commit_mispred_mask
1319
1320  val commit_br_mask = commit_pd.brMask.asUInt
1321  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1322  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
1323
1324  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1325
1326  val mbpRights = mbpInstrs & commit_not_mispred_mask
1327  val mbpWrongs = mbpInstrs & commit_mispred_mask
1328
1329  io.bpuInfo.bpRight := PopCount(mbpRights)
1330  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1331
1332  val isWriteFTQTable = WireInit(Constantin.createRecord("isWriteFTQTable" + p(XSCoreParamsKey).HartId.toString))
1333  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
1334  // Cfi Info
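  // for every slot of the committing entry, print per-branch commit information via XSDebug and,
  // when the isWriteFTQTable constant is set, log it into the per-hart FTQTable ChiselDB database.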
1335  for (i <- 0 until PredictWidth) {
1336    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1337    val v = commit_state(i) === c_commited
1338    val isBr = commit_pd.brMask(i)
1339    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1340    val isCfi = isBr || isJmp
1341    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1342    val misPred = commit_mispredict(i)
1343    // val ghist = commit_spec_meta.ghist.predHist
1344    val histPtr = commit_spec_meta.histPtr
1345    val predCycle = commit_meta.meta(63, 0)
1346    val target = commit_target
1347
1348    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1349    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
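    // for the debug print below: a branch counts as entering the history if the entry really hit in the
    // FTB and this branch is present in the updated FTB entry, or, on an FTB miss, if it is the taken cfi.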
1350    val addIntoHist = (commit_hit === h_hit && inFtbEntry) || (commit_hit =/= h_hit && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1351    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1352    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1353    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1354    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1355
1356    val logbundle = Wire(new FtqDebugBundle)
1357    logbundle.pc := pc
1358    logbundle.target := target
1359    logbundle.isBr := isBr
1360    logbundle.isJmp := isJmp
1361    logbundle.isCall := isJmp && commit_pd.hasCall
1362    logbundle.isRet := isJmp && commit_pd.hasRet
1363    logbundle.misPred := misPred
1364    logbundle.isTaken := isTaken
1365    logbundle.predStage := commit_stage
1366
1367    ftqBranchTraceDB.log(
1368      data = logbundle /* hardware of type T */,
1369      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1370      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1371      clock = clock,
1372      reset = reset
1373    )
1374  }
1375
1376  val enq = io.fromBpu.resp
1377  val perf_redirect = backendRedirect
1378
1379  XSPerfAccumulate("entry", validEntries)
1380  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1381  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1382  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1383  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1384
1385  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1386
1387  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1388  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1389  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1390  XSPerfAccumulate("bpu_to_ifu_bubble_when_ftq_full", (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready)
1391
1392  val from_bpu = io.fromBpu.resp.bits
1393  val to_ifu = io.toIfu.req.bits
1394
1395
1396  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth+1, 1)
1397
1398
1399
1400
1401  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1402  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1403  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1404  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1405
1406
1407  val mbpBRights = mbpRights & commit_br_mask
1408  val mbpJRights = mbpRights & commit_jal_mask
1409  val mbpIRights = mbpRights & commit_jalr_mask
1410  val mbpCRights = mbpRights & commit_call_mask
1411  val mbpRRights = mbpRights & commit_ret_mask
1412
1413  val mbpBWrongs = mbpWrongs & commit_br_mask
1414  val mbpJWrongs = mbpWrongs & commit_jal_mask
1415  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1416  val mbpCWrongs = mbpWrongs & commit_call_mask
1417  val mbpRWrongs = mbpWrongs & commit_ret_mask
1418
1419  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1420
1421  def pred_stage_map(src: UInt, name: String) = {
1422    (0 until numBpStages).map(i =>
1423      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1424    ).foldLeft(Map[String, UInt]())(_+_)
1425  }
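  // e.g. pred_stage_map(mbpWrongs, "mispredict") yields keys "mispredict_stage_1", "mispredict_stage_2", ...
  // (one per BPU prediction stage), each counting committed slots whose prediction came from that stage.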
1426
1427  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1428  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1429  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1430  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1431  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1432  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1433
1434  val update_valid = io.toBpu.update.valid
1435  def u(cond: Bool) = update_valid && cond
1436  val ftb_false_hit = u(update.false_hit)
1437  // assert(!ftb_false_hit)
1438  val ftb_hit = u(commit_hit === h_hit)
1439
1440  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1441  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1442  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1443  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1444
1445  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1446
1447  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1448  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1449  val ftb_modified_entry_ifu_redirected = u(ifuRedirected(do_commit_ptr.value))
1450  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1451  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1452  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1453
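  // length of a generated FTB entry in instruction slots: distance from the start pc to the entry's
  // fall-through address, in units of 2^instOffsetBits bytes (illustrative example: with instOffsetBits = 1,
  // an entry starting at 0x1000 and falling through at 0x1010 has length 8).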
1454  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1455  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1456  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth+1, 1)
1457  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth+1, 1)
1458  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1459  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth+1, 1)
1460
1461  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize+1, 1)
1462
1463  val perfCountsMap = Map(
1464    "BpInstr" -> PopCount(mbpInstrs),
1465    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1466    "BpRight"  -> PopCount(mbpRights),
1467    "BpWrong"  -> PopCount(mbpWrongs),
1468    "BpBRight" -> PopCount(mbpBRights),
1469    "BpBWrong" -> PopCount(mbpBWrongs),
1470    "BpJRight" -> PopCount(mbpJRights),
1471    "BpJWrong" -> PopCount(mbpJWrongs),
1472    "BpIRight" -> PopCount(mbpIRights),
1473    "BpIWrong" -> PopCount(mbpIWrongs),
1474    "BpCRight" -> PopCount(mbpCRights),
1475    "BpCWrong" -> PopCount(mbpCWrongs),
1476    "BpRRight" -> PopCount(mbpRRights),
1477    "BpRWrong" -> PopCount(mbpRWrongs),
1478
1479    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1480    "ftb_hit"                      -> PopCount(ftb_hit),
1481    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1482    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1483    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1484    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1485    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1486    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1487    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1488    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1489    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1490    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1491  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1492       correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1493
1494  for((key, value) <- perfCountsMap) {
1495    XSPerfAccumulate(key, value)
1496  }
1497
1498  // --------------------------- Debug --------------------------------
1499  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1500  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1501  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1502  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1503  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1504    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1505  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1506
1507  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1508  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1509  //       case (((valid, pd), ans), taken) =>
1510  //       Mux(valid && pd.isBr,
1511  //         isWrong ^ Mux(ans.hit.asBool,
1512  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1513  //           !taken),
1514  //         !taken),
1515  //       false.B)
1516  //     }
1517  //   }
1518
1519  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1520  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1521  //       case (((valid, pd), ans), taken) =>
1522  //       Mux(valid && pd.isBr,
1523  //         isWrong ^ Mux(ans.hit.asBool,
1524  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1525  //           !taken),
1526  //         !taken),
1527  //       false.B)
1528  //     }
1529  //   }
1530
1531  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1532  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1533  //       case (((valid, pd), ans), taken) =>
1534  //       Mux(valid && pd.isBr,
1535  //         isWrong ^ (ans.taken.asBool === taken),
1536  //       false.B)
1537  //     }
1538  //   }
1539
1540  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1541  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1542  //       case (((valid, pd), ans), taken) =>
1543  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1544  //         isWrong ^ (!taken),
1545  //           false.B)
1546  //     }
1547  //   }
1548
1549  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1550  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1551  //       case (((valid, pd), ans), taken) =>
1552  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1553  //         isWrong ^ (ans.target === commitEntry.target),
1554  //           false.B)
1555  //     }
1556  //   }
1557
1558  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1559  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1560  //   // btb and ubtb pred jal and jalr as well
1561  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1562  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1563  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1564  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1565
1566  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1567  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1568
1569  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1570  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1571
1572  val perfEvents = Seq(
1573    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1574    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1575    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
1576    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1577    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1578    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1579    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1580    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1581    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1582    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1583    ("BpRight                ", PopCount(mbpRights)                                                         ),
1584    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1585    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1586    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1587    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1588    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1589    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1590    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1591    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1592    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1593    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1594    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1595    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1596    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1597  )
1598  generatePerfEvent()
1599}