xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 83ba63b34cf09b33c0a9e1b3203138e51af4491b)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.frontend.icache._
26import xiangshan.backend.CtrlToFtqIO
27import xiangshan.backend.decode.ImmUnion
28import utility.ChiselDB
29
30class FtqDebugBundle extends Bundle {
31  val pc = UInt(39.W)
32  val target = UInt(39.W)
33  val isBr = Bool()
34  val isJmp = Bool()
35  val isCall = Bool()
36  val isRet = Bool()
37  val misPred = Bool()
38  val isTaken = Bool()
39  val predStage = UInt(2.W)
40}
41
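// FtqPtr is a circular-queue pointer into the FTQ: a value in [0, FtqSize)
// plus a wrap flag that toggles whenever the value wraps around, so that
// isAfter/isBefore comparisons stay meaningful across the wrap. For example
// (assuming the default FtqSize = 64), FtqPtr(false.B, 63.U) + 1.U gives
// flag = true and value = 0.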
42class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
43  entries
44){
45  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
46}
47
48object FtqPtr {
49  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
50    val ptr = Wire(new FtqPtr)
51    ptr.flag := f
52    ptr.value := v
53    ptr
54  }
55  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
56    apply(!ptr.flag, ptr.value)
57  }
58}
59
60class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
61
62  val io = IO(new Bundle() {
63    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
64    val ren = Input(Vec(numRead, Bool()))
65    val rdata = Output(Vec(numRead, gen))
66    val waddr = Input(UInt(log2Up(FtqSize).W))
67    val wen = Input(Bool())
68    val wdata = Input(gen)
69  })
70
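  // one single-read-port SRAM is instantiated per read port; every copy is
  // written with the same data, so each reader sees a consistent image and
  // reads never contend for a port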
71  for(i <- 0 until numRead){
72    val sram = Module(new SRAMTemplate(gen, FtqSize))
73    sram.io.r.req.valid := io.ren(i)
74    sram.io.r.req.bits.setIdx := io.raddr(i)
75    io.rdata(i) := sram.io.r.resp.data(0)
76    sram.io.w.req.valid := io.wen
77    sram.io.w.req.bits.setIdx := io.waddr
78    sram.io.w.req.bits.data := VecInit(io.wdata)
79  }
80
81}
82
83class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
84  val startAddr = UInt(VAddrBits.W)
85  val nextLineAddr = UInt(VAddrBits.W)
86  val isNextMask = Vec(PredictWidth, Bool())
87  val fallThruError = Bool()
88  // val carry = Bool()
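  // getPc(offset) reconstructs the PC of the offset-th slot of this fetch
  // block: the low bits are startAddr's in-block offset plus `offset`, while
  // the high bits come from either startAddr or nextLineAddr, chosen by
  // isNextMask(offset) together with a startAddr alignment bit (i.e. whether
  // that slot has crossed into the next line)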
89  def getPc(offset: UInt) = {
90    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
91    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
92    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
93        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
94  }
95  def fromBranchPrediction(resp: BranchPredictionBundle) = {
96    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
97    this.startAddr := resp.pc(3)
98    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
99    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
100      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
101    ))
102    this.fallThruError := resp.fallThruError(3)
103    this
104  }
105  override def toPrintable: Printable = {
106    p"startAddr:${Hexadecimal(startAddr)}"
107  }
108}
109
110class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
111  val brMask = Vec(PredictWidth, Bool())
112  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
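  // jmpInfo compresses the (at most one) jal/jalr of the block:
  // valid => a jal/jalr exists; bits = (isJalr, isCall, isRet), see fromPdWb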
113  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
114  val jalTarget = UInt(VAddrBits.W)
115  val rvcMask = Vec(PredictWidth, Bool())
116  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
117  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
118  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
119  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)
120
121  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
122    val pds = pdWb.pd
123    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
124    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
125    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
126                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
127    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
128    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
129    this.jalTarget := pdWb.jalTarget
130  }
131
132  def toPd(offset: UInt) = {
133    require(offset.getWidth == log2Ceil(PredictWidth))
134    val pd = Wire(new PreDecodeInfo)
135    pd.valid := true.B
136    pd.isRVC := rvcMask(offset)
137    val isBr = brMask(offset)
138    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
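    // brType encoding produced below: 00 = not a CFI, 01 = branch,
    // 10 = jal, 11 = jalr (mirroring the BrType encoding used by predecode)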
139    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
140    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
141    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
142    pd
143  }
144}
145
146
147
148class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {}
149
150class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
151  val meta = UInt(MaxMetaLength.W)
152}
153
154class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
155  val target = UInt(VAddrBits.W)
156  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
157}
158
159
160class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
161  val ptr = Output(new FtqPtr)
162  val offset = Output(UInt(log2Ceil(PredictWidth).W))
163  val data = Input(gen)
164  def apply(ptr: FtqPtr, offset: UInt) = {
165    this.ptr := ptr
166    this.offset := offset
167    this.data
168  }
169}
170
171
172class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
173  val redirect = Valid(new BranchPredictionRedirect)
174  val update = Valid(new BranchPredictionUpdate)
175  val enq_ptr = Output(new FtqPtr)
176}
177
178class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
179  val req = Decoupled(new FetchRequestBundle)
180  val redirect = Valid(new BranchPredictionRedirect)
181  val topdown_redirect = Valid(new BranchPredictionRedirect)
182  val flushFromBpu = new Bundle {
183     // when the ifu pipeline is not stalled,
184     // a packet from bpu s3 can have reached f1 at most
185    val s2 = Valid(new FtqPtr)
186    val s3 = Valid(new FtqPtr)
187    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
188      src.valid && !isAfter(src.bits, idx_to_flush)
189    }
190    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
191    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
192  }
193}
194
195class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
196   // NOTE: req.bits must be prepared in cycle T,
197   // while req.valid is set true in cycle T + 1
198  val req = Decoupled(new FtqToICacheRequestBundle)
199}
200
201trait HasBackendRedirectInfo extends HasXSParameter {
202  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
203}
204
205class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
206  // write to backend pc mem
207  val pc_mem_wen = Output(Bool())
208  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
209  val pc_mem_wdata = Output(new Ftq_RF_Components)
210  // newest target
211  val newest_entry_target = Output(UInt(VAddrBits.W))
212  val newest_entry_ptr = Output(new FtqPtr)
213}
214
215
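// FTBEntryGen derives the FTB entry to write back when an FTQ entry commits.
// Roughly: on an FTB miss, a fresh entry is built from the committed
// predecode info (init_entry); on a hit, the old entry is reused and may be
// patched by inserting a newly discovered branch, correcting a jalr target,
// or clearing always_taken bits that proved wrong.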
216class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
217  val io = IO(new Bundle {
218    val start_addr = Input(UInt(VAddrBits.W))
219    val old_entry = Input(new FTBEntry)
220    val pd = Input(new Ftq_pd_Entry)
221    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
222    val target = Input(UInt(VAddrBits.W))
223    val hit = Input(Bool())
224    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
225
226    val new_entry = Output(new FTBEntry)
227    val new_br_insert_pos = Output(Vec(numBr, Bool()))
228    val taken_mask = Output(Vec(numBr, Bool()))
229    val jmp_taken = Output(Bool())
230    val mispred_mask = Output(Vec(numBr+1, Bool()))
231
232    // for perf counters
233    val is_init_entry = Output(Bool())
234    val is_old_entry = Output(Bool())
235    val is_new_br = Output(Bool())
236    val is_jalr_target_modified = Output(Bool())
237    val is_always_taken_modified = Output(Bool())
238    val is_br_full = Output(Bool())
239  })
240
241  // no mispredictions detected at predecode
242  val hit = io.hit
243  val pd = io.pd
244
245  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
246
247
248  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
249  val entry_has_jmp = pd.jmpInfo.valid
250  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
251  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
252  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
253  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
254  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
255  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
256
257  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
258  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
259
260  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
261  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
262  // if not hit, establish a new entry
263  init_entry.valid := true.B
264  // tag is left for ftb to assign
265
266  // case br
267  val init_br_slot = init_entry.getSlotForBr(0)
268  when (cfi_is_br) {
269    init_br_slot.valid := true.B
270    init_br_slot.offset := io.cfiIndex.bits
271    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
272    init_entry.always_taken(0) := true.B // set to always taken on init
273  }
274
275  // case jmp
276  when (entry_has_jmp) {
277    init_entry.tailSlot.offset := pd.jmpOffset
278    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
279    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
280  }
281
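  // partial fall-through address: lower bits of the start address, plus the
  // jump offset, plus the jump's length in 2-byte units (1 for RVC, 2 for
  // RVI); if there is no jump, or the jump is an RVI instruction in the last
  // slot, the fall-through is simply the next fetch block (carry set)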
282  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
283  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
284  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
285  init_entry.isJalr := new_jmp_is_jalr
286  init_entry.isCall := new_jmp_is_call
287  init_entry.isRet  := new_jmp_is_ret
288   // a non-RVC jump in the last slot means the fall-through address points into the middle of an instruction
289  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)
290
291  // if hit, check whether a new cfi(only br is possible) is detected
292  val oe = io.old_entry
293  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
294  val br_recorded = br_recorded_vec.asUInt.orR
295  val is_new_br = cfi_is_br && !br_recorded
296  val new_br_offset = io.cfiIndex.bits
297  // vec(i) means new br will be inserted BEFORE old br(i)
298  val allBrSlotsVec = oe.allSlotsForBr
299  val new_br_insert_onehot = VecInit((0 until numBr).map{
300    i => i match {
301      case 0 =>
302        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
303      case idx =>
304        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
305        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
306    }
307  })
308
309  val old_entry_modified = WireInit(io.old_entry)
310  for (i <- 0 until numBr) {
311    val slot = old_entry_modified.allSlotsForBr(i)
312    when (new_br_insert_onehot(i)) {
313      slot.valid := true.B
314      slot.offset := new_br_offset
315      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
316      old_entry_modified.always_taken(i) := true.B
317    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
318      old_entry_modified.always_taken(i) := false.B
319      // all other fields remain unchanged
320    }.otherwise {
321      // case i == 0, remain unchanged
322      if (i != 0) {
323        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
324        when (!noNeedToMoveFromFormerSlot) {
325          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
326          old_entry_modified.always_taken(i) := oe.always_taken(i)
327        }
328      }
329    }
330  }
331
332  // two circumstances:
333  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
334  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
335  //        the previous last br or the new br
336  val may_have_to_replace = oe.noEmptySlotForNewBr
337  val pft_need_to_change = is_new_br && may_have_to_replace
338  // it should either be the given last br or the new br
339  when (pft_need_to_change) {
340    val new_pft_offset =
341      Mux(!new_br_insert_onehot.asUInt.orR,
342        new_br_offset, oe.allSlotsForBr.last.offset)
343
344    // set jmp to invalid
345    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
346    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
347    old_entry_modified.last_may_be_rvi_call := false.B
348    old_entry_modified.isCall := false.B
349    old_entry_modified.isRet := false.B
350    old_entry_modified.isJalr := false.B
351  }
352
353  val old_entry_jmp_target_modified = WireInit(oe)
354  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
355  val old_tail_is_jmp = !oe.tailSlot.sharing
356  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
357  when (jalr_target_modified) {
358    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
359    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
360  }
361
362  val old_entry_always_taken = WireInit(oe)
363  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
364  for (i <- 0 until numBr) {
365    old_entry_always_taken.always_taken(i) :=
366      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
367    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
368  }
369  val always_taken_modified = always_taken_modified_vec.reduce(_||_)
370
371
372
373  val derived_from_old_entry =
374    Mux(is_new_br, old_entry_modified,
375      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))
376
377
378  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
379
380  io.new_br_insert_pos := new_br_insert_onehot
381  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
382    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
383  })
384  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
385  for (i <- 0 until numBr) {
386    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
387  }
388  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
389
390  // for perf counters
391  io.is_init_entry := !hit
392  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
393  io.is_new_br := hit && is_new_br
394  io.is_jalr_target_modified := hit && jalr_target_modified
395  io.is_always_taken_modified := hit && always_taken_modified
396  io.is_br_full := hit && is_new_br && may_have_to_replace
397}
398
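// FtqPcMemWrapper wraps the FTQ PC memory (SyncDataModuleTemplate) and adds
// five fixed read ports tracking ifuPtr, ifuPtr+1, ifuPtr+2, commPtr and
// commPtr+1. The *_w inputs are the next-cycle (write-side) pointer values:
// since reads are synchronous, the address must be presented one cycle ahead.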
399class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
400  val io = IO(new Bundle {
401    val ifuPtr_w       = Input(new FtqPtr)
402    val ifuPtrPlus1_w  = Input(new FtqPtr)
403    val ifuPtrPlus2_w  = Input(new FtqPtr)
404    val commPtr_w      = Input(new FtqPtr)
405    val commPtrPlus1_w = Input(new FtqPtr)
406    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
407    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
408    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
409    val commPtr_rdata      = Output(new Ftq_RF_Components)
410    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)
411
412    val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W)))
413    val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components))
414
415    val wen = Input(Bool())
416    val waddr = Input(UInt(log2Ceil(FtqSize).W))
417    val wdata = Input(new Ftq_RF_Components)
418  })
419
420  val num_pc_read = numOtherReads + 5
421  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
422    num_pc_read, 1, "FtqPC"))
423  mem.io.wen(0)   := io.wen
424  mem.io.waddr(0) := io.waddr
425  mem.io.wdata(0) := io.wdata
426
427  // read one cycle ahead for ftq local reads
428  val raddr_vec = VecInit(io.other_raddrs ++
429    Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value))
430
431  mem.io.raddr := raddr_vec
432
433  io.other_rdatas       := mem.io.rdata.dropRight(5)
434  io.ifuPtr_rdata       := mem.io.rdata.dropRight(4).last
435  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(3).last
436  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(2).last
437  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
438  io.commPtr_rdata      := mem.io.rdata.last
439}
440
441class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
442  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
443  with HasICacheParameters{
444  val io = IO(new Bundle {
445    val fromBpu = Flipped(new BpuToFtqIO)
446    val fromIfu = Flipped(new IfuToFtqIO)
447    val fromBackend = Flipped(new CtrlToFtqIO)
448
449    val toBpu = new FtqToBpuIO
450    val toIfu = new FtqToIfuIO
451    val toICache = new FtqToICacheIO
452    val toBackend = new FtqToCtrlIO
453
454    val toPrefetch = new FtqPrefechBundle
455
456    val bpuInfo = new Bundle {
457      val bpRight = Output(UInt(XLEN.W))
458      val bpWrong = Output(UInt(XLEN.W))
459    }
460
461    val mmioCommitRead = Flipped(new mmioCommitRead)
462
463    // for perf
464    val ControlBTBMissBubble = Output(Bool())
465    val TAGEMissBubble = Output(Bool())
466    val SCMissBubble = Output(Bool())
467    val ITTAGEMissBubble = Output(Bool())
468    val RASMissBubble = Output(Bool())
469  })
470  io.bpuInfo := DontCare
471
472  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
473  dontTouch(topdown_stage)
474   // updated every cycle, not gated by the valid-ready handshake
475  topdown_stage := io.fromBpu.resp.bits.topdown_info
476  io.toIfu.req.bits.topdown_info := topdown_stage
477
478  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))
479
480  val backendRedirect = Wire(Valid(new BranchPredictionRedirect))
481  val backendRedirectReg = RegNext(backendRedirect)
482
483  val stage2Flush = backendRedirect.valid
484  val backendFlush = stage2Flush || RegNext(stage2Flush)
485  val ifuFlush = Wire(Bool())
486
487  val flush = stage2Flush || RegNext(stage2Flush)
488
489  val allowBpuIn, allowToIfu = WireInit(false.B)
490  val flushToIfu = !allowToIfu
491  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
492  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
493
494  def copyNum = 5
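  // FTQ pointers (all circular): bpuPtr marks where the next BPU prediction
  // enqueues, ifuPtr the next entry to send to IFU, ifuWbPtr the next entry
  // expecting a predecode writeback, and commPtr the next entry to commit and
  // use for BPU training. The Plus1/Plus2 shadows let read addresses be
  // computed a cycle early, and the copied_* vectors are duplicates that cut
  // fan-out.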
495  val bpuPtr, ifuPtr, ifuWbPtr, commPtr, robCommPtr = RegInit(FtqPtr(false.B, 0.U))
496  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
497  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
498  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
499  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
500  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
501  require(FtqSize >= 4)
502  val ifuPtr_write       = WireInit(ifuPtr)
503  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
504  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
505  val ifuWbPtr_write     = WireInit(ifuWbPtr)
506  val commPtr_write      = WireInit(commPtr)
507  val commPtrPlus1_write = WireInit(commPtrPlus1)
508  val robCommPtr_write   = WireInit(robCommPtr)
509  ifuPtr       := ifuPtr_write
510  ifuPtrPlus1  := ifuPtrPlus1_write
511  ifuPtrPlus2  := ifuPtrPlus2_write
512  ifuWbPtr     := ifuWbPtr_write
513  commPtr      := commPtr_write
514  commPtrPlus1 := commPtrPlus1_write
515  copied_ifu_ptr.map{ptr =>
516    ptr := ifuPtr_write
517    dontTouch(ptr)
518  }
519  robCommPtr   := robCommPtr_write
520  val validEntries = distanceBetween(bpuPtr, commPtr)
521  val canCommit = Wire(Bool())
522
523  // **********************************************************************
524  // **************************** enq from bpu ****************************
525  // **********************************************************************
526  val new_entry_ready = validEntries < FtqSize.U || canCommit
527  io.fromBpu.resp.ready := new_entry_ready
528
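  // note on the (3) indexing used below (valid(3), hasRedirect(3), pc(3), ...):
  // the BPU response carries several duplicated copies of these signals to
  // reduce fan-out; index 3 is, by convention in this code base, the copy
  // consumed by the FTQ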
529  val bpu_s2_resp = io.fromBpu.resp.bits.s2
530  val bpu_s3_resp = io.fromBpu.resp.bits.s3
531  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
532  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)
533
534  io.toBpu.enq_ptr := bpuPtr
535  val enq_fire = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
536  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
537
538  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
539  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdxForFtq
540  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
541  val bpu_in_resp_idx = bpu_in_resp_ptr.value
542
543  // read ports:      prefetchReq ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
544  val ftq_pc_mem = Module(new FtqPcMemWrapper(1))
545   // write the selected bpu response into the pc mem
546  ftq_pc_mem.io.wen := bpu_in_fire
547  ftq_pc_mem.io.waddr := bpu_in_resp_idx
548  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)
549
550  //                                                            ifuRedirect + backendRedirect + commit
551  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
552   // this info is intended to be enqueued at the last stage of bpu
553  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
554  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
555  ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info
556  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
557  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")
558
559  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
560   // this info is intended to be enqueued at the last stage of bpu
561  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
562  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
563  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
564  //                                                            ifuRedirect + backendRedirect + commit
565  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
566  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid(3)
567  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
568  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry
569
570
571  // multi-write
572  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
573  val newest_entry_target = Reg(UInt(VAddrBits.W))
574  val newest_entry_ptr = Reg(new FtqPtr)
575  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
576  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
577  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))
578  val pred_s1_cycle = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None
579
580  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
581  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
582    VecInit(Seq.fill(PredictWidth)(c_invalid))
583  }))
584
585  val f_to_send :: f_sent :: Nil = Enum(2)
586  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
587
588  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
589  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
590
591  // modify registers one cycle later to cut critical path
592  val last_cycle_bpu_in = RegNext(bpu_in_fire)
593  val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr)
594  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
595  val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget(3))
596  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex(3))
597  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)
598
599  def extra_copyNum_for_commitStateQueue = 2
600  val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
601  val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr)))
602
603  when (last_cycle_bpu_in) {
604    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
605    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
606    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage
607
608    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
609    newest_entry_target := last_cycle_bpu_target
610    newest_entry_ptr := last_cycle_bpu_in_ptr
611  }
612
613   // reduce fanout by delaying the write for a cycle
614  when (RegNext(last_cycle_bpu_in)) {
615    mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
616  }
617
618  // record s1 pred cycles
619  pred_s1_cycle.map(vec => {
620    when (bpu_in_fire && (bpu_in_stage === BP_S1)) {
621      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
622    }
623  })
624
625  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
626  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
627  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
628    case ((in, ptr), i) =>
629      when (in) {
630        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
631        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
632        for (j <- 0 until perSetEntries) {
633          when (ptr.value === (i*perSetEntries+j).U) {
634            commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid))
635          }
636        }
637      }
638  }
639
640  // num cycle is fixed
641  io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr)
642  io.toBackend.newest_entry_target := RegNext(newest_entry_target)
643
644
645  bpuPtr := bpuPtr + enq_fire
646  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
647  when (io.toIfu.req.fire && allowToIfu) {
648    ifuPtr_write := ifuPtrPlus1
649    ifuPtrPlus1_write := ifuPtrPlus2
650    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
651  }
652
653  // only use ftb result to assign hit status
654  when (bpu_s2_resp.valid(3)) {
655    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
656  }
657
658
659  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
660  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
661  when (bpu_s2_redirect) {
662    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
663    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
664    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
665    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
666      ifuPtr_write := bpu_s2_resp.ftq_idx
667      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
668      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
669    }
670  }
671
672  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
673  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
674  when (bpu_s3_redirect) {
675    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
676    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
677     // only when ifuPtr runs ahead of bpu s3 resp should we recover it
678    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
679      ifuPtr_write := bpu_s3_resp.ftq_idx
680      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
681      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
682    }
683  }
684
685  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
686  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")
687
688  (0 until copyNum).map{i =>
689    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
690  }
691
692  // ****************************************************************
693  // **************************** to ifu ****************************
694  // ****************************************************************
695  // 0  for ifu, and 1-4 for ICache
696  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
697  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
698  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
699  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
700  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)
701
702  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr)))
703  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
704
705  // read pc and target
706  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
707  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
708  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
709  ftq_pc_mem.io.commPtr_w      := commPtr_write
710  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write
711
712
713  io.toIfu.req.bits.ftqIdx := ifuPtr
714
715  val toICachePcBundle = Wire(Vec(copyNum,new Ftq_RF_Components))
716  val toICacheEntryToSend = Wire(Vec(copyNum,Bool()))
717  val toIfuPcBundle = Wire(new Ftq_RF_Components)
718  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
719  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
720  val entry_next_addr  = Wire(UInt(VAddrBits.W))
721
722  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
723  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
724  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this
725
726  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
727  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))
728
729  for(i <- 0 until copyNum){
730    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
731      toICachePcBundle(i) := copied_bpu_in_bypass_buf(i)
732      toICacheEntryToSend(i)   := true.B
733    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
734      toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i)
735      toICacheEntryToSend(i)   := copied_ifu_plus1_to_send(i)
736    }.otherwise{
737      toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i)
738      toICacheEntryToSend(i)   := copied_ifu_ptr_to_send(i)
739    }
740  }
741
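  // select what to send to IFU this cycle, from fastest to slowest source:
  // 1) the entry at ifuPtr was written by the BPU just last cycle -> use the
  //    bypass buffer; 2) a request fired last cycle, so ifuPtr has advanced ->
  //    use the pre-read ifuPtrPlus1 data; 3) otherwise use the ifuPtr read data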
742  // TODO: reconsider target address bypass logic
743  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
744    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
745    entry_is_to_send := true.B
746    entry_next_addr := last_cycle_bpu_target
747    entry_ftq_offset := last_cycle_cfiIndex
748    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
749  }.elsewhen (last_cycle_to_ifu_fire) {
750    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
751    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
752                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
753    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
754                          bpu_in_bypass_buf_for_ifu.startAddr,
755                          Mux(ifuPtr === newest_entry_ptr,
756                            newest_entry_target,
757                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
758  }.otherwise {
759    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
760    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
761                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
762    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
763                          bpu_in_bypass_buf_for_ifu.startAddr,
764                          Mux(ifuPtr === newest_entry_ptr,
765                            newest_entry_target,
766                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
767  }
768
769  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
770  io.toIfu.req.bits.nextStartAddr := entry_next_addr
771  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
772  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)
773
774  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
775  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
776  io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy,i) => copy.fromFtqPcBundle(toICachePcBundle(i))}
777  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
778   // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrite, i) =>
779   //   bypassWrite.startAddr := bpu_in_bypass_buf.tail(i).startAddr
780   //   bypassWrite.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
781  // }
782
783  // TODO: remove this
784  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
785          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")
786
787  // when fall through is smaller in value than start address, there must be a false hit
788  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
789    when (io.toIfu.req.fire &&
790      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
791      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
792    ) {
793      entry_hit_status(ifuPtr.value) := h_false_hit
794      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
795    }
796    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
797  }
798
799  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
800    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))
801
802  val ifu_req_should_be_flushed =
803    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
804    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)
805
806   when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
807     entry_fetch_status(ifuPtr.value) := f_sent
808   }
809
810  // *********************************************************************
811  // **************************** wb from ifu ****************************
812  // *********************************************************************
813  val pdWb = io.fromIfu.pdWb
814  val pds = pdWb.bits.pd
815  val ifu_wb_valid = pdWb.valid
816  val ifu_wb_idx = pdWb.bits.ftqIdx.value
817  // read ports:                                                         commit update
818  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
819  ftq_pd_mem.io.wen(0) := ifu_wb_valid
820  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
821  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
822
823  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
824  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
825  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
826  val pd_reg       = RegEnable(pds,             pdWb.valid)
827  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
828  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)
829
830  when (ifu_wb_valid) {
831    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
832      case (v, inRange) => v && inRange
833    })
834    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
835      case (qe, v) => when (v) { qe := c_valid }
836    }
837  }
838
839  when (ifu_wb_valid) {
840    ifuWbPtr_write := ifuWbPtr + 1.U
841  }
842
843  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")
844
845  ftb_entry_mem.io.raddr.head := ifu_wb_idx
846  val has_false_hit = WireInit(false.B)
847  when (RegNext(hit_pd_valid)) {
848    // check for false hit
849    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
850    val brSlots = pred_ftb_entry.brSlots
851    val tailSlot = pred_ftb_entry.tailSlot
852    // we check cfis that bpu predicted
853
854    // bpu predicted branches but denied by predecode
855    val br_false_hit =
856      brSlots.map{
857        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
858      }.reduce(_||_) ||
859      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
860        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
861
862    val jmpOffset = tailSlot.offset
863    val jmp_pd = pd_reg(jmpOffset)
864    val jal_false_hit = pred_ftb_entry.jmpValid &&
865      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
866       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
867       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
868       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
869      )
870
871    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
872    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))
873
874    // assert(!has_false_hit)
875  }
876
877  when (has_false_hit) {
878    entry_hit_status(wb_idx_reg) := h_false_hit
879  }
880
881
882  // **********************************************************************
883  // ***************************** to backend *****************************
884  // **********************************************************************
885  // to backend pc mem / target
886  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
887  io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx)
888  io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu)
889
890  // *******************************************************************************
891  // **************************** redirect from backend ****************************
892  // *******************************************************************************
893
894  // redirect read cfiInfo, couples to redirectGen s2
895  ftq_redirect_sram.io.ren.init.last := backendRedirect.valid
896  ftq_redirect_sram.io.raddr.init.last := backendRedirect.bits.ftqIdx.value
897
898  ftb_entry_mem.io.raddr.init.last := backendRedirect.bits.ftqIdx.value
899
900  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
901  val fromBackendRedirect = WireInit(backendRedirectReg)
902  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
903  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
904
905
906  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
907  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
908
909  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
910  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
911  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(r_ftb_entry.brSlots(0).offset === r_ftqOffset,
912      r_ftb_entry.brSlots(0).sc, r_ftb_entry.tailSlot.sc)
913
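  // recover the speculative branch-history bookkeeping for this redirect:
  // `shift` is (roughly) how many conditional branches of this entry, up to
  // the redirected slot, entered the history (including a newly discovered
  // branch that could still be inserted), while `addIntoHist` indicates
  // whether the redirected branch itself is recorded in that history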
914  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
915    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
916      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
917      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
918
919    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
920        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
921  }.otherwise {
922    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
923    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
924  }
925
926
927  // ***************************************************************************
928  // **************************** redirect from ifu ****************************
929  // ***************************************************************************
930  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
931  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
932  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
933  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
934  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
935  fromIfuRedirect.bits.BTBMissBubble := true.B
936  fromIfuRedirect.bits.debugIsMemVio := false.B
937  fromIfuRedirect.bits.debugIsCtrl := false.B
938
939  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
940  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
941  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
942  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
943  ifuRedirectCfiUpdate.target := pdWb.bits.target
944  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
945  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
946
947  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
948  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
949  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
950
951  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
952  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
953
954  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
955
956  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
957  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
958  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
959    toBpuCfi.target := toBpuCfi.topAddr
960  }
961
962  when (ifuRedirectReg.valid) {
963    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
964  } .elsewhen(RegNext(pdWb.valid)) {
965     // if pdWb arrives with no redirect, clear the flag
966    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
967  }
968
969  // *********************************************************************
970  // **************************** wb from exu ****************************
971  // *********************************************************************
972
973  backendRedirect.valid := io.fromBackend.redirect.valid
974  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
975  backendRedirect.bits.BTBMissBubble := false.B
976
977
978  def extractRedirectInfo(wb: Valid[Redirect]) = {
979    val ftqPtr = wb.bits.ftqIdx
980    val ftqOffset = wb.bits.ftqOffset
981    val taken = wb.bits.cfiUpdate.taken
982    val mispred = wb.bits.cfiUpdate.isMisPred
983    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
984  }
985
986  // fix mispredict entry
987  val lastIsMispredict = RegNext(
988    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
989  )
990
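  // updateCfiInfo patches the per-entry taken-cfi record after a redirect:
  // a taken redirect at an earlier offset replaces the recorded cfi, a
  // redirect at the recorded offset re-evaluates its valid bit, and for
  // backend redirects the mispredict bit of that slot is updated as well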
991  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
992    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
993    val r_idx = r_ptr.value
994    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
995    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
996    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
997      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
998    } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
999       cfiIndex_vec(r_idx).valid := false.B
1000    }
1001    when (cfiIndex_bits_wen) {
1002      cfiIndex_vec(r_idx).bits := r_offset
1003    }
1004    newest_entry_target := redirect.bits.cfiUpdate.target
1005    newest_entry_ptr := r_ptr
1006    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
1007    if (isBackend) {
1008      mispredict_vec(r_idx)(r_offset) := r_mispred
1009    }
1010  }
1011
1012  when(backendRedirectReg.valid) {
1013    updateCfiInfo(backendRedirectReg)
1014  }.elsewhen (ifuRedirectToBpu.valid) {
1015    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
1016  }
1017
1018  when (backendRedirectReg.valid) {
1019    when (backendRedirectReg.bits.ControlRedirectBubble) {
1020      when (fromBackendRedirect.bits.ControlBTBMissBubble) {
1021        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1022        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1023      } .elsewhen (fromBackendRedirect.bits.TAGEMissBubble) {
1024        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1025        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
1026      } .elsewhen (fromBackendRedirect.bits.SCMissBubble) {
1027        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
1028        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
1029      } .elsewhen (fromBackendRedirect.bits.ITTAGEMissBubble) {
1030        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1031        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
1032      } .elsewhen (fromBackendRedirect.bits.RASMissBubble) {
1033        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
1034        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
1035      }
1036
1037
1038    } .elsewhen (backendRedirectReg.bits.MemVioRedirectBubble) {
1039      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1040      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
1041    } .otherwise {
1042      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1043      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
1044    }
1045  } .elsewhen (ifuRedirectReg.valid) {
1046    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1047    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
1048  }
1049
1050  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
1051  io.TAGEMissBubble := fromBackendRedirect.bits.TAGEMissBubble
1052  io.SCMissBubble := fromBackendRedirect.bits.SCMissBubble
1053  io.ITTAGEMissBubble := fromBackendRedirect.bits.ITTAGEMissBubble
1054  io.RASMissBubble := fromBackendRedirect.bits.RASMissBubble
1055
1056  // ***********************************************************************************
1057  // **************************** flush ptr and state queue ****************************
1058  // ***********************************************************************************
1059
1060  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)
1061
1062   // on a redirect, reset the pointers and the commit state queue
1063  when(redirectVec.map(r => r.valid).reduce(_||_)){
1064    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1065    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
1066    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1067    val next = idx + 1.U
1068    bpuPtr := next
1069    copied_bpu_ptr.map(_ := next)
1070    ifuPtr_write := next
1071    ifuWbPtr_write := next
1072    ifuPtrPlus1_write := idx + 2.U
1073    ifuPtrPlus2_write := idx + 3.U
1074
1075  }
1076  when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){
1077    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1078    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
1079    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
1080    when (RegNext(notIfu)) {
1081      commitStateQueue(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) =>
1082        when(i.U > RegNext(offset) || i.U === RegNext(offset) && RegNext(flushItSelf)){
1083          s := c_invalid
1084        }
1085      })
1086    }
1087  }
1088
1089
1090  // only the valid bit is actually needed
1091  io.toIfu.redirect.bits    := backendRedirect.bits
1092  io.toIfu.redirect.valid   := stage2Flush
1093  io.toIfu.topdown_redirect := fromBackendRedirect
1094
1095  // commit
1096  for (c <- io.fromBackend.rob_commits) {
1097    when(c.valid) {
1098      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
1099      // TODO: remove this
1100      // For instruction fusions, we also update the next instruction
1101      when (c.bits.commitType === 4.U) {
1102        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
1103      }.elsewhen(c.bits.commitType === 5.U) {
1104        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
1105      }.elsewhen(c.bits.commitType === 6.U) {
1106        val index = (c.bits.ftqIdx + 1.U).value
1107        commitStateQueue(index)(0) := c_commited
1108      }.elsewhen(c.bits.commitType === 7.U) {
1109        val index = (c.bits.ftqIdx + 1.U).value
1110        commitStateQueue(index)(1) := c_commited
1111      }
1112    }
1113  }
1114
1115  robCommPtr_write := Mux(io.fromBackend.rob_commits.map(_.valid).reduce(_ | _), ParallelPriorityMux(io.fromBackend.rob_commits.map(_.valid).reverse, io.fromBackend.rob_commits.map(_.bits.ftqIdx).reverse), robCommPtr)
1116
1117  // ****************************************************************
1118  // **************************** to bpu ****************************
1119  // ****************************************************************
1120
1121  io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1122  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_=>0.U(64.W)))
1123  val redirect_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1124  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1125  XSPerfHistogram("ifu_redirect_latency", redirect_latency, !fromBackendRedirect.valid && ifuRedirectToBpu.valid, 0, 60, 1)
1126
1127  XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay")
1128
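  // an entry may commit (and train the BPU) once IFU has written it back
  // (commPtr =/= ifuWbPtr), no 2-cycle FTB-update stall is pending, and either
  // the ROB has already committed past it (robCommPtr) or its last
  // non-invalid slot has reached the committed state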
1129  val may_have_stall_from_bpu = Wire(Bool())
1130  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1131  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1132  val notInvalidSeq = commitStateQueue(commPtr.value).map(s => s =/= c_invalid).reverse
1133  // Todo: @huxuan check it
1134  //  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1135  //    Cat(commitStateQueue(commPtr.value).map(s => {
1136  //      s === c_invalid || s === c_commited
1137  //    })).andR
1138  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1139    (isAfter(robCommPtr, commPtr) || PriorityMuxDefault(notInvalidSeq.zip(commitStateQueue(commPtr.value).reverse), c_invalid) === c_commited)
1140
1141  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1142   val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr, mmioReadPtr) || mmioReadPtr === ifuPtr) &&
1143                       Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited})).andR
1144  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1145
1146  // commit reads
1147  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1148  val commit_target =
1149    Mux(RegNext(commPtr === newest_entry_ptr),
1150      RegNext(newest_entry_target),
1151      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
1152  ftq_pd_mem.io.raddr.last := commPtr.value
1153  val commit_pd = ftq_pd_mem.io.rdata.last
1154  ftq_redirect_sram.io.ren.last := canCommit
1155  ftq_redirect_sram.io.raddr.last := commPtr.value
1156  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
1157  ftq_meta_1r_sram.io.ren(0) := canCommit
1158  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1159  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
1160  ftb_entry_mem.io.raddr.last := commPtr.value
1161  val commit_ftb_entry = ftb_entry_mem.io.rdata.last
1162
1163  // need one cycle to read mem and srams
1164  val do_commit_ptr = RegNext(commPtr)
1165  val do_commit = RegNext(canCommit, init=false.B)
1166  when (canCommit) {
1167    commPtr_write := commPtrPlus1
1168    commPtrPlus1_write := commPtrPlus1 + 1.U
1169  }
1170  val commit_state = RegNext(commitStateQueue(commPtr.value))
1171  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1172  val do_commit_cfi = WireInit(cfiIndex_vec(do_commit_ptr.value))
1173  //
1174  //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1175  //  can_commit_cfi.valid := false.B
1176  //}
1177  val commit_cfi = RegNext(can_commit_cfi)
1178  val debug_cfi = commitStateQueue(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_commited && do_commit_cfi.valid
1179
1180  val commit_mispredict  : Vec[Bool] = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
1181    case (mis, state) => mis && state === c_commited
1182  })
1183  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_commited)) // [PredictWidth]
1184  val can_commit_hit                 = entry_hit_status(commPtr.value)
1185  val commit_hit                     = RegNext(can_commit_hit)
1186  val diff_commit_target             = RegNext(update_target(commPtr.value)) // TODO: remove this
1187  val commit_stage                   = RegNext(pred_stage(commPtr.value))
1188  val commit_valid                   = commit_hit === h_hit || commit_cfi.valid // hit or taken
1189
1190  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
1191  switch (bpu_ftb_update_stall) {
1192    is (0.U) {
1193      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1194        bpu_ftb_update_stall := 2.U // 2-cycle stall
1195      }
1196    }
1197    is (2.U) {
1198      bpu_ftb_update_stall := 1.U
1199    }
1200    is (1.U) {
1201      bpu_ftb_update_stall := 0.U
1202    }
1203    is (3.U) {
1204      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1205    }
1206  }
1207
1208  // TODO: remove this
1209  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1210
1211  // update latency stats
1212  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1213  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1214
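      // Issue the BPU update one cycle after commit, using the registered read data, and only for
      // entries that either hit in the FTB or actually had a taken CFI (commit_valid).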
1215  io.toBpu.update := DontCare
1216  io.toBpu.update.valid := commit_valid && do_commit
1217  val update = io.toBpu.update.bits
1218  update.false_hit   := commit_hit === h_false_hit
1219  update.pc          := commit_pc_bundle.startAddr
1220  update.meta        := commit_meta.meta
1221  update.cfi_idx     := commit_cfi
1222  update.full_target := commit_target
1223  update.from_stage  := commit_stage
1224  update.spec_info   := commit_spec_meta
1225  XSError(commit_valid && do_commit && debug_cfi, "\ncommitted cfi should not be in a non-c_commited state\n")
1226
1227  val commit_real_hit = commit_hit === h_hit
1228  val update_ftb_entry = update.ftb_entry
1229
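      // FTBEntryGen merges the committed pre-decode info, the CFI that actually resolved, and the old
      // FTB entry into the new entry carried by the update, plus masks describing what changed.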
1230  val ftbEntryGen = Module(new FTBEntryGen).io
1231  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1232  ftbEntryGen.old_entry      := commit_ftb_entry
1233  ftbEntryGen.pd             := commit_pd
1234  ftbEntryGen.cfiIndex       := commit_cfi
1235  ftbEntryGen.target         := commit_target
1236  ftbEntryGen.hit            := commit_real_hit
1237  ftbEntryGen.mispredict_vec := commit_mispredict
1238
1239  update_ftb_entry         := ftbEntryGen.new_entry
1240  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1241  update.mispred_mask      := ftbEntryGen.mispred_mask
1242  update.old_entry         := ftbEntryGen.is_old_entry
1243  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1244  update.br_taken_mask     := ftbEntryGen.taken_mask
1245  update.br_committed      := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1246    case (valid, offset) => valid && commit_instCommited(offset)
1247  }
1248  update.jmp_taken         := ftbEntryGen.jmp_taken
1249
1250  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1251  // update.full_pred.jalr_target := commit_target
1252  // update.full_pred.hit := true.B
1253  // when (update.full_pred.is_jalr) {
1254  //   update.full_pred.targets.last := commit_target
1255  // }
1256
1257  // ****************************************************************
1258  // *********************** to prefetch ****************************
1259  // ****************************************************************
1260
1261  ftq_pc_mem.io.other_raddrs(0) := DontCare
1262  if(cacheParams.hasPrefetch){
1263    val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
1264    val diff_prefetch_addr = WireInit(update_target(prefetchPtr.value)) //TODO: remove this
1265    // TODO: the prefetch look-ahead MUST be made wider
1266    prefetchPtr := prefetchPtr + io.toPrefetch.req.fire
1267
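        // If prefetching has fallen behind the IFU (or is exactly at it), jump the prefetch pointer
        // forward: just behind the BPU pointer when possible, otherwise to the IFU pointer itself.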
1268    val prefetch_too_late = (isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr)) || (prefetchPtr === ifuPtr)
1269    when(prefetch_too_late){
1270      when(prefetchPtr =/= bpuPtr){
1271        prefetchPtr := bpuPtr - 1.U
1272      }.otherwise{
1273        prefetchPtr := ifuPtr
1274      }
1275    }
1276
1277    ftq_pc_mem.io.other_raddrs(0) := prefetchPtr.value
1278
1279    when (bpu_s2_redirect && !isBefore(prefetchPtr, bpu_s2_resp.ftq_idx)) {
1280      prefetchPtr := bpu_s2_resp.ftq_idx
1281    }
1282
1283    when (bpu_s3_redirect && !isBefore(prefetchPtr, bpu_s3_resp.ftq_idx)) {
1284      prefetchPtr := bpu_s3_resp.ftq_idx
1285      // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
1286    }
1287
1288
1289    val prefetch_is_to_send = WireInit(entry_fetch_status(prefetchPtr.value) === f_to_send)
1290    val prefetch_addr = Wire(UInt(VAddrBits.W))
1291
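        // Prefer the target that just arrived from the BPU when it lands on the entry currently being
        // prefetched; otherwise use the start address read from ftq_pc_mem in the previous cycle.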
1292    when (last_cycle_bpu_in && bpu_in_bypass_ptr === prefetchPtr) {
1293      prefetch_is_to_send := true.B
1294      prefetch_addr := last_cycle_bpu_target
1295      diff_prefetch_addr := last_cycle_bpu_target // TODO: remove this
1296    }.otherwise{
1297      prefetch_addr := RegNext(ftq_pc_mem.io.other_rdatas(0).startAddr)
1298    }
1299    io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && prefetch_is_to_send
1300    io.toPrefetch.req.bits.target := prefetch_addr
1301
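        // On any redirect, restart prefetching from the entry right after the redirected one.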
1302    when(redirectVec.map(r => r.valid).reduce(_||_)){
1303      val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1304      val next = r.ftqIdx + 1.U
1305      prefetchPtr := next
1306    }
1307
1308    // TODO: remove this
1309    // XSError(io.toPrefetch.req.valid && diff_prefetch_addr =/= prefetch_addr,
1310    //         f"\nprefetch_req_target wrong! prefetchPtr: ${prefetchPtr}, prefetch_addr: ${Hexadecimal(prefetch_addr)} diff_prefetch_addr: ${Hexadecimal(diff_prefetch_addr)}\n")
1311
1312
1313    XSError(isBefore(bpuPtr, prefetchPtr) && !isFull(bpuPtr, prefetchPtr), "\nprefetchPtr is ahead of bpuPtr!\n")
1314//    XSError(isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr), "\nifuPtr is before prefetchPtr!\n")
1315  }
1316  else {
1317    io.toPrefetch.req <> DontCare
1318  }
1319
1320  // ******************************************************************************
1321  // **************************** commit perf counters ****************************
1322  // ******************************************************************************
1323
1324  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
1325  val commit_mispred_mask = commit_mispredict.asUInt
1326  val commit_not_mispred_mask = ~commit_mispred_mask
1327
1328  val commit_br_mask = commit_pd.brMask.asUInt
1329  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1330  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
1331
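      // mbpInstrs marks committed control-flow instructions (branches plus the entry's jump); the
      // mispredict mask then splits them into correctly and incorrectly predicted ones.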
1332  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1333
1334  val mbpRights = mbpInstrs & commit_not_mispred_mask
1335  val mbpWrongs = mbpInstrs & commit_mispred_mask
1336
1337  io.bpuInfo.bpRight := PopCount(mbpRights)
1338  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1339
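      // isWriteFTQTable is a runtime-tunable constant (Constantin) that enables dumping every
      // committed CFI of this hart into a ChiselDB table for offline branch-trace analysis.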
1340  val isWriteFTQTable = WireInit(Constantin.createRecord("isWriteFTQTable" + p(XSCoreParamsKey).HartId.toString))
1341  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
1342  // Cfi Info
1343  for (i <- 0 until PredictWidth) {
1344    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1345    val v = commit_state(i) === c_commited
1346    val isBr = commit_pd.brMask(i)
1347    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1348    val isCfi = isBr || isJmp
1349    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1350    val misPred = commit_mispredict(i)
1351    // val ghist = commit_spec_meta.ghist.predHist
1352    val histPtr = commit_spec_meta.histPtr
1353    val predCycle = commit_meta.meta(63, 0)
1354    val target = commit_target
1355
1356    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1357    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
1358    val addIntoHist = (commit_hit === h_hit && inFtbEntry) || (commit_hit =/= h_hit && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1359    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1360      p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1361      p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1362      p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1363
1364    val logbundle = Wire(new FtqDebugBundle)
1365    logbundle.pc := pc
1366    logbundle.target := target
1367    logbundle.isBr := isBr
1368    logbundle.isJmp := isJmp
1369    logbundle.isCall := isJmp && commit_pd.hasCall
1370    logbundle.isRet := isJmp && commit_pd.hasRet
1371    logbundle.misPred := misPred
1372    logbundle.isTaken := isTaken
1373    logbundle.predStage := commit_stage
1374
1375    ftqBranchTraceDB.log(
1376      data = logbundle /* hardware of type T */,
1377      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1378      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1379      clock = clock,
1380      reset = reset
1381    )
1382  }
1383
1384  val enq = io.fromBpu.resp
1385  val perf_redirect = backendRedirect
1386
1387  XSPerfAccumulate("entry", validEntries)
1388  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1389  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1390  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1391  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1392
1393  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1394
1395  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1396  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1397  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1398  XSPerfAccumulate("bpu_to_ifu_bubble_when_ftq_full", (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready)
1399
1400  val from_bpu = io.fromBpu.resp.bits
1401  val to_ifu = io.toIfu.req.bits
1402
1403
1404  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth+1, 1)
1405
1406
1407
1408
1409  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1410  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1411  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1412  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1413
1414
1415  val mbpBRights = mbpRights & commit_br_mask
1416  val mbpJRights = mbpRights & commit_jal_mask
1417  val mbpIRights = mbpRights & commit_jalr_mask
1418  val mbpCRights = mbpRights & commit_call_mask
1419  val mbpRRights = mbpRights & commit_ret_mask
1420
1421  val mbpBWrongs = mbpWrongs & commit_br_mask
1422  val mbpJWrongs = mbpWrongs & commit_jal_mask
1423  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1424  val mbpCWrongs = mbpWrongs & commit_call_mask
1425  val mbpRWrongs = mbpWrongs & commit_ret_mask
1426
1427  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1428
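      // Build one perf counter per BPU prediction stage: count the set bits of src whose entry was
      // predicted at that stage, keyed "<name>_stage_<n>".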
1429  def pred_stage_map(src: UInt, name: String) = {
1430    (0 until numBpStages).map(i =>
1431      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1432    ).foldLeft(Map[String, UInt]())(_+_)
1433  }
1434
1435  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1436  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1437  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1438  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1439  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1440  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1441
1442  val update_valid = io.toBpu.update.valid
1443  def u(cond: Bool) = update_valid && cond
1444  val ftb_false_hit = u(update.false_hit)
1445  // assert(!ftb_false_hit)
1446  val ftb_hit = u(commit_hit === h_hit)
1447
1448  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1449  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1450  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1451  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1452
1453  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1454
1455  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1456  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1457  val ftb_modified_entry_ifu_redirected = u(ifuRedirected(do_commit_ptr.value))
1458  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1459  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1460  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1461
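      // Length of a generated FTB entry, measured in minimal instruction slots:
      // (fallThrough - pc) >> instOffsetBits.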
1462  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1463  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1464  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth+1, 1)
1465  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth+1, 1)
1466  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1467  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth+1, 1)
1468
1469  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize+1, 1)
1470
1471  val perfCountsMap = Map(
1472    "BpInstr" -> PopCount(mbpInstrs),
1473    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1474    "BpRight"  -> PopCount(mbpRights),
1475    "BpWrong"  -> PopCount(mbpWrongs),
1476    "BpBRight" -> PopCount(mbpBRights),
1477    "BpBWrong" -> PopCount(mbpBWrongs),
1478    "BpJRight" -> PopCount(mbpJRights),
1479    "BpJWrong" -> PopCount(mbpJWrongs),
1480    "BpIRight" -> PopCount(mbpIRights),
1481    "BpIWrong" -> PopCount(mbpIWrongs),
1482    "BpCRight" -> PopCount(mbpCRights),
1483    "BpCWrong" -> PopCount(mbpCWrongs),
1484    "BpRRight" -> PopCount(mbpRRights),
1485    "BpRWrong" -> PopCount(mbpRWrongs),
1486
1487    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1488    "ftb_hit"                      -> PopCount(ftb_hit),
1489    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1490    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1491    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1492    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1493    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1494    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1495    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1496    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1497    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1498    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1499  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1500       correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1501
1502  for((key, value) <- perfCountsMap) {
1503    XSPerfAccumulate(key, value)
1504  }
1505
1506  // --------------------------- Debug --------------------------------
1507  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1508  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1509  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1510  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1511  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1512    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1513  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1514
1515  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1516  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1517  //       case (((valid, pd), ans), taken) =>
1518  //       Mux(valid && pd.isBr,
1519  //         isWrong ^ Mux(ans.hit.asBool,
1520  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1521  //           !taken),
1522  //         !taken),
1523  //       false.B)
1524  //     }
1525  //   }
1526
1527  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1528  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1529  //       case (((valid, pd), ans), taken) =>
1530  //       Mux(valid && pd.isBr,
1531  //         isWrong ^ Mux(ans.hit.asBool,
1532  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1533  //           !taken),
1534  //         !taken),
1535  //       false.B)
1536  //     }
1537  //   }
1538
1539  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1540  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1541  //       case (((valid, pd), ans), taken) =>
1542  //       Mux(valid && pd.isBr,
1543  //         isWrong ^ (ans.taken.asBool === taken),
1544  //       false.B)
1545  //     }
1546  //   }
1547
1548  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1549  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1550  //       case (((valid, pd), ans), taken) =>
1551  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1552  //         isWrong ^ (!taken),
1553  //           false.B)
1554  //     }
1555  //   }
1556
1557  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1558  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1559  //       case (((valid, pd), ans), taken) =>
1560  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1561  //         isWrong ^ (ans.target === commitEntry.target),
1562  //           false.B)
1563  //     }
1564  //   }
1565
1566  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1567  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1568  //   // btb and ubtb pred jal and jalr as well
1569  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1570  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1571  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1572  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1573
1574  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1575  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1576
1577  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1578  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1579
1580  val perfEvents = Seq(
1581    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1582    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1583    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
1584    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1585    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1586    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1587    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1588    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1589    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1590    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1591    ("BpRight                ", PopCount(mbpRights)                                                         ),
1592    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1593    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1594    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1595    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1596    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1597    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1598    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1599    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1600    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1601    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1602    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1603    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1604    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1605  )
1606  generatePerfEvent()
1607}
1608