xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision a63155a6a44b3c7714e55906b55ebf92e0efc125)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.frontend.icache._
import xiangshan.backend.decode.ImmUnion
import utility.ChiselDB
import xiangshan.backend.CtrlToFtqIO

class FtqDebugBundle extends Bundle {
  val pc = UInt(39.W)
  val target = UInt(39.W)
  val isBr = Bool()
  val isJmp = Bool()
  val isCall = Bool()
  val isRet = Bool()
  val misPred = Bool()
  val isTaken = Bool()
  val predStage = UInt(2.W)
}

class FtqPtr(entries: Int) extends CircularQueuePtr[FtqPtr](
  entries
){
  def this()(implicit p: Parameters) = this(p(XSCoreParamsKey).FtqSize)
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}
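
// FtqPtr is a wrapping circular-queue pointer (a flag bit plus an index).
// A small usage sketch, relying on the generic CircularQueuePtr helpers:
//   val a = FtqPtr(false.B, 3.U)   // slot 3, first generation
//   val b = FtqPtr(true.B, 1.U)    // slot 1, after one wrap-around
//   isAfter(b, a)                  // true.B: b is logically later than a
// FtqPtr.inverse flips only the wrap flag, i.e. the same slot one full
// generation apart, which is convenient for full/empty-style comparisons.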

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}
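
// FtqNRSRAM implements an N-read, 1-write memory by instantiating numRead
// single-read SRAMTemplate banks and writing all of them in lockstep: every
// bank holds a full copy of the array, and read port i is served only by
// bank i. This trades area (numRead copies) for read bandwidth without
// requiring a multi-ported SRAM macro.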

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  val startAddr = UInt(VAddrBits.W)
  val nextLineAddr = UInt(VAddrBits.W)
  val isNextMask = Vec(PredictWidth, Bool())
  val fallThruError = Bool()
  // val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
    this.startAddr := resp.pc
    this.nextLineAddr := resp.pc + (FetchWidth * 4 * 2).U // may be broken on other configs
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.fallThruError := resp.fallThruError
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}"
  }
}
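
// getPc sketch: startAddr gives the fetch block's base address, and
// isNextMask(i) records whether instruction slot i has wrapped past the
// block boundary. When it has (and the relevant carry bit of startAddr is
// set), the higher PC bits are taken from nextLineAddr instead of startAddr,
// so a PC reconstructed near the end of a fetch block still crosses into the
// next cache line correctly.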

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}
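
// brType encoding produced by toPd, assuming XiangShan's usual BrType
// convention (b00 = not a CFI, b01 = conditional branch, b10 = jal,
// b11 = jalr): Cat(isJmp, isJalr || isBr) yields b01 for a branch, b10 for a
// jal and b11 for a jalr at the recorded jmpOffset.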



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}


class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new BranchPredictionRedirect)
  val topdown_redirect = Valid(new BranchPredictionRedirect)
  val flushFromBpu = new Bundle {
    // when the IFU pipeline is not stalled,
    // a packet from BPU s3 can have reached F1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}
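
// shouldFlushBy semantics: an in-flight IFU request carrying ftqIdx idx must
// be flushed by a BPU s2/s3 override whose pointer is at or before idx,
// i.e. src.valid && !isAfter(src.bits, idx) -- the request was issued from a
// prediction path that the later BPU stage has just corrected.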

class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  // NOTE: req.bits must be prepared in cycle T,
  // while req.valid is set true in cycle T + 1
  val req = Decoupled(new FtqToICacheRequestBundle)
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  // write to backend pc mem
  val pc_mem_wen = Output(Bool())
  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
  val pc_mem_wdata = Output(new Ftq_RF_Components)
  // newest target
  val newest_entry_target = Output(UInt(VAddrBits.W))
  val newest_entry_ptr = Output(new FtqPtr)
}


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val jmp_taken = Output(Bool())
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  // that means fall thru points to the middle of an inst
  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)

  // if hit, check whether a new cfi (only a br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })
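
  // Insertion sketch, assuming numBr == 2: with an existing branch in slot 0
  // at offset 8, a new branch at offset 5 yields onehot = (1, 0), i.e. it is
  // inserted before slot 0 and the old branch shifts toward the tail; with
  // the existing branch at offset 3, the new branch at offset 5 yields
  // onehot = (0, 1) and lands in slot 1, keeping slots sorted by offset.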

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.last_may_be_rvi_call := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}
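
// FTBEntryGen decision tree, summarizing the muxes above:
//   !hit                        -> init_entry (fresh entry from predecode)
//   hit && is_new_br            -> old_entry_modified (insert/shift br slots)
//   hit && jalr_target_modified -> old_entry_jmp_target_modified
//   hit otherwise               -> old_entry_always_taken (only the
//                                  always_taken bits may decay to false)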

class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
  val io = IO(new Bundle {
    val ifuPtr_w       = Input(new FtqPtr)
    val ifuPtrPlus1_w  = Input(new FtqPtr)
    val ifuPtrPlus2_w  = Input(new FtqPtr)
    val commPtr_w      = Input(new FtqPtr)
    val commPtrPlus1_w = Input(new FtqPtr)
    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
    val commPtr_rdata      = Output(new Ftq_RF_Components)
    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)

    val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W)))
    val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components))

    val wen = Input(Bool())
    val waddr = Input(UInt(log2Ceil(FtqSize).W))
    val wdata = Input(new Ftq_RF_Components)
  })

  val num_pc_read = numOtherReads + 5
  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
    num_pc_read, 1, "FtqPC"))
  mem.io.wen(0)   := io.wen
  mem.io.waddr(0) := io.waddr
  mem.io.wdata(0) := io.wdata

  // read one cycle ahead for ftq local reads
  val raddr_vec = VecInit(io.other_raddrs ++
    Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value))

  mem.io.raddr := raddr_vec

  io.other_rdatas       := mem.io.rdata.dropRight(5)
  io.ifuPtr_rdata       := mem.io.rdata.dropRight(4).last
  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(3).last
  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(2).last
  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
  io.commPtr_rdata      := mem.io.rdata.last
}
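
// Read-port layout of FtqPcMemWrapper: ports [0, numOtherReads) serve
// external readers (here only the prefetcher), and the last five ports are
// pinned to ifuPtr, ifuPtrPlus1, ifuPtrPlus2, commPtrPlus1 and commPtr.
// Read addresses are the next-cycle (_w) pointer values, so the synchronous
// one-cycle read latency of SyncDataModuleTemplate lines the data up with
// the updated pointers.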

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
  with HasICacheParameters {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toICache = new FtqToICacheIO
    val toBackend = new FtqToCtrlIO

    val toPrefetch = new FtqPrefechBundle

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }

    val mmioCommitRead = Flipped(new mmioCommitRead)

    // for perf
    val ControlBTBMissBubble = Output(Bool())
    val TAGEMissBubble = Output(Bool())
    val SCMissBubble = Output(Bool())
    val ITTAGEMissBubble = Output(Bool())
    val RASMissBubble = Output(Bool())
  })
  io.bpuInfo := DontCare

  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  dontTouch(topdown_stage)
  // only driven by clock, not valid-ready
  topdown_stage := io.fromBpu.resp.bits.topdown_info
  io.toIfu.req.bits.topdown_info := topdown_stage

  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))

  val backendRedirect = Wire(Valid(new BranchPredictionRedirect))
  val backendRedirectReg = RegNext(backendRedirect)

  val stage2Flush = backendRedirect.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid

  def copyNum = 5
  val bpuPtr, ifuPtr, ifuWbPtr, commPtr, robCommPtr = RegInit(FtqPtr(false.B, 0.U))
  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
  require(FtqSize >= 4)
  val ifuPtr_write       = WireInit(ifuPtr)
  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
  val ifuWbPtr_write     = WireInit(ifuWbPtr)
  val commPtr_write      = WireInit(commPtr)
  val commPtrPlus1_write = WireInit(commPtrPlus1)
  val robCommPtr_write   = WireInit(robCommPtr)
  ifuPtr       := ifuPtr_write
  ifuPtrPlus1  := ifuPtrPlus1_write
  ifuPtrPlus2  := ifuPtrPlus2_write
  ifuWbPtr     := ifuWbPtr_write
  commPtr      := commPtr_write
  commPtrPlus1 := commPtrPlus1_write
  copied_ifu_ptr.map{ptr =>
    ptr := ifuPtr_write
    dontTouch(ptr)
  }
  robCommPtr   := robCommPtr_write
  val validEntries = distanceBetween(bpuPtr, commPtr)
  val canCommit = Wire(Bool())
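
  // Pointer discipline (a wrapping order, checked in part by the XSErrors
  // further down): commPtr <= ifuWbPtr <= ifuPtr <= bpuPtr. bpuPtr advances
  // on BPU enqueue, ifuPtr on requests issued to the IFU, ifuWbPtr on
  // predecode writeback, and commPtr on commit from the ROB.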

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U || canCommit
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdx
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:      prefetchReq ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
  val ftq_pc_mem = Module(new FtqPcMemWrapper(1))
  // resp from uBTB
  ftq_pc_mem.io.wen := bpu_in_fire
  ftq_pc_mem.io.waddr := bpu_in_resp_idx
  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of the BPU
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info
  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of the BPU
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
  val newest_entry_target = Reg(UInt(VAddrBits.W))
  val newest_entry_ptr = Reg(new FtqPtr)
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))

  // modify registers one cycle later to cut critical path
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
  val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget)
  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex)
  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)

  def extra_copyNum_for_commitStateQueue = 2
  val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
  val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr)))

  when (last_cycle_bpu_in) {
    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage

    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
    newest_entry_target := last_cycle_bpu_target
    newest_entry_ptr := last_cycle_bpu_in_ptr
  }

  // reduce fanout by delaying the write for a cycle
  when (RegNext(last_cycle_bpu_in)) {
    mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
  }

  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
    case ((in, ptr), i) =>
      when (in) {
        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
        for (j <- 0 until perSetEntries) {
          when (ptr.value === (i*perSetEntries+j).U) {
            commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid))
          }
        }
      }
  }
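
  // commitStateQueue is split into extra_copyNum_for_commitStateQueue banks
  // of perSetEntries entries; bank i is cleared using its own copied
  // (bpu_in, ptr) register pair, so no single register drives the write
  // enables of all FtqSize entries at once.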

  // the delay to the backend is a fixed number of cycles
  io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr)
  io.toBackend.newest_entry_target := RegNext(newest_entry_target)


  bpuPtr := bpuPtr + enq_fire
  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
  when (io.toIfu.req.fire && allowToIfu) {
    ifuPtr_write := ifuPtrPlus1
    ifuPtrPlus1_write := ifuPtrPlus2
    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
  }

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr_write := bpu_s2_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr_write := bpu_s3_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
    }
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")

  (0 until copyNum).map{i =>
    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
  }

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  // 0 for the IFU, 1-4 for the ICache
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, enable=bpu_in_fire)
  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, enable=bpu_in_fire)))
  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr)))
  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))

  // read pc and target
  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
  ftq_pc_mem.io.commPtr_w      := commPtr_write
  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write


  io.toIfu.req.bits.ftqIdx := ifuPtr

  val toICachePcBundle = Wire(Vec(copyNum, new Ftq_RF_Components))
  val toICacheEntryToSend = Wire(Vec(copyNum, Bool()))
  val toIfuPcBundle = Wire(new Ftq_RF_Components)
  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
  val entry_next_addr  = Wire(UInt(VAddrBits.W))

  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this

  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))

  for(i <- 0 until copyNum){
    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
      toICachePcBundle(i) := copied_bpu_in_bypass_buf(i)
      toICacheEntryToSend(i)   := true.B
    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
      toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i)
      toICacheEntryToSend(i)   := copied_ifu_plus1_to_send(i)
    }.otherwise{
      toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i)
      toICacheEntryToSend(i)   := copied_ifu_ptr_to_send(i)
    }
  }

  // TODO: reconsider target address bypass logic
  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
    entry_is_to_send := true.B
    entry_next_addr := last_cycle_bpu_target
    entry_ftq_offset := last_cycle_cfiIndex
    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
                          bpu_in_bypass_buf_for_ifu.startAddr,
                          Mux(ifuPtr === newest_entry_ptr,
                            newest_entry_target,
                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
  }.otherwise {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
                          bpu_in_bypass_buf_for_ifu.startAddr,
                          Mux(ifuPtr === newest_entry_ptr,
                            newest_entry_target,
                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
  }
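
  // Three-way selection for the request sent to the IFU, highest priority
  // first:
  //   1. bypass: the entry the BPU wrote last cycle is exactly ifuPtr, so the
  //      bypass buffer is forwarded instead of waiting for the pc-mem read;
  //   2. an IFU request fired last cycle, so ifuPtr has just advanced and the
  //      pre-read ifuPtrPlus1 data is the correct one;
  //   3. otherwise, use the pre-read data at ifuPtr.
  // entry_next_addr additionally prefers newest_entry_target when ifuPtr
  // points at the newest entry, since pc mem holds no later entry to read.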

  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.nextStartAddr := entry_next_addr
  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)

  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
  io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy,i) => copy.fromFtqPcBundle(toICachePcBundle(i))}
  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrtie, i) =>
  //   bypassWrtie.startAddr := bpu_in_bypass_buf.tail(i).startAddr
  //   bypassWrtie.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
  // }

  // TODO: remove this
  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")

  // when fall through is smaller in value than start address, there must be a false hit
  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
  }

  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }

  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  when (ifu_wb_valid) {
    ifuWbPtr_write := ifuWbPtr + 1.U
  }

  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }
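
  // A predicted hit is demoted to h_false_hit whenever predecode disagrees
  // with the stored FTB entry: a recorded branch slot whose instruction is
  // not actually a conditional branch, a jump slot whose kind
  // (jal/jalr/call/ret) does not match predecode, or a misprediction already
  // flagged on a hit entry (hit_pd_mispred_reg).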


  // **********************************************************************
  // ***************************** to backend *****************************
  // **********************************************************************
  // to backend pc mem / target
  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
  io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx)
  io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu)

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := backendRedirect.valid
  ftq_redirect_sram.io.raddr.init.last := backendRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := backendRedirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(backendRedirectReg)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)


  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(r_ftb_entry.brSlots(0).offset === r_ftqOffset,
      r_ftb_entry.brSlots(0).sc, r_ftb_entry.tailSlot.sc)

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
  fromIfuRedirect.bits.BTBMissBubble := true.B
  fromIfuRedirect.bits.debugIsMemVio := false.B
  fromIfuRedirect.bits.debugIsCtrl := false.B

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  when (ifuRedirectReg.valid) {
    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
  } .elsewhen(RegNext(pdWb.valid)) {
    // if pdWb and no redirect, set to false
    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  backendRedirect.valid := io.fromBackend.redirect.valid
  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
  backendRedirect.bits.BTBMissBubble := false.B


  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqPtr = wb.bits.ftqIdx
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val r_idx = r_ptr.value
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
      cfiIndex_vec(r_idx).valid := false.B
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    newest_entry_target := redirect.bits.cfiUpdate.target
    newest_entry_ptr := r_ptr
    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }
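
  // updateCfiInfo examples, for an entry whose recorded cfi is taken at
  // offset 6:
  //   - a taken redirect at offset 4 rewrites cfiIndex.bits to 4 and keeps it
  //     valid (an earlier taken branch overrides a later one);
  //   - a not-taken redirect at offset 6 clears the valid bit;
  //   - a taken redirect at offset 8 leaves the recorded cfi untouched.
  // Only backend redirects update mispredict_vec, since only the backend
  // actually resolves branch directions.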

  when(backendRedirectReg.valid) {
    updateCfiInfo(backendRedirectReg)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  when (backendRedirectReg.valid) {
    when (backendRedirectReg.bits.ControlRedirectBubble) {
      when (fromBackendRedirect.bits.ControlBTBMissBubble) {
        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.TAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.SCMissBubble) {
        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.ITTAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.RASMissBubble) {
        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
      }


    } .elsewhen (backendRedirectReg.bits.MemVioRedirectBubble) {
      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
    } .otherwise {
      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
    }
  } .elsewhen (ifuRedirectReg.valid) {
    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
  }

  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
  io.TAGEMissBubble := fromBackendRedirect.bits.TAGEMissBubble
  io.SCMissBubble := fromBackendRedirect.bits.SCMissBubble
  io.ITTAGEMissBubble := fromBackendRedirect.bits.ITTAGEMissBubble
  io.RASMissBubble := fromBackendRedirect.bits.RASMissBubble

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    copied_bpu_ptr.map(_ := next)
    ifuPtr_write := next
    ifuWbPtr_write := next
    ifuPtrPlus1_write := idx + 2.U
    ifuPtrPlus2_write := idx + 3.U

  }
  when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    when (RegNext(notIfu)) {
      commitStateQueue(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > RegNext(offset) || i.U === RegNext(offset) && RegNext(flushItSelf)){
          s := c_invalid
        }
      })
    }
  }


  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := backendRedirect.bits
  io.toIfu.redirect.valid   := stage2Flush
  io.toIfu.topdown_redirect := fromBackendRedirect

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  robCommPtr_write := Mux(io.fromBackend.rob_commits.map(_.valid).reduce(_ | _),
    ParallelPriorityMux(io.fromBackend.rob_commits.map(_.valid).reverse,
      io.fromBackend.rob_commits.map(_.bits.ftqIdx).reverse),
    robCommPtr)

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay")

  val may_have_stall_from_bpu = Wire(Bool())
  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
  val notInvalidSeq = commitStateQueue(commPtr.value).map(s => s =/= c_invalid).reverse
  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    (isAfter(robCommPtr, commPtr) || PriorityMuxDefault(notInvalidSeq.zip(commitStateQueue(commPtr.value).reverse), c_invalid) === c_commited)

  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
  val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr, mmioReadPtr) || mmioReadPtr === ifuPtr) &&
                       Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited})).andR()
  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
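
  // An entry may commit (canCommit above) only when predecode writeback has
  // passed it (commPtr =/= ifuWbPtr), the 2-cycle BPU FTB-update window is
  // idle, and either the ROB commit pointer has already moved past it or its
  // last not-invalid slot has reached c_commited.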

  // commit reads
  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
  val commit_target =
    Mux(RegNext(commPtr === newest_entry_ptr),
      RegNext(newest_entry_target),
      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) {
    commPtr_write := commPtrPlus1
    commPtrPlus1_write := commPtrPlus1 + 1.U
  }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  val do_commit_cfi = WireInit(cfiIndex_vec(do_commit_ptr.value))
  //
  //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
  //  can_commit_cfi.valid := false.B
  //}
  val commit_cfi = RegNext(can_commit_cfi)
  val debug_cfi = commitStateQueue(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_commited && do_commit_cfi.valid

  val commit_mispredict  : Vec[Bool] = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_commited)) // [PredictWidth]
  val can_commit_hit                 = entry_hit_status(commPtr.value)
  val commit_hit                     = RegNext(can_commit_hit)
  val diff_commit_target             = RegNext(update_target(commPtr.value)) // TODO: remove this
  val commit_stage                   = RegNext(pred_stage(commPtr.value))
  val commit_valid                   = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  switch (bpu_ftb_update_stall) {
    is (0.U) {
      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
        bpu_ftb_update_stall := 2.U // 2-cycle stall
      }
    }
    is (2.U) {
      bpu_ftb_update_stall := 1.U
    }
    is (1.U) {
      bpu_ftb_update_stall := 0.U
    }
    is (3.U) {
      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
    }
  }
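
  // Stall FSM sketch: when a committing entry has a taken cfi but no FTB hit,
  // a new FTB entry must be generated, and commits pause for two cycles
  // (2 -> 1 -> 0), presumably so this update can settle in the BPU before the
  // next one is issued; state 3 is unreachable.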
1190
1191  // TODO: remove this
1192  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1193
1194  io.toBpu.update := DontCare
1195  io.toBpu.update.valid := commit_valid && do_commit
1196  val update = io.toBpu.update.bits
1197  update.false_hit   := commit_hit === h_false_hit
1198  update.pc          := commit_pc_bundle.startAddr
1199  update.meta        := commit_meta.meta
1200  update.cfi_idx     := commit_cfi
1201  update.full_target := commit_target
1202  update.from_stage  := commit_stage
1203  update.spec_info   := commit_spec_meta
1204  XSError(commit_valid && do_commit && debug_cfi, "\ncommit cfi can be non c_commited\n")
1205
1206  val commit_real_hit = commit_hit === h_hit
1207  val update_ftb_entry = update.ftb_entry
1208
1209  val ftbEntryGen = Module(new FTBEntryGen).io
1210  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1211  ftbEntryGen.old_entry      := commit_ftb_entry
1212  ftbEntryGen.pd             := commit_pd
1213  ftbEntryGen.cfiIndex       := commit_cfi
1214  ftbEntryGen.target         := commit_target
1215  ftbEntryGen.hit            := commit_real_hit
1216  ftbEntryGen.mispredict_vec := commit_mispredict
1217
1218  update_ftb_entry         := ftbEntryGen.new_entry
1219  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1220  update.mispred_mask      := ftbEntryGen.mispred_mask
1221  update.old_entry         := ftbEntryGen.is_old_entry
1222  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1223  update.br_taken_mask     := ftbEntryGen.taken_mask
1224  update.br_committed      := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1225    case (valid, offset) => valid && commit_instCommited(offset)
1226  }
1227  update.jmp_taken         := ftbEntryGen.jmp_taken
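  // FTBEntryGen merges the committed pd/cfi/target information into the old
  // FTB entry (or builds a fresh one), and the fields above forward its result
  // into the io.toBpu.update bundle that trains the predictors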
1228
1229  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1230  // update.full_pred.jalr_target := commit_target
1231  // update.full_pred.hit := true.B
1232  // when (update.full_pred.is_jalr) {
1233  //   update.full_pred.targets.last := commit_target
1234  // }
1235
1236  // ****************************************************************
1237  // *********************** to prefetch ****************************
1238  // ****************************************************************
1239
1240  ftq_pc_mem.io.other_raddrs(0) := DontCare
1241  if(cacheParams.hasPrefetch){
1242    val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
1243    val diff_prefetch_addr = WireInit(update_target(prefetchPtr.value)) // TODO: remove this
1244    // TODO: must be made wider
1245    prefetchPtr := prefetchPtr + io.toPrefetch.req.fire()
1246
1247    val prefetch_too_late = (isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr)) || (prefetchPtr === ifuPtr)
1248    when(prefetch_too_late){
1249      when(prefetchPtr =/= bpuPtr){
1250        prefetchPtr := bpuPtr - 1.U
1251      }.otherwise{
1252        prefetchPtr := ifuPtr
1253      }
1254    }
1255
1256    ftq_pc_mem.io.other_raddrs(0) := prefetchPtr.value
1257
1258    when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect && !isBefore(prefetchPtr, bpu_s2_resp.ftq_idx)) {
1259      prefetchPtr := bpu_s2_resp.ftq_idx
1260    }
1261
1262    when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect && !isBefore(prefetchPtr, bpu_s3_resp.ftq_idx)) {
1263      prefetchPtr := bpu_s3_resp.ftq_idx
1264      // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
1265    }
1266
1267
1268    val prefetch_is_to_send = WireInit(entry_fetch_status(prefetchPtr.value) === f_to_send)
1269    val prefetch_addr = Wire(UInt(VAddrBits.W))
1270
1271    when (last_cycle_bpu_in && bpu_in_bypass_ptr === prefetchPtr) {
1272      prefetch_is_to_send := true.B
1273      prefetch_addr := last_cycle_bpu_target
1274      diff_prefetch_addr := last_cycle_bpu_target // TODO: remove this
1275    }.otherwise{
1276      prefetch_addr := RegNext(ftq_pc_mem.io.other_rdatas(0).startAddr)
1277    }
1278    io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && prefetch_is_to_send
1279    io.toPrefetch.req.bits.target := prefetch_addr
1280
1281    when(redirectVec.map(r => r.valid).reduce(_||_)){
1282      val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1283      val next = r.ftqIdx + 1.U
1284      prefetchPtr := next
1285    }
1286
1287    // TODO: remove this
1288    // XSError(io.toPrefetch.req.valid && diff_prefetch_addr =/= prefetch_addr,
1289    //         f"\nprefetch_req_target wrong! prefetchPtr: ${prefetchPtr}, prefetch_addr: ${Hexadecimal(prefetch_addr)} diff_prefetch_addr: ${Hexadecimal(diff_prefetch_addr)}\n")
1290
1291
1292    XSError(isBefore(bpuPtr, prefetchPtr) && !isFull(bpuPtr, prefetchPtr), "\nprefetchPtr is ahead of bpuPtr!\n")
1293//    XSError(isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr), "\nprefetchPtr is behind ifuPtr!\n")
1294  }
1295  else {
1296    io.toPrefetch.req <> DontCare
1297  }
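  // prefetchPtr normally advances on each accepted prefetch request; it is
  // pushed forward when it falls to or behind ifuPtr (prefetch_too_late) and
  // rolled back by BPU s2/s3 redirects and by redirectVec, keeping it between
  // ifuPtr and bpuPtr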
1298
1299  // ******************************************************************************
1300  // **************************** commit perf counters ****************************
1301  // ******************************************************************************
1302
1303  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
1304  val commit_mispred_mask = commit_mispredict.asUInt
1305  val commit_not_mispred_mask = ~commit_mispred_mask
1306
1307  val commit_br_mask = commit_pd.brMask.asUInt
1308  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1309  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
1310
1311  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1312
1313  val mbpRights = mbpInstrs & commit_not_mispred_mask
1314  val mbpWrongs = mbpInstrs & commit_mispred_mask
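  // per-slot perf masks: mbpInstrs selects committed cfi instructions, which
  // commit_mispred_mask then splits into correctly predicted (mbpRights) and
  // mispredicted (mbpWrongs) slots for the counters below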
1315
1316  io.bpuInfo.bpRight := PopCount(mbpRights)
1317  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1318
1319  val isWriteFTQTable = WireInit(Constantin.createRecord("isWriteFTQTable" + p(XSCoreParamsKey).HartId.toString))
1320  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
1321  // Cfi Info
1322  for (i <- 0 until PredictWidth) {
1323    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1324    val v = commit_state(i) === c_commited
1325    val isBr = commit_pd.brMask(i)
1326    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1327    val isCfi = isBr || isJmp
1328    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1329    val misPred = commit_mispredict(i)
1330    // val ghist = commit_spec_meta.ghist.predHist
1331    val histPtr = commit_spec_meta.histPtr
1332    val predCycle = commit_meta.meta(63, 0)
1333    val target = commit_target
1334
1335    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1336    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
1337    val addIntoHist = (commit_hit === h_hit && inFtbEntry) || (commit_hit =/= h_hit && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1338    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1339    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1340    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1341    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1342
1343    val logbundle = Wire(new FtqDebugBundle)
1344    logbundle.pc := pc
1345    logbundle.target := target
1346    logbundle.isBr := isBr
1347    logbundle.isJmp := isJmp
1348    logbundle.isCall := isJmp && commit_pd.hasCall
1349    logbundle.isRet := isJmp && commit_pd.hasRet
1350    logbundle.misPred := misPred
1351    logbundle.isTaken := isTaken
1352    logbundle.predStage := commit_stage
1353
1354    ftqBranchTraceDB.log(
1355      data = logbundle /* hardware of type T */,
1356      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1357      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1358      clock = clock,
1359      reset = reset
1360    )
1361  }
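  // every committed cfi slot is also logged to the per-hart FTQTable ChiselDB
  // trace, gated at runtime by the Constantin record isWriteFTQTable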
1362
1363  val enq = io.fromBpu.resp
1364  val perf_redirect = backendRedirect
1365
1366  XSPerfAccumulate("entry", validEntries)
1367  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1368  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1369  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1370  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1371
1372  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1373
1374  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1375  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1376  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1377
1378  val from_bpu = io.fromBpu.resp.bits
1379  def in_entry_len_map_gen(resp: BpuToFtqBundle)(stage: String) = {
1380    val entry_len = (resp.last_stage_ftb_entry.getFallThrough(resp.s3.pc) - resp.s3.pc) >> instOffsetBits
1381    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
1382    val entry_len_map = (1 to PredictWidth+1).map(i =>
1383      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.s3.valid)
1384    ).foldLeft(Map[String, UInt]())(_+_)
1385    entry_len_map
1386  }
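  // histogram helper: FTB entry length in instructions, derived from the s3
  // fall-through address and bucketed into 1 .. PredictWidth+1; instantiated
  // for the s3 stage right below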
1387  val s3_entry_len_map = in_entry_len_map_gen(from_bpu)("s3")
1388
1389  val to_ifu = io.toIfu.req.bits
1390
1391
1392
1393  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
1394  val commit_num_inst_map = (1 to PredictWidth).map(i =>
1395    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
1396  ).foldLeft(Map[String, UInt]())(_+_)
1397
1398
1399
1400  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1401  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1402  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1403  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
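  // one-hot masks marking the committed jump slot, qualified by jump type
  // (jal / jalr / call / ret); they split mbpRights/mbpWrongs by type below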
1404
1405
1406  val mbpBRights = mbpRights & commit_br_mask
1407  val mbpJRights = mbpRights & commit_jal_mask
1408  val mbpIRights = mbpRights & commit_jalr_mask
1409  val mbpCRights = mbpRights & commit_call_mask
1410  val mbpRRights = mbpRights & commit_ret_mask
1411
1412  val mbpBWrongs = mbpWrongs & commit_br_mask
1413  val mbpJWrongs = mbpWrongs & commit_jal_mask
1414  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1415  val mbpCWrongs = mbpWrongs & commit_call_mask
1416  val mbpRWrongs = mbpWrongs & commit_ret_mask
1417
1418  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1419
1420  def pred_stage_map(src: UInt, name: String) = {
1421    (0 until numBpStages).map(i =>
1422      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1423    ).foldLeft(Map[String, UInt]())(_+_)
1424  }
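  // attributes each correctly/incorrectly predicted slot to the BPU pipeline
  // stage that produced the committed prediction, using the pred_stage
  // recorded at enqueue time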
1425
1426  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1427  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1428  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1429  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1430  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1431  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1432
1433  val update_valid = io.toBpu.update.valid
1434  def u(cond: Bool) = update_valid && cond
1435  val ftb_false_hit = u(update.false_hit)
1436  // assert(!ftb_false_hit)
1437  val ftb_hit = u(commit_hit === h_hit)
1438
1439  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1440  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1441  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1442  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1443
1444  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1445
1446  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1447  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1448  val ftb_modified_entry_ifu_redirected = u(ifuRedirected(do_commit_ptr.value))
1449  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1450  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1451  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
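  // classifies each FTB update: brand-new entry, unchanged old entry, or an
  // entry modified by a new branch, a changed jalr target, or a changed
  // always-taken bit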
1452
1453  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
1454  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
1455  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
1456    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
1457  ).foldLeft(Map[String, UInt]())(_+_)
1458  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
1459    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
1460  ).foldLeft(Map[String, UInt]())(_+_)
1461
1462  val ftq_occupancy_map = (0 to FtqSize).map(i =>
1463    f"ftq_has_entry_$i" ->( validEntries === i.U)
1464  ).foldLeft(Map[String, UInt]())(_+_)
1465
1466  val perfCountsMap = Map(
1467    "BpInstr" -> PopCount(mbpInstrs),
1468    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1469    "BpRight"  -> PopCount(mbpRights),
1470    "BpWrong"  -> PopCount(mbpWrongs),
1471    "BpBRight" -> PopCount(mbpBRights),
1472    "BpBWrong" -> PopCount(mbpBWrongs),
1473    "BpJRight" -> PopCount(mbpJRights),
1474    "BpJWrong" -> PopCount(mbpJWrongs),
1475    "BpIRight" -> PopCount(mbpIRights),
1476    "BpIWrong" -> PopCount(mbpIWrongs),
1477    "BpCRight" -> PopCount(mbpCRights),
1478    "BpCWrong" -> PopCount(mbpCWrongs),
1479    "BpRRight" -> PopCount(mbpRRights),
1480    "BpRWrong" -> PopCount(mbpRWrongs),
1481
1482    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1483    "ftb_hit"                      -> PopCount(ftb_hit),
1484    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1485    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1486    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1487    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1488    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1489    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1490    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1491    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1492    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1493    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1494  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++
1495  s3_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
1496  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1497  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1498
1499  for((key, value) <- perfCountsMap) {
1500    XSPerfAccumulate(key, value)
1501  }
1502
1503  // --------------------------- Debug --------------------------------
1504  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1505  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1506  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1507  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1508  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1509    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1510  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1511
1512  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1513  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1514  //       case (((valid, pd), ans), taken) =>
1515  //       Mux(valid && pd.isBr,
1516  //         isWrong ^ Mux(ans.hit.asBool,
1517  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1518  //           !taken),
1519  //         !taken),
1520  //       false.B)
1521  //     }
1522  //   }
1523
1524  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1525  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1526  //       case (((valid, pd), ans), taken) =>
1527  //       Mux(valid && pd.isBr,
1528  //         isWrong ^ Mux(ans.hit.asBool,
1529  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1530  //           !taken),
1531  //         !taken),
1532  //       false.B)
1533  //     }
1534  //   }
1535
1536  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1537  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1538  //       case (((valid, pd), ans), taken) =>
1539  //       Mux(valid && pd.isBr,
1540  //         isWrong ^ (ans.taken.asBool === taken),
1541  //       false.B)
1542  //     }
1543  //   }
1544
1545  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1546  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1547  //       case (((valid, pd), ans), taken) =>
1548  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1549  //         isWrong ^ (!taken),
1550  //           false.B)
1551  //     }
1552  //   }
1553
1554  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1555  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1556  //       case (((valid, pd), ans), taken) =>
1557  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1558  //         isWrong ^ (ans.target === commitEntry.target),
1559  //           false.B)
1560  //     }
1561  //   }
1562
1563  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1564  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1565  //   // btb and ubtb pred jal and jalr as well
1566  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1567  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1568  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1569  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1570
1571  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1572  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1573
1574  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1575  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1576
1577  val perfEvents = Seq(
1578    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1579    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1580    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
1581    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1582    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1583    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1584    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1585    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1586    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1587    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1588    ("BpRight                ", PopCount(mbpRights)                                                         ),
1589    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1590    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1591    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1592    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1593    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1594    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1595    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1596    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1597    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1598    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1599    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1600    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1601    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1602  )
1603  generatePerfEvent()
1604}
1605