/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.frontend.icache._
import xiangshan.backend.CtrlToFtqIO
import xiangshan.backend.decode.ImmUnion
import utility.ChiselDB

class FtqDebugBundle extends Bundle {
  val pc = UInt(39.W)
  val target = UInt(39.W)
  val isBr = Bool()
  val isJmp = Bool()
  val isCall = Bool()
  val isRet = Bool()
  val misPred = Bool()
  val isTaken = Bool()
  val predStage = UInt(2.W)
}

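// FtqPtr is a wrapping pointer into the FTQ: `value` indexes an entry and `flag`
// flips on every wrap-around, so isAfter/isBefore and distanceBetween (from
// CircularQueuePtr) stay well-defined even after pointers wrap, e.g.
// FtqPtr(true.B, 0.U) is exactly FtqSize entries ahead of FtqPtr(false.B, 0.U).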
class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

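  // An N-read/1-write memory is emulated by replicating a 1R1W SRAMTemplate per
  // read port: every replica sees the same write stream, so each holds a full
  // copy of the contents and serves one read port (area traded for bandwidth).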
  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  val startAddr = UInt(VAddrBits.W)
  val nextLineAddr = UInt(VAddrBits.W)
  val isNextMask = Vec(PredictWidth, Bool())
  val fallThruError = Bool()
  // val carry = Bool()
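  // getPc rebuilds the PC of the instruction at `offset` inside this fetch block:
  // the low bits are startAddr's in-block offset plus `offset`, and the high bits
  // come from nextLineAddr instead of startAddr when that addition carries into
  // the next fetch line (precomputed per slot in isNextMask).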
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
    this.startAddr := resp.pc(3)
    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
    ))
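    // e.g. with PredictWidth = 16, if startAddr's slot field is 14, the +& sum
    // carries out of 4 bits from slot 2 onwards, so isNextMask = (0,0,1,1,...)
    // and getPc uses nextLineAddr's upper bits for those slots.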
    this.fallThruError := resp.fallThruError(3)
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}"
  }
}

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)
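  // jmpInfo.valid flags that the block contains a jal/jalr; the bits encode its
  // kind as packed in fromPdWb below: bits(0) = isJalr, bits(1) = isCall,
  // bits(2) = isRet. Only the first jump in the block is recorded.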

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
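    // this 2-bit Cat presumably mirrors the BrType encoding (high bit = is-jump):
    // 00 non-CFI, 01 branch, 10 jal, 11 jalr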
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}


class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new BranchPredictionRedirect)
  val topdown_redirect = Valid(new BranchPredictionRedirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet from bpu s3 can have reached f1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
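    // a flush from s2/s3 kills any in-flight request whose ftqIdx is at or after
    // the redirecting ftqIdx, i.e. every packet the override supersedes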
  }
}

class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  // NOTE: req.bits must be prepared in cycle T,
  // while req.valid is set true in cycle T + 1
  val req = Decoupled(new FtqToICacheRequestBundle)
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirectPcRead = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  // write to backend pc mem
  val pc_mem_wen = Output(Bool())
  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
  val pc_mem_wdata = Output(new Ftq_RF_Components)
  // newest target
  val newest_entry_target = Output(UInt(VAddrBits.W))
  val newest_entry_ptr = Output(new FtqPtr)
}


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val jmp_taken = Output(Bool())
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })
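
  // Entry generation at commit time has two modes: on a miss (!hit) a fresh
  // init_entry is built from predecode info alone; on a hit the old entry is
  // derived from instead, by inserting a newly discovered branch, correcting a
  // jalr target, or clearing stale always_taken bits. The candidates are
  // selected into io.new_entry at the bottom of this module.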

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  // that means fall thru points to the middle of an inst
  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)

  // if hit, check whether a new cfi (only a br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })
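  // e.g. with numBr = 2 and old branches at offsets {4, 9}: a new br at offset 6
  // gives insert-onehot (0,1), displacing slot 1; the old offset-9 branch falls
  // off the entry, and the pftAddr fixup below shortens the entry to offset 9.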

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.last_may_be_rvi_call := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)


  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
  val io = IO(new Bundle {
    val ifuPtr_w       = Input(new FtqPtr)
    val ifuPtrPlus1_w  = Input(new FtqPtr)
    val ifuPtrPlus2_w  = Input(new FtqPtr)
    val commPtr_w      = Input(new FtqPtr)
    val commPtrPlus1_w = Input(new FtqPtr)
    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
    val commPtr_rdata      = Output(new Ftq_RF_Components)
    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)

    val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W)))
    val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components))

    val wen = Input(Bool())
    val waddr = Input(UInt(log2Ceil(FtqSize).W))
    val wdata = Input(new Ftq_RF_Components)
  })

  val num_pc_read = numOtherReads + 5
  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
    num_pc_read, 1, "FtqPC"))
  mem.io.wen(0)   := io.wen
  mem.io.waddr(0) := io.waddr
  mem.io.wdata(0) := io.wdata

  // read one cycle ahead for ftq local reads
  val raddr_vec = VecInit(io.other_raddrs ++
    Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value))
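  // read-port order: ports [0, numOtherReads) are the external ones; the last
  // five serve ifuPtr, ifuPtrPlus1, ifuPtrPlus2, commPtrPlus1 and commPtr, in
  // that order (matching the dropRight chain below). The *_w next-cycle pointer
  // values are used as addresses because SyncDataModuleTemplate reads are
  // registered and return data one cycle later.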

  mem.io.raddr := raddr_vec

  io.other_rdatas       := mem.io.rdata.dropRight(5)
  io.ifuPtr_rdata       := mem.io.rdata.dropRight(4).last
  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(3).last
  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(2).last
  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
  io.commPtr_rdata      := mem.io.rdata.last
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
  with HasICacheParameters {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toICache = new FtqToICacheIO
    val toBackend = new FtqToCtrlIO

    val toPrefetch = new FtqPrefechBundle

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }

    val mmioCommitRead = Flipped(new mmioCommitRead)

    // for perf
    val ControlBTBMissBubble = Output(Bool())
    val TAGEMissBubble = Output(Bool())
    val SCMissBubble = Output(Bool())
    val ITTAGEMissBubble = Output(Bool())
    val RASMissBubble = Output(Bool())
  })
  io.bpuInfo := DontCare

  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  dontTouch(topdown_stage)
  // only driven by clock, not valid-ready
  topdown_stage := io.fromBpu.resp.bits.topdown_info
  io.toIfu.req.bits.topdown_info := topdown_stage

  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))

  val backendRedirect = Wire(Valid(new BranchPredictionRedirect))
  when(io.fromBackend.redirect.valid) {
    assert(RegNext(io.fromBackend.ftqIdxAhead.map(_.valid).reduce(_|_)))
    assert(io.fromBackend.ftqIdxSelOH.valid)
    assert(PopCount(io.fromBackend.ftqIdxSelOH.bits) === 1.U)
  }

  val stage2Flush = backendRedirect.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !backendRedirect.valid
  allowToIfu := !ifuFlush && !backendRedirect.valid

  def copyNum = 5
  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
  require(FtqSize >= 4)
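  // pointer-update pattern: each pointer register is fed from a `_write` wire
  // defaulting to its current value; later blocks (enq, bpu redirect recovery,
  // full flush) override the wire, and Chisel's last-connect semantics give the
  // later, higher-priority sources the final say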
  val ifuPtr_write       = WireInit(ifuPtr)
  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
  val ifuWbPtr_write     = WireInit(ifuWbPtr)
  val commPtr_write      = WireInit(commPtr)
  val commPtrPlus1_write = WireInit(commPtrPlus1)
  ifuPtr       := ifuPtr_write
  ifuPtrPlus1  := ifuPtrPlus1_write
  ifuPtrPlus2  := ifuPtrPlus2_write
  ifuWbPtr     := ifuWbPtr_write
  commPtr      := commPtr_write
  commPtrPlus1 := commPtrPlus1_write
  copied_ifu_ptr.map{ptr =>
    ptr := ifuPtr_write
    dontTouch(ptr)
  }
  val validEntries = distanceBetween(bpuPtr, commPtr)
  val canCommit = Wire(Bool())

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U || canCommit
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdxForFtq
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:      prefetchReq ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
  val ftq_pc_mem = Module(new FtqPcMemWrapper(1))
  // resp from uBTB
  ftq_pc_mem.io.wen := bpu_in_fire
  ftq_pc_mem.io.waddr := bpu_in_resp_idx
  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+BackendRedirectNum+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info
  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+BackendRedirectNum+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid(3)
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
  val newest_entry_target = Reg(UInt(VAddrBits.W))
  val newest_entry_ptr = Reg(new FtqPtr)
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))
  val pred_s1_cycle = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))

  // modify registers one cycle later to cut critical path
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
  val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget(3))
  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex(3))
  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)

  def extra_copyNum_for_commitStateQueue = 2
  val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
  val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr)))

  when (last_cycle_bpu_in) {
    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage

    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
    newest_entry_target := last_cycle_bpu_target
    newest_entry_ptr := last_cycle_bpu_in_ptr
  }

  // reduce fanout by delaying the write for a cycle
  when (RegNext(last_cycle_bpu_in)) {
    mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
  }

  // record s1 pred cycles
  pred_s1_cycle.map(vec => {
    when (bpu_in_fire && (bpu_in_stage === BP_S1)) {
      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
    }
  })

  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
    case ((in, ptr), i) =>
      when (in) {
        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
        for (j <- 0 until perSetEntries) {
          when (ptr.value === (i*perSetEntries+j).U) {
            commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid))
          }
        }
      }
  }

  // the latency to the backend is a fixed number of cycles
  io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr)
  io.toBackend.newest_entry_target := RegNext(newest_entry_target)


  bpuPtr := bpuPtr + enq_fire
  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
  when (io.toIfu.req.fire && allowToIfu) {
    ifuPtr_write := ifuPtrPlus1
    ifuPtrPlus1_write := ifuPtrPlus2
    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
  }

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid(3)) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_redirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr_write := bpu_s2_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_redirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr_write := bpu_s3_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
    }
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")

  (0 until copyNum).map{i =>
    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
  }

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  // 0 for ifu, and 1-4 for ICache
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr)))
  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))
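  // the fetch request has three possible sources, in priority order:
  // (1) the bypass buffer, when bpu wrote the very entry ifuPtr points to last
  //     cycle; (2) the ifuPtrPlus1 read port, when a request fired last cycle
  //     and the pointer is advancing; (3) the ifuPtr read port otherwise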

  // read pc and target
  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
  ftq_pc_mem.io.commPtr_w      := commPtr_write
  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write


  io.toIfu.req.bits.ftqIdx := ifuPtr

  val toICachePcBundle = Wire(Vec(copyNum, new Ftq_RF_Components))
  val toICacheEntryToSend = Wire(Vec(copyNum, Bool()))
  val toIfuPcBundle = Wire(new Ftq_RF_Components)
  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
  val entry_next_addr  = Wire(UInt(VAddrBits.W))

  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this

  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))

  for(i <- 0 until copyNum){
    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
      toICachePcBundle(i)    := copied_bpu_in_bypass_buf(i)
      toICacheEntryToSend(i) := true.B
    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
      toICachePcBundle(i)    := pc_mem_ifu_plus1_rdata(i)
      toICacheEntryToSend(i) := copied_ifu_plus1_to_send(i)
    }.otherwise{
      toICachePcBundle(i)    := pc_mem_ifu_ptr_rdata(i)
      toICacheEntryToSend(i) := copied_ifu_ptr_to_send(i)
    }
  }

  // TODO: reconsider target address bypass logic
  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
    entry_is_to_send := true.B
    entry_next_addr := last_cycle_bpu_target
    entry_ftq_offset := last_cycle_cfiIndex
    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
                          bpu_in_bypass_buf_for_ifu.startAddr,
                          Mux(ifuPtr === newest_entry_ptr,
                            newest_entry_target,
                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
  }.otherwise {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
                          bpu_in_bypass_buf_for_ifu.startAddr,
                          Mux(ifuPtr === newest_entry_ptr,
                            newest_entry_target,
                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
  }

  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.nextStartAddr := entry_next_addr
  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)

  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
  io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy, i) => copy.fromFtqPcBundle(toICachePcBundle(i))}
  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrite, i) =>
  //   bypassWrite.startAddr := bpu_in_bypass_buf.tail(i).startAddr
  //   bypassWrite.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
  // }

  // TODO: remove this
  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")

  // when the fall-through address is smaller in value than the start address, there must be a false hit
  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
  }

  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }

  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  when (ifu_wb_valid) {
    ifuWbPtr_write := ifuWbPtr + 1.U
  }

  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // ***************************** to backend *****************************
  // **********************************************************************
  // to backend pc mem / target
  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
  io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx)
  io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu)

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  val ftq_redirect_rdata = Wire(Vec(BackendRedirectNum, new Ftq_Redirect_SRAMEntry))
  val ftb_redirect_rdata = Wire(Vec(BackendRedirectNum, new FTBEntry))
  for (i <- 0 until BackendRedirectNum) {
    ftq_redirect_sram.io.ren(i + 1) := io.fromBackend.ftqIdxAhead(i).valid
    ftq_redirect_sram.io.raddr(i + 1) := io.fromBackend.ftqIdxAhead(i).bits.value
    ftb_entry_mem.io.raddr(i + 1)     := io.fromBackend.ftqIdxAhead(i).bits.value

    ftq_redirect_rdata(i) := ftq_redirect_sram.io.rdata(i + 1)
    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + 1)
  }
  val stage3CfiInfo = Mux1H(io.fromBackend.ftqIdxSelOH.bits, ftq_redirect_rdata)
  val fromBackendRedirect = WireInit(backendRedirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)


  val r_ftb_entry = Mux1H(io.fromBackend.ftqIdxSelOH.bits, ftb_redirect_rdata)
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(r_ftb_entry.brSlots(0).offset === r_ftqOffset,
      r_ftb_entry.brSlots(0).sc, r_ftb_entry.tailSlot.sc)

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
  fromIfuRedirect.bits.BTBMissBubble := true.B
  fromIfuRedirect.bits.debugIsMemVio := false.B
  fromIfuRedirect.bits.debugIsCtrl := false.B

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
    toBpuCfi.target := toBpuCfi.topAddr
  }

  when (ifuRedirectReg.valid) {
    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
  } .elsewhen(RegNext(pdWb.valid)) {
    // if pdWb fired with no redirect, set the flag back to false
    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  backendRedirect.valid := io.fromBackend.redirect.valid
  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
  backendRedirect.bits.BTBMissBubble := false.B


  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqPtr = wb.bits.ftqIdx
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val r_idx = r_ptr.value
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
      cfiIndex_vec(r_idx).valid := false.B
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    newest_entry_target := redirect.bits.cfiUpdate.target
    newest_entry_ptr := r_ptr
    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }
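  // a taken redirect at an earlier offset pulls cfiIndex back; a redirect at the
  // recorded offset re-resolves its valid bit with the actual taken result; a
  // not-taken redirect at any other offset invalidates the recorded cfi. Only
  // backend redirects carry a resolved mispredict bit, hence the isBackend guard.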

  when(backendRedirect.valid) {
    updateCfiInfo(backendRedirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  when (backendRedirect.valid) {
    when (backendRedirect.bits.ControlRedirectBubble) {
      when (fromBackendRedirect.bits.ControlBTBMissBubble) {
        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.TAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.SCMissBubble) {
        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.ITTAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.RASMissBubble) {
        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
      }
    } .elsewhen (backendRedirect.bits.MemVioRedirectBubble) {
      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
    } .otherwise {
      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
    }
  } .elsewhen (ifuRedirectReg.valid) {
    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
  }

  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
  io.TAGEMissBubble := fromBackendRedirect.bits.TAGEMissBubble
  io.SCMissBubble := fromBackendRedirect.bits.SCMissBubble
  io.ITTAGEMissBubble := fromBackendRedirect.bits.ITTAGEMissBubble
  io.RASMissBubble := fromBackendRedirect.bits.RASMissBubble

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    copied_bpu_ptr.map(_ := next)
    ifuPtr_write := next
    ifuWbPtr_write := next
    ifuPtrPlus1_write := idx + 2.U
    ifuPtrPlus2_write := idx + 3.U
  }
  when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    when (RegNext(notIfu)) {
      commitStateQueue(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > RegNext(offset) || i.U === RegNext(offset) && RegNext(flushItSelf)){
          s := c_invalid
        }
      })
    }
  }


  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := backendRedirect.bits
  io.toIfu.redirect.valid   := stage2Flush
  io.toIfu.topdown_redirect := fromBackendRedirect

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }
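  // reading off the indexing above, commitType 4/5 appear to commit the fused
  // partner at ftqOffset+1/+2 within the same entry, and 6/7 slot 0/1 of the
  // next ftq entry when the fused pair straddles an entry boundary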

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_=>0.U(64.W)))
  val redirect_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
  XSPerfHistogram("ifu_redirect_latency", redirect_latency, !fromBackendRedirect.valid && ifuRedirectToBpu.valid, 0, 60, 1)

  XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay")

  val may_have_stall_from_bpu = Wire(Bool())
  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR
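  // an entry can commit once ifu writeback has passed it (commPtr =/= ifuWbPtr),
  // no ftb-update stall is pending, and every slot is either committed or never
  // occupied (c_invalid); a single still-pending c_valid slot blocks the entry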
1141
1142  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1143  val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr,mmioReadPtr)  ||  mmioReadPtr ===   ifuPtr) &&
1144                       Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited})).andR
1145  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1146
1147  // commit reads
1148  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1149  val commit_target =
1150    Mux(RegNext(commPtr === newest_entry_ptr),
1151      RegNext(newest_entry_target),
1152      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
1153  ftq_pd_mem.io.raddr.last := commPtr.value
1154  val commit_pd = ftq_pd_mem.io.rdata.last
1155  ftq_redirect_sram.io.ren.last := canCommit
1156  ftq_redirect_sram.io.raddr.last := commPtr.value
1157  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
1158  ftq_meta_1r_sram.io.ren(0) := canCommit
1159  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1160  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
1161  ftb_entry_mem.io.raddr.last := commPtr.value
1162  val commit_ftb_entry = ftb_entry_mem.io.rdata.last
1163
1164  // need one cycle to read mem and srams
1165  val do_commit_ptr = RegNext(commPtr)
1166  val do_commit = RegNext(canCommit, init=false.B)
1167  when (canCommit) {
1168    commPtr_write := commPtrPlus1
1169    commPtrPlus1_write := commPtrPlus1 + 1.U
1170  }
1171  val commit_state = RegNext(commitStateQueue(commPtr.value))
1172  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1173  val do_commit_cfi = WireInit(cfiIndex_vec(do_commit_ptr.value))
1174  //
1175  //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1176  //  can_commit_cfi.valid := false.B
1177  //}
1178  val commit_cfi = RegNext(can_commit_cfi)
1179  val debug_cfi = commitStateQueue(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_commited && do_commit_cfi.valid
1180
1181  val commit_mispredict  : Vec[Bool] = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
1182    case (mis, state) => mis && state === c_commited
1183  })
1184  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_commited)) // [PredictWidth]
1185  val can_commit_hit                 = entry_hit_status(commPtr.value)
1186  val commit_hit                     = RegNext(can_commit_hit)
1187  val diff_commit_target             = RegNext(update_target(commPtr.value)) // TODO: remove this
1188  val commit_stage                   = RegNext(pred_stage(commPtr.value))
1189  val commit_valid                   = commit_hit === h_hit || commit_cfi.valid // hit or taken
1190
1191  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
1192  switch (bpu_ftb_update_stall) {
1193    is (0.U) {
1194      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1195        bpu_ftb_update_stall := 2.U // 2-cycle stall
1196      }
1197    }
1198    is (2.U) {
1199      bpu_ftb_update_stall := 1.U
1200    }
1201    is (1.U) {
1202      bpu_ftb_update_stall := 0.U
1203    }
1204    is (3.U) {
1205      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1206    }
1207  }
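  // The switch above is a simple down-counter: a committing cfi that missed
  // in the FTB loads 2, which then counts 2 -> 1 -> 0 while
  // may_have_stall_from_bpu holds off further commits. A behavior-equivalent
  // sketch (minus the state-3 assertion):
  //   val stall_trigger = can_commit_cfi.valid && !to_bpu_hit && canCommit
  //   when (bpu_ftb_update_stall === 0.U && stall_trigger) {
  //     bpu_ftb_update_stall := 2.U
  //   } .elsewhen (bpu_ftb_update_stall =/= 0.U) {
  //     bpu_ftb_update_stall := bpu_ftb_update_stall - 1.U
  //   }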
1208
1209  // TODO: remove this
1210  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1211
1212  // update latency stats
1213  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1214  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
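  // update_latency is the entry's lifetime from s1 prediction to commit-time
  // BPU update: GTimer() at commit minus the recorded s1 cycle, plus one.
  // E.g. an entry predicted at cycle 100 and updating at cycle 130 logs 31.
  // When pred_s1_cycle is not recorded, the dummy vector feeds the histogram,
  // so the numbers are only meaningful on configurations that populate it.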
1215
1216  io.toBpu.update := DontCare
1217  io.toBpu.update.valid := commit_valid && do_commit
1218  val update = io.toBpu.update.bits
1219  update.false_hit   := commit_hit === h_false_hit
1220  update.pc          := commit_pc_bundle.startAddr
1221  update.meta        := commit_meta.meta
1222  update.cfi_idx     := commit_cfi
1223  update.full_target := commit_target
1224  update.from_stage  := commit_stage
1225  update.spec_info   := commit_spec_meta
1226  XSError(commit_valid && do_commit && debug_cfi, "\ncommitting cfi should be in c_commited state\n")
1227
1228  val commit_real_hit = commit_hit === h_hit
1229  val update_ftb_entry = update.ftb_entry
1230
1231  val ftbEntryGen = Module(new FTBEntryGen).io
1232  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1233  ftbEntryGen.old_entry      := commit_ftb_entry
1234  ftbEntryGen.pd             := commit_pd
1235  ftbEntryGen.cfiIndex       := commit_cfi
1236  ftbEntryGen.target         := commit_target
1237  ftbEntryGen.hit            := commit_real_hit
1238  ftbEntryGen.mispredict_vec := commit_mispredict
1239
1240  update_ftb_entry         := ftbEntryGen.new_entry
1241  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1242  update.mispred_mask      := ftbEntryGen.mispred_mask
1243  update.old_entry         := ftbEntryGen.is_old_entry
1244  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1245  update.br_taken_mask     := ftbEntryGen.taken_mask
1246  update.br_committed      := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1247    case (valid, offset) => valid && commit_instCommited(offset)
1248  }
1249  update.jmp_taken         := ftbEntryGen.jmp_taken
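  // FTBEntryGen folds commit results back into an FTB entry: from the start
  // address, old entry, predecode bits, committed cfi and target, and the
  // hit/mispredict status, it derives new_entry plus the side information
  // consumed above (new_br_insert_pos, mispred_mask, is_old_entry,
  // taken_mask, jmp_taken).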
1250
1251  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1252  // update.full_pred.jalr_target := commit_target
1253  // update.full_pred.hit := true.B
1254  // when (update.full_pred.is_jalr) {
1255  //   update.full_pred.targets.last := commit_target
1256  // }
1257
1258  // ****************************************************************
1259  // *********************** to prefetch ****************************
1260  // ****************************************************************
1261
1262  ftq_pc_mem.io.other_raddrs(0) := DontCare
1263  if(cacheParams.enableICachePrefetch){
1264    val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
1265    val diff_prefetch_addr = WireInit(update_target(prefetchPtr.value)) // TODO: remove this
1266    // TODO: must be made wider
1267    prefetchPtr := prefetchPtr + io.toPrefetch.req.fire
1268
1269    val prefetch_too_late = (isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr)) || (prefetchPtr === ifuPtr)
1270    when(prefetch_too_late){
1271      when(prefetchPtr =/= bpuPtr){
1272        prefetchPtr := bpuPtr - 1.U
1273      }.otherwise{
1274        prefetchPtr := ifuPtr
1275      }
1276    }
1277
1278    ftq_pc_mem.io.other_raddrs(0) := prefetchPtr.value
1279
1280    when (bpu_s2_redirect && !isBefore(prefetchPtr, bpu_s2_resp.ftq_idx)) {
1281      prefetchPtr := bpu_s2_resp.ftq_idx
1282    }
1283
1284    when (bpu_s3_redirect && !isBefore(prefetchPtr, bpu_s3_resp.ftq_idx)) {
1285      prefetchPtr := bpu_s3_resp.ftq_idx
1286      // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
1287    }
1288
1289
1290    val prefetch_is_to_send = WireInit(entry_fetch_status(prefetchPtr.value) === f_to_send)
1291    val prefetch_addr = Wire(UInt(VAddrBits.W))
1292
1293    when (last_cycle_bpu_in && bpu_in_bypass_ptr === prefetchPtr) {
1294      prefetch_is_to_send := true.B
1295      prefetch_addr := last_cycle_bpu_target
1296      diff_prefetch_addr := last_cycle_bpu_target // TODO: remove this
1297    }.otherwise{
1298      prefetch_addr := RegNext(ftq_pc_mem.io.other_rdatas(0).startAddr)
1299    }
1300    io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && prefetch_is_to_send
1301    io.toPrefetch.req.bits.target := prefetch_addr
1302
1303    when(redirectVec.map(r => r.valid).reduce(_||_)){
1304      val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1305      val next = r.ftqIdx + 1.U
1306      prefetchPtr := next
1307    }
1308
1309    // TODO: remove this
1310    // XSError(io.toPrefetch.req.valid && diff_prefetch_addr =/= prefetch_addr,
1311    //         f"\nprefetch_req_target wrong! prefetchPtr: ${prefetchPtr}, prefetch_addr: ${Hexadecimal(prefetch_addr)} diff_prefetch_addr: ${Hexadecimal(diff_prefetch_addr)}\n")
1312
1313
1314    XSError(isBefore(bpuPtr, prefetchPtr) && !isFull(bpuPtr, prefetchPtr), "\nprefetchPtr is ahead of bpuPtr!\n")
1315//    XSError(isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr), "\nprefetchPtr is behind ifuPtr!\n")
1316  }
1317  else {
1318    io.toPrefetch.req <> DontCare
1319  }
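  // Prefetch pointer invariants enforced above: prefetchPtr chases bpuPtr (a
  // request is only issued while prefetchPtr =/= bpuPtr), is pushed forward
  // again when it falls behind ifuPtr, is pulled back by s2/s3 BPU redirects,
  // and follows backend/IFU redirects to ftqIdx + 1; prefetchPtr running past
  // bpuPtr trips the XSError.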
1320
1321  // ******************************************************************************
1322  // **************************** commit perf counters ****************************
1323  // ******************************************************************************
1324
1325  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
1326  val commit_mispred_mask = commit_mispredict.asUInt
1327  val commit_not_mispred_mask = ~commit_mispred_mask
1328
1329  val commit_br_mask = commit_pd.brMask.asUInt
1330  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1331  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
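  // commit_jmp_mask marks at most one slot: UIntToOH turns jmpOffset into a
  // one-hot PredictWidth-bit vector and the Fill term zeroes it when no jump
  // was predecoded. A worked example, assuming PredictWidth == 16:
  //   jmpOffset = 3, jmpInfo.valid = true.B  -> commit_jmp_mask = 0x0008.U
  //   jmpOffset = 3, jmpInfo.valid = false.B -> commit_jmp_mask = 0x0000.U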
1332
1333  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1334
1335  val mbpRights = mbpInstrs & commit_not_mispred_mask
1336  val mbpWrongs = mbpInstrs & commit_mispred_mask
1337
1338  io.bpuInfo.bpRight := PopCount(mbpRights)
1339  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1340
1341  val isWriteFTQTable = WireInit(Constantin.createRecord("isWriteFTQTable" + p(XSCoreParamsKey).HartId.toString))
1342  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
1343  // Cfi Info
1344  for (i <- 0 until PredictWidth) {
1345    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1346    val v = commit_state(i) === c_commited
1347    val isBr = commit_pd.brMask(i)
1348    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1349    val isCfi = isBr || isJmp
1350    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1351    val misPred = commit_mispredict(i)
1352    // val ghist = commit_spec_meta.ghist.predHist
1353    val histPtr = commit_spec_meta.histPtr
1354    val predCycle = commit_meta.meta(63, 0)
1355    val target = commit_target
1356
1357    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1358    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
1359    val addIntoHist = (commit_hit === h_hit && inFtbEntry) || (commit_hit =/= h_hit && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1360    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1361    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1362    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1363    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1364
1365    val logbundle = Wire(new FtqDebugBundle)
1366    logbundle.pc := pc
1367    logbundle.target := target
1368    logbundle.isBr := isBr
1369    logbundle.isJmp := isJmp
1370    logbundle.isCall := isJmp && commit_pd.hasCall
1371    logbundle.isRet := isJmp && commit_pd.hasRet
1372    logbundle.misPred := misPred
1373    logbundle.isTaken := isTaken
1374    logbundle.predStage := commit_stage
1375
1376    ftqBranchTraceDB.log(
1377      data = logbundle,
1378      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1379      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1380      clock = clock,
1381      reset = reset
1382    )
1383  }
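  // Each committed CFI is mirrored into the "FTQTable<hartId>" ChiselDB table,
  // gated at runtime by the "isWriteFTQTable<hartId>" Constantin record; with
  // both enabled, one FtqDebugBundle row is logged per committed branch or
  // jump, intended for offline branch-trace analysis.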
1384
1385  val enq = io.fromBpu.resp
1386  val perf_redirect = backendRedirect
1387
1388  XSPerfAccumulate("entry", validEntries)
1389  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1390  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1391  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1392  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1393
1394  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1395
1396  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1397  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1398  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1399  XSPerfAccumulate("bpu_to_ifu_bubble_when_ftq_full", (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready)
1400
1401  XSPerfAccumulate("redirectAhead_ValidNum", io.fromBackend.ftqIdxAhead.map(_.valid).reduce(_|_))
1402  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1403  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1404
1405  val from_bpu = io.fromBpu.resp.bits
1406  val to_ifu = io.toIfu.req.bits
1407
1408
1409  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth+1, 1)
1410
1414  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1415  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1416  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1417  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1418
1419
1420  val mbpBRights = mbpRights & commit_br_mask
1421  val mbpJRights = mbpRights & commit_jal_mask
1422  val mbpIRights = mbpRights & commit_jalr_mask
1423  val mbpCRights = mbpRights & commit_call_mask
1424  val mbpRRights = mbpRights & commit_ret_mask
1425
1426  val mbpBWrongs = mbpWrongs & commit_br_mask
1427  val mbpJWrongs = mbpWrongs & commit_jal_mask
1428  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1429  val mbpCWrongs = mbpWrongs & commit_call_mask
1430  val mbpRWrongs = mbpWrongs & commit_ret_mask
1431
1432  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1433
1434  def pred_stage_map(src: UInt, name: String) = {
1435    (0 until numBpStages).map(i =>
1436      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1437    ).foldLeft(Map[String, UInt]())(_+_)
1438  }
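  // pred_stage_map expands into one counter per BPU stage; assuming
  // numBpStages == 3, pred_stage_map(mbpWrongs, "mispredict") yields
  //   Map("mispredict_stage_1" -> PopCount(bits with stage BP_STAGES(0)),
  //       "mispredict_stage_2" -> PopCount(bits with stage BP_STAGES(1)),
  //       "mispredict_stage_3" -> PopCount(bits with stage BP_STAGES(2)))
  // where each PopCount only sees src bits whose entry committed with
  // commit_pred_stage equal to that stage.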
1439
1440  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1441  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1442  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1443  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1444  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1445  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1446
1447  val update_valid = io.toBpu.update.valid
1448  def u(cond: Bool) = update_valid && cond
1449  val ftb_false_hit = u(update.false_hit)
1450  // assert(!ftb_false_hit)
1451  val ftb_hit = u(commit_hit === h_hit)
1452
1453  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1454  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1455  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1456  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1457
1458  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1459
1460  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1461  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1462  val ftb_modified_entry_ifu_redirected = u(ifuRedirected(do_commit_ptr.value))
1463  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1464  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1465  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1466
1467  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
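  // getFtbEntryLen reports the entry's span in minimal-instruction slots:
  // (fallThrough - pc) >> instOffsetBits. E.g., assuming instOffsetBits == 1
  // (2-byte granularity with RVC), pc = 0x80000000 and a fall-through of
  // 0x80000020 yield a length of 16.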
1468  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1469  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth+1, 1)
1470  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth+1, 1)
1471  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1472  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth+1, 1)
1473
1474  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize+1, 1)
1475
1476  val perfCountsMap = Map(
1477    "BpInstr" -> PopCount(mbpInstrs),
1478    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1479    "BpRight"  -> PopCount(mbpRights),
1480    "BpWrong"  -> PopCount(mbpWrongs),
1481    "BpBRight" -> PopCount(mbpBRights),
1482    "BpBWrong" -> PopCount(mbpBWrongs),
1483    "BpJRight" -> PopCount(mbpJRights),
1484    "BpJWrong" -> PopCount(mbpJWrongs),
1485    "BpIRight" -> PopCount(mbpIRights),
1486    "BpIWrong" -> PopCount(mbpIWrongs),
1487    "BpCRight" -> PopCount(mbpCRights),
1488    "BpCWrong" -> PopCount(mbpCWrongs),
1489    "BpRRight" -> PopCount(mbpRRights),
1490    "BpRWrong" -> PopCount(mbpRWrongs),
1491
1492    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1493    "ftb_hit"                      -> PopCount(ftb_hit),
1494    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1495    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1496    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1497    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1498    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1499    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1500    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1501    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1502    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1503    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1504  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1505       correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1506
1507  for((key, value) <- perfCountsMap) {
1508    XSPerfAccumulate(key, value)
1509  }
1510
1511  // --------------------------- Debug --------------------------------
1512  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1513  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1514  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1515  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr, [commPtr] $commPtr\n")
1516  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1517    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1518  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1519
1520  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1521  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1522  //       case (((valid, pd), ans), taken) =>
1523  //       Mux(valid && pd.isBr,
1524  //         isWrong ^ Mux(ans.hit.asBool,
1525  //           Mux(ans.taken.asBool, taken && ans.target === commit.target,
1526  //           !taken),
1527  //         !taken),
1528  //       false.B)
1529  //     }
1530  //   }
1531
1532  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1533  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1534  //       case (((valid, pd), ans), taken) =>
1535  //       Mux(valid && pd.isBr,
1536  //         isWrong ^ Mux(ans.hit.asBool,
1537  //           Mux(ans.taken.asBool, taken && ans.target === commit.target,
1538  //           !taken),
1539  //         !taken),
1540  //       false.B)
1541  //     }
1542  //   }
1543
1544  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1545  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1546  //       case (((valid, pd), ans), taken) =>
1547  //       Mux(valid && pd.isBr,
1548  //         isWrong ^ (ans.taken.asBool === taken),
1549  //       false.B)
1550  //     }
1551  //   }
1552
1553  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1554  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1555  //       case (((valid, pd), ans), taken) =>
1556  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1557  //         isWrong ^ (!taken),
1558  //           false.B)
1559  //     }
1560  //   }
1561
1562  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1563  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1564  //       case (((valid, pd), ans), taken) =>
1565  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1566  //         isWrong ^ (ans.target === commit.target),
1567  //           false.B)
1568  //     }
1569  //   }
1570
1571  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1572  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1573  //   // btb and ubtb pred jal and jalr as well
1574  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1575  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1576  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1577  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1578
1579  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1580  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1581
1582  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1583  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1584
1585  val perfEvents = Seq(
1586    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1587    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1588    ("bpu_to_ftq_stall       ", enq.valid && !enq.ready                                                     ),
1589    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1590    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1591    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1592    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1593    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1594    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1595    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1596    ("BpRight                ", PopCount(mbpRights)                                                         ),
1597    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1598    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1599    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1600    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1601    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1602    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1603    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1604    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1605    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1606    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1607    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1608    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1609    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1610  )
1611  generatePerfEvent()
1612}