// /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 9672f0b7124446b0dbe8f0a1e831208f22e01305)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.frontend.icache._
import xiangshan.backend.CtrlToFtqIO
import xiangshan.backend.decode.ImmUnion
import utility.ChiselDB

class FtqDebugBundle extends Bundle {
  val pc = UInt(39.W)
  val target = UInt(39.W)
  val isBr = Bool()
  val isJmp = Bool()
  val isCall = Bool()
  val isRet = Bool()
  val misPred = Bool()
  val isTaken = Bool()
  val predStage = UInt(2.W)
}

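// FTQ entries are indexed by a circular queue pointer: a wrap flag plus a value in
// [0, FtqSize). The flag flips each time the value wraps, which lets age comparisons
// (isBefore/isAfter from the circular-queue helpers) work across the wrap point.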
class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

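// An N-read, 1-write memory built from numRead copies of a single-ported SRAMTemplate:
// every copy is written with the same data each cycle, and each copy serves exactly one
// read port.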
class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

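// Per-entry PC information kept in the FTQ PC mem. Only the fetch-block start address
// (plus the next cache-line address) is stored; getPc reconstructs an instruction's PC
// from its offset within the block, taking the upper bits from nextLineAddr when
// isNextMask marks the offset as having crossed into the next line.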
class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  val startAddr = UInt(VAddrBits.W)
  val nextLineAddr = UInt(VAddrBits.W)
  val isNextMask = Vec(PredictWidth, Bool())
  val fallThruError = Bool()
  // val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
    this.startAddr := resp.pc(3)
    this.nextLineAddr := resp.pc(3) + (FetchWidth * 4 * 2).U // may be broken on other configs
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(3)(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool
    ))
    this.fallThruError := resp.fallThruError(3)
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}"
  }
}

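// Compressed pre-decode info written back by the IFU for one fetch block. jmpInfo records
// whether the block contains a (single) jal/jalr and, if so, its (isJalr, isCall, isRet)
// attributes; toPd expands this back into a per-instruction PreDecodeInfo view.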
class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends SpeculativeInfo {
  val sc_disagree = Vec(numBr, Bool())
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}


class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new BranchPredictionRedirect)
  val topdown_redirect = Valid(new BranchPredictionRedirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet from bpu s3 can reach f1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

class FtqToICacheIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  // NOTE: req.bits must be prepared in cycle T,
  // while req.valid is set true in cycle T + 1
  val req = Decoupled(new FtqToICacheRequestBundle)
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirectPcRead = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  // write to backend pc mem
  val pc_mem_wen = Output(Bool())
  val pc_mem_waddr = Output(UInt(log2Ceil(FtqSize).W))
  val pc_mem_wdata = Output(new Ftq_RF_Components)
  // newest target
  val newest_entry_target = Output(UInt(VAddrBits.W))
  val newest_entry_ptr = Output(new FtqPtr)
}


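// Generates the FTB entry to be written back at commit time. If the original prediction
// missed in the FTB, a fresh entry is built from pre-decode info; otherwise the old entry
// is patched: a newly discovered taken branch may be inserted, a jalr target corrected,
// or an always_taken bit cleared.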
class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val jmp_taken = Output(Bool())
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  // val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft, getLower(io.start_addr))
  init_entry.carry   := Mux(entry_has_jmp && !last_jmp_rvi, jmpPft(carryPos-instOffsetBits), true.B)
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  // that means fall thru points to the middle of an inst
  init_entry.last_may_be_rvi_call := pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask(pd.jmpOffset)

  // if hit, check whether a new cfi(only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.last_may_be_rvi_call := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  io.jmp_taken := io.new_entry.jmpValid && io.new_entry.tailSlot.offset === io.cfiIndex.bits
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

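// Wraps a SyncDataModuleTemplate holding one Ftq_RF_Components per FTQ entry. The last
// five read ports are dedicated to ifuPtr, ifuPtr+1, ifuPtr+2, commPtr+1 and commPtr;
// they are addressed with the next-cycle (write) value of each pointer so the read data
// lines up with the updated pointer one cycle later.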
class FtqPcMemWrapper(numOtherReads: Int)(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo {
  val io = IO(new Bundle {
    val ifuPtr_w       = Input(new FtqPtr)
    val ifuPtrPlus1_w  = Input(new FtqPtr)
    val ifuPtrPlus2_w  = Input(new FtqPtr)
    val commPtr_w      = Input(new FtqPtr)
    val commPtrPlus1_w = Input(new FtqPtr)
    val ifuPtr_rdata       = Output(new Ftq_RF_Components)
    val ifuPtrPlus1_rdata  = Output(new Ftq_RF_Components)
    val ifuPtrPlus2_rdata  = Output(new Ftq_RF_Components)
    val commPtr_rdata      = Output(new Ftq_RF_Components)
    val commPtrPlus1_rdata = Output(new Ftq_RF_Components)

    val other_raddrs = Input(Vec(numOtherReads, UInt(log2Ceil(FtqSize).W)))
    val other_rdatas = Output(Vec(numOtherReads, new Ftq_RF_Components))

    val wen = Input(Bool())
    val waddr = Input(UInt(log2Ceil(FtqSize).W))
    val wdata = Input(new Ftq_RF_Components)
  })

  val num_pc_read = numOtherReads + 5
  val mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize,
    num_pc_read, 1, "FtqPC"))
  mem.io.wen(0)   := io.wen
  mem.io.waddr(0) := io.waddr
  mem.io.wdata(0) := io.wdata

  // read one cycle ahead for ftq local reads
  val raddr_vec = VecInit(io.other_raddrs ++
    Seq(io.ifuPtr_w.value, io.ifuPtrPlus1_w.value, io.ifuPtrPlus2_w.value, io.commPtrPlus1_w.value, io.commPtr_w.value))

  mem.io.raddr := raddr_vec

  io.other_rdatas       := mem.io.rdata.dropRight(5)
  io.ifuPtr_rdata       := mem.io.rdata.dropRight(4).last
  io.ifuPtrPlus1_rdata  := mem.io.rdata.dropRight(3).last
  io.ifuPtrPlus2_rdata  := mem.io.rdata.dropRight(2).last
  io.commPtrPlus1_rdata := mem.io.rdata.dropRight(1).last
  io.commPtr_rdata      := mem.io.rdata.last
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
  with HasICacheParameters{
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toICache = new FtqToICacheIO
    val toBackend = new FtqToCtrlIO

    val toPrefetch = new FtqPrefechBundle

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }

    val mmioCommitRead = Flipped(new mmioCommitRead)

    // for perf
    val ControlBTBMissBubble = Output(Bool())
    val TAGEMissBubble = Output(Bool())
    val SCMissBubble = Output(Bool())
    val ITTAGEMissBubble = Output(Bool())
    val RASMissBubble = Output(Bool())
  })
  io.bpuInfo := DontCare

  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  dontTouch(topdown_stage)
  // only driven by clock, not valid-ready
  topdown_stage := io.fromBpu.resp.bits.topdown_info
  io.toIfu.req.bits.topdown_info := topdown_stage

  val ifuRedirected = RegInit(VecInit(Seq.fill(FtqSize)(false.B)))

  val backendRedirect = Wire(Valid(new BranchPredictionRedirect))
  when(io.fromBackend.redirect.valid) {
    assert(RegNext(io.fromBackend.ftqIdxAhead.map(_.valid).reduce(_|_)))
    assert(io.fromBackend.ftqIdxSelOH.valid)
    assert(PopCount(io.fromBackend.ftqIdxSelOH.bits) === 1.U)
  }

  val stage2Flush = backendRedirect.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !backendRedirect.valid
  allowToIfu := !ifuFlush && !backendRedirect.valid

  def copyNum = 5
  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val ifuPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
  val ifuPtrPlus2 = RegInit(FtqPtr(false.B, 2.U))
  val commPtrPlus1 = RegInit(FtqPtr(false.B, 1.U))
  val copied_ifu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
  val copied_bpu_ptr = Seq.fill(copyNum)(RegInit(FtqPtr(false.B, 0.U)))
  require(FtqSize >= 4)
  val ifuPtr_write       = WireInit(ifuPtr)
  val ifuPtrPlus1_write  = WireInit(ifuPtrPlus1)
  val ifuPtrPlus2_write  = WireInit(ifuPtrPlus2)
  val ifuWbPtr_write     = WireInit(ifuWbPtr)
  val commPtr_write      = WireInit(commPtr)
  val commPtrPlus1_write = WireInit(commPtrPlus1)
  ifuPtr       := ifuPtr_write
  ifuPtrPlus1  := ifuPtrPlus1_write
  ifuPtrPlus2  := ifuPtrPlus2_write
  ifuWbPtr     := ifuWbPtr_write
  commPtr      := commPtr_write
  commPtrPlus1 := commPtrPlus1_write
  copied_ifu_ptr.map{ptr =>
    ptr := ifuPtr_write
    dontTouch(ptr)
  }
  val validEntries = distanceBetween(bpuPtr, commPtr)
  val canCommit = Wire(Bool())

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U || canCommit
  io.fromBpu.resp.ready := new_entry_ready
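  // A new BPU response is accepted as long as the queue is not full, or an entry is
  // being committed (and thus freed) in the same cycle.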

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid(3) && bpu_s2_resp.hasRedirect(3)
  val bpu_s3_redirect = bpu_s3_resp.valid(3) && bpu_s3_resp.hasRedirect(3)

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdxForFtq
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:      prefetchReq ++  ifuReq1 + ifuReq2 + ifuReq3 + commitUpdate2 + commitUpdate
  val ftq_pc_mem = Module(new FtqPcMemWrapper(1))
  // resp from uBTB
  ftq_pc_mem.io.wen := bpu_in_fire
  ftq_pc_mem.io.waddr := bpu_in_resp_idx
  ftq_pc_mem.io.wdata.fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+BackendRedirectNum+1))
  // this info is intended to be enqueued at the last stage of the bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata := io.fromBpu.resp.bits.last_stage_spec_info
  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of the bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid(3)
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.last_stage_meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+BackendRedirectNum+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid(3)
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.last_stage_ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough //TODO: remove this
  val newest_entry_target = Reg(UInt(VAddrBits.W))
  val newest_entry_ptr = Reg(new FtqPtr)
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))
  val pred_s1_cycle = if (!env.FPGAPlatform) Some(Reg(Vec(FtqSize, UInt(64.W)))) else None

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))

  // modify registers one cycle later to cut critical path
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_bpu_in_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in_idx = last_cycle_bpu_in_ptr.value
  val last_cycle_bpu_target = RegNext(bpu_in_resp.getTarget(3))
  val last_cycle_cfiIndex = RegNext(bpu_in_resp.cfiIndex(3))
  val last_cycle_bpu_in_stage = RegNext(bpu_in_stage)

  def extra_copyNum_for_commitStateQueue = 2
  val copied_last_cycle_bpu_in = VecInit(Seq.fill(copyNum+extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_fire)))
  val copied_last_cycle_bpu_in_ptr_for_ftq = VecInit(Seq.fill(extra_copyNum_for_commitStateQueue)(RegNext(bpu_in_resp_ptr)))

  when (last_cycle_bpu_in) {
    entry_fetch_status(last_cycle_bpu_in_idx) := f_to_send
    cfiIndex_vec(last_cycle_bpu_in_idx) := last_cycle_cfiIndex
    pred_stage(last_cycle_bpu_in_idx) := last_cycle_bpu_in_stage

    update_target(last_cycle_bpu_in_idx) := last_cycle_bpu_target // TODO: remove this
    newest_entry_target := last_cycle_bpu_target
    newest_entry_ptr := last_cycle_bpu_in_ptr
  }

  // reduce fanout by delaying the write for a cycle
  when (RegNext(last_cycle_bpu_in)) {
    mispredict_vec(RegNext(last_cycle_bpu_in_idx)) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
  }

  // record s1 pred cycles
  pred_s1_cycle.map(vec => {
    when (bpu_in_fire && (bpu_in_stage === BP_S1)) {
      vec(bpu_in_resp_ptr.value) := bpu_in_resp.full_pred(0).predCycle.getOrElse(0.U)
    }
  })

  // reduce fanout using copied last_cycle_bpu_in and copied last_cycle_bpu_in_ptr
  val copied_last_cycle_bpu_in_for_ftq = copied_last_cycle_bpu_in.takeRight(extra_copyNum_for_commitStateQueue)
  copied_last_cycle_bpu_in_for_ftq.zip(copied_last_cycle_bpu_in_ptr_for_ftq).zipWithIndex.map {
    case ((in, ptr), i) =>
      when (in) {
        val perSetEntries = FtqSize / extra_copyNum_for_commitStateQueue // 32
        require(FtqSize % extra_copyNum_for_commitStateQueue == 0)
        for (j <- 0 until perSetEntries) {
          when (ptr.value === (i*perSetEntries+j).U) {
            commitStateQueue(i*perSetEntries+j) := VecInit(Seq.fill(PredictWidth)(c_invalid))
          }
        }
      }
  }

  // the number of cycles is fixed
  io.toBackend.newest_entry_ptr := RegNext(newest_entry_ptr)
  io.toBackend.newest_entry_target := RegNext(newest_entry_target)


  bpuPtr := bpuPtr + enq_fire
  copied_bpu_ptr.map(_ := bpuPtr + enq_fire)
  when (io.toIfu.req.fire && allowToIfu) {
    ifuPtr_write := ifuPtrPlus1
    ifuPtrPlus1_write := ifuPtrPlus2
    ifuPtrPlus2_write := ifuPtrPlus2 + 1.U
  }

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid(3)) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred(3).hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_redirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s2_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr_write := bpu_s2_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s2_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s2_resp.ftq_idx + 2.U
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_redirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    copied_bpu_ptr.map(_ := bpu_s3_resp.ftq_idx + 1.U)
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr_write := bpu_s3_resp.ftq_idx
      ifuPtrPlus1_write := bpu_s3_resp.ftq_idx + 1.U
      ifuPtrPlus2_write := bpu_s3_resp.ftq_idx + 2.U
    }
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
  XSError(isBefore(ifuWbPtr, commPtr) && !isFull(ifuWbPtr, commPtr), "\ncommPtr is before ifuWbPtr!\n")

  (0 until copyNum).map{i =>
    XSError(copied_bpu_ptr(i) =/= bpuPtr, "\ncopiedBpuPtr is different from bpuPtr!\n")
  }

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  // 0  for ifu, and 1-4 for ICache
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)
  val copied_bpu_in_bypass_buf = VecInit(Seq.fill(copyNum)(RegEnable(ftq_pc_mem.io.wdata, bpu_in_fire)))
  val bpu_in_bypass_buf_for_ifu = bpu_in_bypass_buf
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  val copied_bpu_in_bypass_ptr = VecInit(Seq.fill(copyNum)(RegNext(bpu_in_resp_ptr)))
  val copied_last_cycle_to_ifu_fire = VecInit(Seq.fill(copyNum)(RegNext(io.toIfu.req.fire)))

  // read pc and target
  ftq_pc_mem.io.ifuPtr_w       := ifuPtr_write
  ftq_pc_mem.io.ifuPtrPlus1_w  := ifuPtrPlus1_write
  ftq_pc_mem.io.ifuPtrPlus2_w  := ifuPtrPlus2_write
  ftq_pc_mem.io.commPtr_w      := commPtr_write
  ftq_pc_mem.io.commPtrPlus1_w := commPtrPlus1_write


  io.toIfu.req.bits.ftqIdx := ifuPtr

  val toICachePcBundle = Wire(Vec(copyNum,new Ftq_RF_Components))
  val toICacheEntryToSend = Wire(Vec(copyNum,Bool()))
  val toIfuPcBundle = Wire(new Ftq_RF_Components)
  val entry_is_to_send = WireInit(entry_fetch_status(ifuPtr.value) === f_to_send)
  val entry_ftq_offset = WireInit(cfiIndex_vec(ifuPtr.value))
  val entry_next_addr  = Wire(UInt(VAddrBits.W))

  val pc_mem_ifu_ptr_rdata   = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtr_rdata)))
  val pc_mem_ifu_plus1_rdata = VecInit(Seq.fill(copyNum)(RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)))
  val diff_entry_next_addr = WireInit(update_target(ifuPtr.value)) //TODO: remove this

  val copied_ifu_plus1_to_send = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1))))
  val copied_ifu_ptr_to_send   = VecInit(Seq.fill(copyNum)(RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) || RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr)))

  for(i <- 0 until copyNum){
    when(copied_last_cycle_bpu_in(i) && copied_bpu_in_bypass_ptr(i) === copied_ifu_ptr(i)){
      toICachePcBundle(i) := copied_bpu_in_bypass_buf(i)
      toICacheEntryToSend(i)   := true.B
    }.elsewhen(copied_last_cycle_to_ifu_fire(i)){
      toICachePcBundle(i) := pc_mem_ifu_plus1_rdata(i)
      toICacheEntryToSend(i)   := copied_ifu_plus1_to_send(i)
    }.otherwise{
      toICachePcBundle(i) := pc_mem_ifu_ptr_rdata(i)
      toICacheEntryToSend(i)   := copied_ifu_ptr_to_send(i)
    }
  }

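  // Select what to send to the IFU this cycle, in priority order:
  //   1) the entry BPU wrote last cycle, forwarded from the bypass buffer,
  //   2) if a request fired last cycle, the pre-read data at ifuPtr+1,
  //   3) otherwise the pre-read data at ifuPtr.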
  // TODO: reconsider target address bypass logic
  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuPcBundle := bpu_in_bypass_buf_for_ifu
    entry_is_to_send := true.B
    entry_next_addr := last_cycle_bpu_target
    entry_ftq_offset := last_cycle_cfiIndex
    diff_entry_next_addr := last_cycle_bpu_target // TODO: remove this
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtrPlus1.value) === f_to_send) ||
                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1)) // reduce potential bubbles
    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
                          bpu_in_bypass_buf_for_ifu.startAddr,
                          Mux(ifuPtr === newest_entry_ptr,
                            newest_entry_target,
                            RegNext(ftq_pc_mem.io.ifuPtrPlus2_rdata.startAddr))) // ifuPtr+2
  }.otherwise {
    toIfuPcBundle := RegNext(ftq_pc_mem.io.ifuPtr_rdata)
    entry_is_to_send := RegNext(entry_fetch_status(ifuPtr.value) === f_to_send) ||
                        RegNext(last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) // reduce potential bubbles
    entry_next_addr := Mux(last_cycle_bpu_in && bpu_in_bypass_ptr === (ifuPtrPlus1),
                          bpu_in_bypass_buf_for_ifu.startAddr,
                          Mux(ifuPtr === newest_entry_ptr,
                            newest_entry_target,
                            RegNext(ftq_pc_mem.io.ifuPtrPlus1_rdata.startAddr))) // ifuPtr+1
  }

  io.toIfu.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.nextStartAddr := entry_next_addr
  io.toIfu.req.bits.ftqOffset := entry_ftq_offset
  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)

  io.toICache.req.valid := entry_is_to_send && ifuPtr =/= bpuPtr
  io.toICache.req.bits.readValid.zipWithIndex.map{case(copy, i) => copy := toICacheEntryToSend(i) && copied_ifu_ptr(i) =/= copied_bpu_ptr(i)}
  io.toICache.req.bits.pcMemRead.zipWithIndex.map{case(copy,i) => copy.fromFtqPcBundle(toICachePcBundle(i))}
  // io.toICache.req.bits.bypassSelect := last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr
  // io.toICache.req.bits.bpuBypassWrite.zipWithIndex.map{case(bypassWrtie, i) =>
  //   bypassWrtie.startAddr := bpu_in_bypass_buf.tail(i).startAddr
  //   bypassWrtie.nextlineStart := bpu_in_bypass_buf.tail(i).nextLineAddr
  // }

  // TODO: remove this
  XSError(io.toIfu.req.valid && diff_entry_next_addr =/= entry_next_addr,
          p"\nifu_req_target wrong! ifuPtr: ${ifuPtr}, entry_next_addr: ${Hexadecimal(entry_next_addr)} diff_entry_next_addr: ${Hexadecimal(diff_entry_next_addr)}\n")

  // when fall through is smaller in value than start address, there must be a false hit
  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallTHru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
  }

  XSPerfAccumulate(f"fall_through_error_to_ifu", toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit &&
    io.toIfu.req.fire && !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) && !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr))

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }

  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  when (ifu_wb_valid) {
    ifuWbPtr_write := ifuWbPtr + 1.U
  }

  XSError(ifu_wb_valid && isAfter(pdWb.bits.ftqIdx, ifuPtr), "IFU returned a predecode before its req, check IFU")

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // ***************************** to backend *****************************
  // **********************************************************************
  // to backend pc mem / target
  io.toBackend.pc_mem_wen   := RegNext(last_cycle_bpu_in)
  io.toBackend.pc_mem_waddr := RegNext(last_cycle_bpu_in_idx)
  io.toBackend.pc_mem_wdata := RegNext(bpu_in_bypass_buf_for_ifu)

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

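  // Backend redirects are read one cycle ahead: ftqIdxAhead provides the candidate FTQ
  // indices, the speculative-info SRAM and FTB entry mem are read for all of them, and
  // ftqIdxSelOH picks the matching read data when the redirect actually fires.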
  // redirect read cfiInfo, couples to redirectGen s2
  val ftq_redirect_rdata = Wire(Vec(BackendRedirectNum, new Ftq_Redirect_SRAMEntry))
  val ftb_redirect_rdata = Wire(Vec(BackendRedirectNum, new FTBEntry))
  for (i <- 0 until BackendRedirectNum) {
    ftq_redirect_sram.io.ren(i + 1) := io.fromBackend.ftqIdxAhead(i).valid
    ftq_redirect_sram.io.raddr(i + 1) := io.fromBackend.ftqIdxAhead(i).bits.value
    ftb_entry_mem.io.raddr(i + 1)     := io.fromBackend.ftqIdxAhead(i).bits.value

    ftq_redirect_rdata(i) := ftq_redirect_sram.io.rdata(i + 1)
    ftb_redirect_rdata(i) := ftb_entry_mem.io.rdata(i + 1)
  }
  val stage3CfiInfo = Mux1H(io.fromBackend.ftqIdxSelOH.bits, ftq_redirect_rdata)
  val fromBackendRedirect = WireInit(backendRedirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)


  val r_ftb_entry = Mux1H(io.fromBackend.ftqIdxSelOH.bits, ftb_redirect_rdata)
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  backendRedirectCfi.br_hit := r_ftb_entry.brIsSaved(r_ftqOffset)
  backendRedirectCfi.jr_hit := r_ftb_entry.isJalr && r_ftb_entry.tailSlot.offset === r_ftqOffset
  // FIXME: not portable
  backendRedirectCfi.sc_hit := backendRedirectCfi.br_hit && Mux(r_ftb_entry.brSlots(0).offset === r_ftqOffset,
    stage3CfiInfo.sc_disagree(0), stage3CfiInfo.sc_disagree(1))

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
  fromIfuRedirect.bits.BTBMissBubble := true.B
  fromIfuRedirect.bits.debugIsMemVio := false.B
  fromIfuRedirect.bits.debugIsCtrl := false.B

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new BranchPredictionRedirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet && ifuRedirectReg.bits.cfiUpdate.pd.valid) {
    toBpuCfi.target := toBpuCfi.topAddr
  }

  when (ifuRedirectReg.valid) {
    ifuRedirected(ifuRedirectReg.bits.ftqIdx.value) := true.B
  } .elsewhen(RegNext(pdWb.valid)) {
    // if there is a pdWb but no redirect, clear the flag
    ifuRedirected(last_cycle_bpu_in_ptr.value) := false.B
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  backendRedirect.valid := io.fromBackend.redirect.valid
  backendRedirect.bits.connectRedirect(io.fromBackend.redirect.bits)
  backendRedirect.bits.BTBMissBubble := false.B


  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqPtr = wb.bits.ftqIdx
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqPtr, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

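  // A redirect moves the recorded cfi index earlier when it reports a taken branch before
  // the recorded one, clears its valid bit on a not-taken redirect, and (for backend
  // redirects only) records the misprediction bit for that offset.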
  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_ptr, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val r_idx = r_ptr.value
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    } .elsewhen (r_valid && !r_taken && r_offset =/= cfiIndex_vec(r_idx).bits) {
      cfiIndex_vec(r_idx).valid := false.B
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    newest_entry_target := redirect.bits.cfiUpdate.target
    newest_entry_ptr := r_ptr
    update_target(r_idx) := redirect.bits.cfiUpdate.target // TODO: remove this
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }

  when(backendRedirect.valid) {
    updateCfiInfo(backendRedirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  when (backendRedirect.valid) {
    when (backendRedirect.bits.ControlRedirectBubble) {
      when (fromBackendRedirect.bits.ControlBTBMissBubble) {
        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.TAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.SCMissBubble) {
        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.SCMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.ITTAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
      } .elsewhen (fromBackendRedirect.bits.RASMissBubble) {
        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
        io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.RASMissBubble.id) := true.B
      }


    } .elsewhen (backendRedirect.bits.MemVioRedirectBubble) {
      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
    } .otherwise {
      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
      io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
    }
  } .elsewhen (ifuRedirectReg.valid) {
    topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
    io.toIfu.req.bits.topdown_info.reasons(TopDownCounters.BTBMissBubble.id) := true.B
  }

  io.ControlBTBMissBubble := fromBackendRedirect.bits.ControlBTBMissBubble
  io.TAGEMissBubble := fromBackendRedirect.bits.TAGEMissBubble
  io.SCMissBubble := fromBackendRedirect.bits.SCMissBubble
  io.ITTAGEMissBubble := fromBackendRedirect.bits.ITTAGEMissBubble
  io.RASMissBubble := fromBackendRedirect.bits.RASMissBubble

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)

  // when redirecting, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    copied_bpu_ptr.map(_ := next)
    ifuPtr_write := next
    ifuWbPtr_write := next
    ifuPtrPlus1_write := idx + 2.U
    ifuPtrPlus2_write := idx + 3.U

  }
  when(RegNext(redirectVec.map(r => r.valid).reduce(_||_))){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    when (RegNext(notIfu)) {
      commitStateQueue(RegNext(idx.value)).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > RegNext(offset) || i.U === RegNext(offset) && RegNext(flushItSelf)){
          s := c_invalid
        }
      })
    }
  }


  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := backendRedirect.bits
  io.toIfu.redirect.valid   := stage2Flush
  io.toIfu.topdown_redirect := fromBackendRedirect

  // commit
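  // Instructions reported by ROB commit mark their slot c_commited. For fused instruction
  // pairs (commitType 4-7 below), the slot of the second half is marked as well; it may
  // sit at offset+1/offset+2 of the same entry or at offset 0/1 of the next FTQ entry.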
1106  for (c <- io.fromBackend.rob_commits) {
1107    when(c.valid) {
1108      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
1109      // TODO: remove this
1110      // For instruction fusions, we also update the next instruction
1111      when (c.bits.commitType === 4.U) {
1112        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
1113      }.elsewhen(c.bits.commitType === 5.U) {
1114        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
1115      }.elsewhen(c.bits.commitType === 6.U) {
1116        val index = (c.bits.ftqIdx + 1.U).value
1117        commitStateQueue(index)(0) := c_commited
1118      }.elsewhen(c.bits.commitType === 7.U) {
1119        val index = (c.bits.ftqIdx + 1.U).value
1120        commitStateQueue(index)(1) := c_commited
1121      }
1122    }
1123  }
1124
1125  // ****************************************************************
1126  // **************************** to bpu ****************************
1127  // ****************************************************************
1128
1129  io.toBpu.redirect := Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
1130  val dummy_s1_pred_cycle_vec = VecInit(List.tabulate(FtqSize)(_=>0.U(64.W)))
1131  val redirect_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(io.toBpu.redirect.bits.ftqIdx.value) + 1.U
1132  XSPerfHistogram("backend_redirect_latency", redirect_latency, fromBackendRedirect.valid, 0, 60, 1)
1133  XSPerfHistogram("ifu_redirect_latency", redirect_latency, !fromBackendRedirect.valid && ifuRedirectToBpu.valid, 0, 60, 1)
1134
1135  XSError(io.toBpu.redirect.valid && isBefore(io.toBpu.redirect.bits.ftqIdx, commPtr), "Ftq received a redirect after its commit, check backend or replay")
1136
1137  val may_have_stall_from_bpu = Wire(Bool())
1138  val bpu_ftb_update_stall = RegInit(0.U(2.W)) // 2-cycle stall, so we need 3 states
1139  may_have_stall_from_bpu := bpu_ftb_update_stall =/= 0.U
1140  canCommit := commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
1141    Cat(commitStateQueue(commPtr.value).map(s => {
1142      s === c_invalid || s === c_commited
1143    })).andR
1144
1145  val mmioReadPtr = io.mmioCommitRead.mmioFtqPtr
1146  val mmioLastCommit = isBefore(commPtr, mmioReadPtr) && (isAfter(ifuPtr,mmioReadPtr)  ||  mmioReadPtr ===   ifuPtr) &&
1147                       Cat(commitStateQueue(mmioReadPtr.value).map(s => { s === c_invalid || s === c_commited})).andR
1148  io.mmioCommitRead.mmioLastCommit := RegNext(mmioLastCommit)
1149
1150  // commit reads
1151  val commit_pc_bundle = RegNext(ftq_pc_mem.io.commPtr_rdata)
1152  val commit_target =
1153    Mux(RegNext(commPtr === newest_entry_ptr),
1154      RegNext(newest_entry_target),
1155      RegNext(ftq_pc_mem.io.commPtrPlus1_rdata.startAddr))
1156  ftq_pd_mem.io.raddr.last := commPtr.value
1157  val commit_pd = ftq_pd_mem.io.rdata.last
1158  ftq_redirect_sram.io.ren.last := canCommit
1159  ftq_redirect_sram.io.raddr.last := commPtr.value
1160  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
1161  ftq_meta_1r_sram.io.ren(0) := canCommit
1162  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
1163  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
1164  ftb_entry_mem.io.raddr.last := commPtr.value
1165  val commit_ftb_entry = ftb_entry_mem.io.rdata.last
1166
1167  // need one cycle to read mem and srams
1168  val do_commit_ptr = RegNext(commPtr)
1169  val do_commit = RegNext(canCommit, init=false.B)
1170  when (canCommit) {
1171    commPtr_write := commPtrPlus1
1172    commPtrPlus1_write := commPtrPlus1 + 1.U
1173  }
1174  val commit_state = RegNext(commitStateQueue(commPtr.value))
1175  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
1176  val do_commit_cfi = WireInit(cfiIndex_vec(do_commit_ptr.value))
1177  //
1178  //when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
1179  //  can_commit_cfi.valid := false.B
1180  //}
1181  val commit_cfi = RegNext(can_commit_cfi)
1182  val debug_cfi = commitStateQueue(do_commit_ptr.value)(do_commit_cfi.bits) =/= c_commited && do_commit_cfi.valid
1183
1184  val commit_mispredict  : Vec[Bool] = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
1185    case (mis, state) => mis && state === c_commited
1186  })
1187  val commit_instCommited: Vec[Bool] = VecInit(commit_state.map(_ === c_commited)) // [PredictWidth]
1188  val can_commit_hit                 = entry_hit_status(commPtr.value)
1189  val commit_hit                     = RegNext(can_commit_hit)
1190  val diff_commit_target             = RegNext(update_target(commPtr.value)) // TODO: remove this
1191  val commit_stage                   = RegNext(pred_stage(commPtr.value))
1192  val commit_valid                   = commit_hit === h_hit || commit_cfi.valid // hit or taken
1193
1194  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
1195  switch (bpu_ftb_update_stall) {
1196    is (0.U) {
1197      when (can_commit_cfi.valid && !to_bpu_hit && canCommit) {
1198        bpu_ftb_update_stall := 2.U // 2-cycle stall
1199      }
1200    }
1201    is (2.U) {
1202      bpu_ftb_update_stall := 1.U
1203    }
1204    is (1.U) {
1205      bpu_ftb_update_stall := 0.U
1206    }
1207    is (3.U) {
1208      XSError(true.B, "bpu_ftb_update_stall should be 0, 1 or 2")
1209    }
1210  }
1211
1212  // TODO: remove this
1213  XSError(do_commit && diff_commit_target =/= commit_target, "\ncommit target should be the same as update target\n")
1214
1215  // update latency stats
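  // Cycles elapsed between the s1 prediction of the committing entry and its BPU update.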
1216  val update_latency = GTimer() - pred_s1_cycle.getOrElse(dummy_s1_pred_cycle_vec)(do_commit_ptr.value) + 1.U
1217  XSPerfHistogram("bpu_update_latency", update_latency, io.toBpu.update.valid, 0, 64, 2)
1218
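  // Assemble the BPU update request from the committing entry; a valid update is issued when
  // the entry either hit in the FTB or actually took a control-flow instruction (commit_valid),
  // one cycle after canCommit (do_commit).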
1219  io.toBpu.update := DontCare
1220  io.toBpu.update.valid := commit_valid && do_commit
1221  val update = io.toBpu.update.bits
1222  update.false_hit   := commit_hit === h_false_hit
1223  update.pc          := commit_pc_bundle.startAddr
1224  update.meta        := commit_meta.meta
1225  update.cfi_idx     := commit_cfi
1226  update.full_target := commit_target
1227  update.from_stage  := commit_stage
1228  update.spec_info   := commit_spec_meta
1229  XSError(commit_valid && do_commit && debug_cfi, "\ncommitted cfi should not be in a non-c_commited state\n")
1230
1231  val commit_real_hit = commit_hit === h_hit
1232  val update_ftb_entry = update.ftb_entry
1233
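  // FTBEntryGen merges the old FTB entry with the committed pre-decode and CFI information to
  // produce the entry the FTB should hold next, together with per-slot insert/mispredict/taken masks.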
1234  val ftbEntryGen = Module(new FTBEntryGen).io
1235  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
1236  ftbEntryGen.old_entry      := commit_ftb_entry
1237  ftbEntryGen.pd             := commit_pd
1238  ftbEntryGen.cfiIndex       := commit_cfi
1239  ftbEntryGen.target         := commit_target
1240  ftbEntryGen.hit            := commit_real_hit
1241  ftbEntryGen.mispredict_vec := commit_mispredict
1242
1243  update_ftb_entry         := ftbEntryGen.new_entry
1244  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
1245  update.mispred_mask      := ftbEntryGen.mispred_mask
1246  update.old_entry         := ftbEntryGen.is_old_entry
1247  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit
1248  update.br_taken_mask     := ftbEntryGen.taken_mask
1249  update.br_committed      := (ftbEntryGen.new_entry.brValids zip ftbEntryGen.new_entry.brOffset) map {
1250    case (valid, offset) => valid && commit_instCommited(offset)
1251  }
1252  update.jmp_taken         := ftbEntryGen.jmp_taken
1253
1254  // update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
1255  // update.full_pred.jalr_target := commit_target
1256  // update.full_pred.hit := true.B
1257  // when (update.full_pred.is_jalr) {
1258  //   update.full_pred.targets.last := commit_target
1259  // }
1260
1261  // ****************************************************************
1262  // *********************** to prefetch ****************************
1263  // ****************************************************************
1264
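  // When instruction-cache prefetch is enabled, a dedicated prefetchPtr walks the FTQ ahead of
  // ifuPtr and issues prefetch requests for entries in the f_to_send state; it is pulled back on
  // BPU s2/s3 redirects and moved past the redirect point on backend/IFU redirects.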
1265  ftq_pc_mem.io.other_raddrs(0) := DontCare
1266  if(cacheParams.enableICachePrefetch){
1267    val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
1268    val diff_prefetch_addr = WireInit(update_target(prefetchPtr.value)) // TODO: remove this
1269    // TODO: must be made wider
1270    prefetchPtr := prefetchPtr + io.toPrefetch.req.fire
1271
1272    val prefetch_too_late = (isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr)) || (prefetchPtr === ifuPtr)
1273    when(prefetch_too_late){
1274      when(prefetchPtr =/= bpuPtr){
1275        prefetchPtr := bpuPtr - 1.U
1276      }.otherwise{
1277        prefetchPtr := ifuPtr
1278      }
1279    }
1280
1281    ftq_pc_mem.io.other_raddrs(0) := prefetchPtr.value
1282
1283    when (bpu_s2_redirect && !isBefore(prefetchPtr, bpu_s2_resp.ftq_idx)) {
1284      prefetchPtr := bpu_s2_resp.ftq_idx
1285    }
1286
1287    when (bpu_s3_redirect && !isBefore(prefetchPtr, bpu_s3_resp.ftq_idx)) {
1288      prefetchPtr := bpu_s3_resp.ftq_idx
1289      // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
1290    }
1291
1292
1293    val prefetch_is_to_send = WireInit(entry_fetch_status(prefetchPtr.value) === f_to_send)
1294    val prefetch_addr = Wire(UInt(VAddrBits.W))
1295
1296    when (last_cycle_bpu_in && bpu_in_bypass_ptr === prefetchPtr) {
1297      prefetch_is_to_send := true.B
1298      prefetch_addr := last_cycle_bpu_target
1299      diff_prefetch_addr := last_cycle_bpu_target // TODO: remove this
1300    }.otherwise{
1301      prefetch_addr := RegNext(ftq_pc_mem.io.other_rdatas(0).startAddr)
1302    }
1303    io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && prefetch_is_to_send
1304    io.toPrefetch.req.bits.target := prefetch_addr
1305
1306    when(redirectVec.map(r => r.valid).reduce(_||_)){
1307      val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
1308      val next = r.ftqIdx + 1.U
1309      prefetchPtr := next
1310    }
1311
1312    // TODO: remove this
1313    // XSError(io.toPrefetch.req.valid && diff_prefetch_addr =/= prefetch_addr,
1314    //         f"\nprefetch_req_target wrong! prefetchPtr: ${prefetchPtr}, prefetch_addr: ${Hexadecimal(prefetch_addr)} diff_prefetch_addr: ${Hexadecimal(diff_prefetch_addr)}\n")
1315
1316
1317    XSError(isBefore(bpuPtr, prefetchPtr) && !isFull(bpuPtr, prefetchPtr), "\nprefetchPtr is before bpuPtr!\n")
1318//    XSError(isBefore(prefetchPtr, ifuPtr) && !isFull(ifuPtr, prefetchPtr), "\nifuPtr is before prefetchPtr!\n")
1319  }
1320  else {
1321    io.toPrefetch.req <> DontCare
1322  }
1323
1324  // ******************************************************************************
1325  // **************************** commit perf counters ****************************
1326  // ******************************************************************************
1327
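  // Per-slot masks over the committing entry: which slots actually committed, which of those are
  // branches or the jump, and which were mispredicted; their PopCounts drive the counters below.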
1328  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
1329  val commit_mispred_mask = commit_mispredict.asUInt
1330  val commit_not_mispred_mask = ~commit_mispred_mask
1331
1332  val commit_br_mask = commit_pd.brMask.asUInt
1333  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
1334  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
1335
1336  val mbpInstrs = commit_inst_mask & commit_cfi_mask
1337
1338  val mbpRights = mbpInstrs & commit_not_mispred_mask
1339  val mbpWrongs = mbpInstrs & commit_mispred_mask
1340
1341  io.bpuInfo.bpRight := PopCount(mbpRights)
1342  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
1343
1344  val isWriteFTQTable = WireInit(Constantin.createRecord("isWriteFTQTable" + p(XSCoreParamsKey).HartId.toString))
1345  val ftqBranchTraceDB = ChiselDB.createTable("FTQTable" + p(XSCoreParamsKey).HartId.toString, new FtqDebugBundle)
1346  // Cfi Info
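  // For every committed CFI, optionally log a trace record into the per-hart ChiselDB table
  // (gated by the Constantin knob isWriteFTQTable) and print an XSDebug line with its outcome.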
1347  for (i <- 0 until PredictWidth) {
1348    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
1349    val v = commit_state(i) === c_commited
1350    val isBr = commit_pd.brMask(i)
1351    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
1352    val isCfi = isBr || isJmp
1353    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
1354    val misPred = commit_mispredict(i)
1355    // val ghist = commit_spec_meta.ghist.predHist
1356    val histPtr = commit_spec_meta.histPtr
1357    val predCycle = commit_meta.meta(63, 0)
1358    val target = commit_target
1359
1360    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
1361    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
1362    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || (commit_hit =/= h_hit && i.U === commit_cfi.bits && isBr && commit_cfi.valid)
1363    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
1364    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
1365    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
1366    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
1367
1368    val logbundle = Wire(new FtqDebugBundle)
1369    logbundle.pc := pc
1370    logbundle.target := target
1371    logbundle.isBr := isBr
1372    logbundle.isJmp := isJmp
1373    logbundle.isCall := isJmp && commit_pd.hasCall
1374    logbundle.isRet := isJmp && commit_pd.hasRet
1375    logbundle.misPred := misPred
1376    logbundle.isTaken := isTaken
1377    logbundle.predStage := commit_stage
1378
1379    ftqBranchTraceDB.log(
1380      data = logbundle /* hardware of type T */,
1381      en = isWriteFTQTable.orR && v && do_commit && isCfi,
1382      site = "FTQ" + p(XSCoreParamsKey).HartId.toString,
1383      clock = clock,
1384      reset = reset
1385    )
1386  }
1387
1388  val enq = io.fromBpu.resp
1389  val perf_redirect = backendRedirect
1390
1391  XSPerfAccumulate("entry", validEntries)
1392  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
1393  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
1394  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
1395  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
1396
1397  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
1398
1399  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
1400  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
1401  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)
1402  XSPerfAccumulate("bpu_to_ifu_bubble_when_ftq_full", (bpuPtr === ifuPtr) && isFull(bpuPtr, commPtr) && io.toIfu.req.ready)
1403
1404  XSPerfAccumulate("redirectAhead_ValidNum", io.fromBackend.ftqIdxAhead.map(_.valid).reduce(_|_))
1405  XSPerfAccumulate("fromBackendRedirect_ValidNum", io.fromBackend.redirect.valid)
1406  XSPerfAccumulate("toBpuRedirect_ValidNum", io.toBpu.redirect.valid)
1407
1408  val from_bpu = io.fromBpu.resp.bits
1409  val to_ifu = io.toIfu.req.bits
1410
1411
1412  XSPerfHistogram("commit_num_inst", PopCount(commit_inst_mask), do_commit, 0, PredictWidth+1, 1)
1413
1414
1415
1416
1417  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1418  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1419  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1420  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1421
1422
1423  val mbpBRights = mbpRights & commit_br_mask
1424  val mbpJRights = mbpRights & commit_jal_mask
1425  val mbpIRights = mbpRights & commit_jalr_mask
1426  val mbpCRights = mbpRights & commit_call_mask
1427  val mbpRRights = mbpRights & commit_ret_mask
1428
1429  val mbpBWrongs = mbpWrongs & commit_br_mask
1430  val mbpJWrongs = mbpWrongs & commit_jal_mask
1431  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1432  val mbpCWrongs = mbpWrongs & commit_call_mask
1433  val mbpRWrongs = mbpWrongs & commit_ret_mask
1434
1435  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1436
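  // Break the right/wrong masks down by the BPU stage (BP_STAGES) recorded in pred_stage for
  // the committing entry.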
1437  def pred_stage_map(src: UInt, name: String) = {
1438    (0 until numBpStages).map(i =>
1439      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1440    ).foldLeft(Map[String, UInt]())(_+_)
1441  }
1442
1443  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1444  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1445  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1446  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1447  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1448  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1449
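  // Classify each BPU update for the perf counters below: false hit vs. real hit, and what
  // FTBEntryGen did to the entry (freshly initialised, left unchanged, or modified and why).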
1450  val update_valid = io.toBpu.update.valid
1451  def u(cond: Bool) = update_valid && cond
1452  val ftb_false_hit = u(update.false_hit)
1453  // assert(!ftb_false_hit)
1454  val ftb_hit = u(commit_hit === h_hit)
1455
1456  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1457  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
1458  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
1459  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid
1460
1461  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1462
1463  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1464  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1465  val ftb_modified_entry_ifu_redirected = u(ifuRedirected(do_commit_ptr.value))
1466  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1467  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1468  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1469
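  // FTB entry length in instruction slots: distance from the entry's start pc to its fall-through address.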
1470  def getFtbEntryLen(pc: UInt, entry: FTBEntry) = (entry.getFallThrough(pc) - pc) >> instOffsetBits
1471  val gen_ftb_entry_len = getFtbEntryLen(update.pc, ftbEntryGen.new_entry)
1472  XSPerfHistogram("ftb_init_entry_len", gen_ftb_entry_len, ftb_new_entry, 0, PredictWidth+1, 1)
1473  XSPerfHistogram("ftb_modified_entry_len", gen_ftb_entry_len, ftb_modified_entry, 0, PredictWidth+1, 1)
1474  val s3_ftb_entry_len = getFtbEntryLen(from_bpu.s3.pc(0), from_bpu.last_stage_ftb_entry)
1475  XSPerfHistogram("s3_ftb_entry_len", s3_ftb_entry_len, from_bpu.s3.valid(0), 0, PredictWidth+1, 1)
1476
1477  XSPerfHistogram("ftq_has_entry", validEntries, true.B, 0, FtqSize+1, 1)
1478
1479  val perfCountsMap = Map(
1480    "BpInstr" -> PopCount(mbpInstrs),
1481    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1482    "BpRight"  -> PopCount(mbpRights),
1483    "BpWrong"  -> PopCount(mbpWrongs),
1484    "BpBRight" -> PopCount(mbpBRights),
1485    "BpBWrong" -> PopCount(mbpBWrongs),
1486    "BpJRight" -> PopCount(mbpJRights),
1487    "BpJWrong" -> PopCount(mbpJWrongs),
1488    "BpIRight" -> PopCount(mbpIRights),
1489    "BpIWrong" -> PopCount(mbpIWrongs),
1490    "BpCRight" -> PopCount(mbpCRights),
1491    "BpCWrong" -> PopCount(mbpCWrongs),
1492    "BpRRight" -> PopCount(mbpRRights),
1493    "BpRWrong" -> PopCount(mbpRWrongs),
1494
1495    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1496    "ftb_hit"                      -> PopCount(ftb_hit),
1497    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1498    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1499    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1500    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1501    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1502    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1503    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1504    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1505    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1506    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1507  ) ++ mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1508       correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1509
1510  for((key, value) <- perfCountsMap) {
1511    XSPerfAccumulate(key, value)
1512  }
1513
1514  // --------------------------- Debug --------------------------------
1515  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1516  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1517  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1518  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
1519  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1520    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1521  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1522
1523  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1524  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1525  //       case (((valid, pd), ans), taken) =>
1526  //       Mux(valid && pd.isBr,
1527  //         isWrong ^ Mux(ans.hit.asBool,
1528  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1529  //           !taken),
1530  //         !taken),
1531  //       false.B)
1532  //     }
1533  //   }
1534
1535  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1536  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1537  //       case (((valid, pd), ans), taken) =>
1538  //       Mux(valid && pd.isBr,
1539  //         isWrong ^ Mux(ans.hit.asBool,
1540  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1541  //           !taken),
1542  //         !taken),
1543  //       false.B)
1544  //     }
1545  //   }
1546
1547  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1548  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1549  //       case (((valid, pd), ans), taken) =>
1550  //       Mux(valid && pd.isBr,
1551  //         isWrong ^ (ans.taken.asBool === taken),
1552  //       false.B)
1553  //     }
1554  //   }
1555
1556  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1557  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1558  //       case (((valid, pd), ans), taken) =>
1559  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1560  //         isWrong ^ (!taken),
1561  //           false.B)
1562  //     }
1563  //   }
1564
1565  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1566  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1567  //       case (((valid, pd), ans), taken) =>
1568  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1569  //         isWrong ^ (ans.target === commitEntry.target),
1570  //           false.B)
1571  //     }
1572  //   }
1573
1574  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1575  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1576  //   // btb and ubtb pred jal and jalr as well
1577  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1578  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1579  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1580  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1581
1582  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1583  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1584
1585  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1586  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1587
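  // Export a subset of the statistics above as perf events, picked up by generatePerfEvent().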
1588  val perfEvents = Seq(
1589    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1590    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1591    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
1592    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1593    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1594    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1595    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1596    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1597    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1598    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1599    ("BpRight                ", PopCount(mbpRights)                                                         ),
1600    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1601    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1602    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1603    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1604    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1605    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1606    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1607    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1608    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1609    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1610    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1611    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1612    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1613  )
1614  generatePerfEvent()
1615}