xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 2f4a3aa4472816faf8f455eb6c05df7b49468b99)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSPerfAccumulate, PerfBundle, PerfEventsBundle, XSError}
23import xiangshan._
25import utils.{ParallelPriorityMux, ParallelPriorityEncoder}
26import xiangshan.backend.{CtrlToFtqIO}
27import firrtl.annotations.MemoryLoadFileType
28
29class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
30  p => p(XSCoreParamsKey).FtqSize
31){
32  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
33}
34
35object FtqPtr {
36  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
37    val ptr = Wire(new FtqPtr)
38    ptr.flag := f
39    ptr.value := v
40    ptr
41  }
42  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
43    apply(!ptr.flag, ptr.value)
44  }
45}
46
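// FtqNRSRAM provides numRead independent read ports by instantiating one
// SRAMTemplate per port; every copy is written with the same data on a write,
// so each reader sees an identical image of the queue.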
47class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {
48
49  val io = IO(new Bundle() {
50    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
51    val ren = Input(Vec(numRead, Bool()))
52    val rdata = Output(Vec(numRead, gen))
53    val waddr = Input(UInt(log2Up(FtqSize).W))
54    val wen = Input(Bool())
55    val wdata = Input(gen)
56  })
57
58  for(i <- 0 until numRead){
59    val sram = Module(new SRAMTemplate(gen, FtqSize))
60    sram.io.r.req.valid := io.ren(i)
61    sram.io.r.req.bits.setIdx := io.raddr(i)
62    io.rdata(i) := sram.io.r.resp.data(0)
63    sram.io.w.req.valid := io.wen
64    sram.io.w.req.bits.setIdx := io.waddr
65    sram.io.w.req.bits.data := VecInit(io.wdata)
66  }
67
68}
69
70class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
71  // TODO: move pftAddr, oversize, carry to another mem
72  val startAddr = UInt(VAddrBits.W)
73  val nextRangeAddr = UInt(VAddrBits.W)
74  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
75  val isNextMask = Vec(PredictWidth, Bool())
76  val oversize = Bool()
77  val carry = Bool()
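  // getPc reconstructs the full PC of instruction slot `offset`: the low bits
  // are startAddr's in-packet offset plus `offset`, and the high bits are taken
  // from nextRangeAddr whenever isNextMask(offset) says the slot has wrapped
  // past the packet's aligned boundary (see fromBranchPrediction below).
  // getFallThrough expands the compressed (pftAddr, carry) pair back into a
  // full fall-through address; fallThroughError flags entries whose recorded
  // fall-through would lie below startAddr, which only happens on a false hit.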
78  def getPc(offset: UInt) = {
79    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits)
80    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits)
81    Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)),
82        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
83  }
84  def getFallThrough() = {
85    getFallThroughAddr(this.startAddr, this.carry, this.pftAddr)
86  }
87  def fallThroughError() = {
88    !carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr
89  }
90  def fromBranchPrediction(resp: BranchPredictionBundle) = {
91    this.startAddr := resp.pc
92    this.nextRangeAddr := resp.pc + (FetchWidth * 4).U
93    this.pftAddr := resp.ftb_entry.pftAddr
94    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
95      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
96    ))
97    this.oversize := resp.ftb_entry.oversize
98    this.carry := resp.ftb_entry.carry
99    this
100  }
101  override def toPrintable: Printable = {
102    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
103  }
104}
105
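// Ftq_pd_Entry keeps a compressed copy of the predecode result for one fetch
// packet: per-slot brMask/rvcMask plus info about the first jal/jalr, with
// jmpInfo.bits packed as (isJalr, isCall, isRet). fromPdWb compresses a
// PredecodeWritebackBundle into this form and toPd expands a single slot back
// into a PreDecodeInfo.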
106class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
107  val brMask = Vec(PredictWidth, Bool())
108  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
109  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
110  val jalTarget = UInt(VAddrBits.W)
111  val rvcMask = Vec(PredictWidth, Bool())
112  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
113  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
114  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
115  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)
116
117  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
118    val pds = pdWb.pd
119    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
120    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
121    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
122                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
123    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
124    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
125    this.jalTarget := pdWb.jalTarget
126  }
127
128  def toPd(offset: UInt) = {
129    require(offset.getWidth == log2Ceil(PredictWidth))
130    val pd = Wire(new PreDecodeInfo)
131    pd.valid := true.B
132    pd.isRVC := rvcMask(offset)
133    val isBr = brMask(offset)
134    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
135    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
136    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
137    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
138    pd
139  }
140}
141
142
143
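// Ftq_Redirect_SRAMEntry snapshots the speculative predictor state captured at
// prediction time (RAS pointer and top entry, per-branch speculative counters,
// folded global history, history pointer and path history) so that it can be
// restored when a redirect rolls the front end back to this entry.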
144class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
145  val rasSp = UInt(log2Ceil(RasSize).W)
146  val rasEntry = new RASEntry
147  val specCnt = Vec(numBr, UInt(10.W))
148  // val ghist = new ShiftingGlobalHistory
149  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
150  val histPtr = new CGHPtr
151  val phist = UInt(PathHistoryLength.W)
152  val phNewBit = UInt(1.W)
153
154  def fromBranchPrediction(resp: BranchPredictionBundle) = {
155    this.rasSp := resp.rasSp
156    this.rasEntry := resp.rasTop
157    this.specCnt := resp.specCnt
158    // this.ghist := resp.ghist
159    this.folded_hist := resp.folded_hist
160    this.histPtr := resp.histPtr
161    this.phist := resp.phist
162    this.phNewBit := resp.pc(instOffsetBits)
163    this
164  }
165}
166
167class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
168  val meta = UInt(MaxMetaLength.W)
169}
170
171class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
172  val target = UInt(VAddrBits.W)
173  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
174}
175
176// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
177//   val startAddr = UInt(VAddrBits.W)
178//   val fallThruAddr = UInt(VAddrBits.W)
179//   val isNextMask = Vec(PredictWidth, Bool())
180
181//   val meta = UInt(MaxMetaLength.W)
182
183//   val rasSp = UInt(log2Ceil(RasSize).W)
184//   val rasEntry = new RASEntry
185//   val hist = new ShiftingGlobalHistory
186//   val specCnt = Vec(numBr, UInt(10.W))
187
188//   val valids = Vec(PredictWidth, Bool())
189//   val brMask = Vec(PredictWidth, Bool())
190//   // isJalr, isCall, isRet
191//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
192//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)
193
194//   val mispredVec = Vec(PredictWidth, Bool())
195//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
196//   val target = UInt(VAddrBits.W)
197// }
198
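// FtqRead is a read port into one of the FTQ-resident memories: the consumer
// drives ptr/offset and samples data, and apply() bundles the three
// assignments into one expression. The read latency depends on the memory the
// FTQ hooks up behind the port.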
199class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
200  val ptr = Output(new FtqPtr)
201  val offset = Output(UInt(log2Ceil(PredictWidth).W))
202  val data = Input(gen)
203  def apply(ptr: FtqPtr, offset: UInt) = {
204    this.ptr := ptr
205    this.offset := offset
206    this.data
207  }
208  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
209}
210
211
212class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
213  val redirect = Valid(new BranchPredictionRedirect)
214  val update = Valid(new BranchPredictionUpdate)
215  val enq_ptr = Output(new FtqPtr)
216}
217
218class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
219  val req = Decoupled(new FetchRequestBundle)
220  val redirect = Valid(new Redirect)
221  val flushFromBpu = new Bundle {
222    // when the ifu pipeline is not stalled,
223    // a fetch request issued alongside bpu s3 can have reached at most stage f1
224    val s2 = Valid(new FtqPtr)
225    val s3 = Valid(new FtqPtr)
226    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
227      src.valid && !isAfter(src.bits, idx_to_flush)
228    }
229    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
230    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
231  }
232}
233
234trait HasBackendRedirectInfo extends HasXSParameter {
235  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
236  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
237}
238
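// FtqToCtrlIO exposes the PC read ports used by the backend. pc_reads is laid
// out as: jump PC | redirect reads | load-replay (memPred) read | ROB flush
// read, matching the accessor defs below.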
239class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
240  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
241  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
242  def getJumpPcRead = pc_reads.head
243  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
244  def getMemPredPcRead = pc_reads.init.last
245  def getRobFlushPcRead = pc_reads.last
246}
247
248
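// FTBEntryGen builds the FTB entry to be written back at commit time. From the
// start address, the old (predicted) entry, the committed predecode info, the
// committed cfi and its target, and the original hit result, it either
// constructs a fresh entry (on a miss) or derives a modified one from the old
// entry: a new branch inserted, a jalr target corrected, or always-taken bits
// cleared. The is_* outputs only feed perf counters.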
249class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
250  val io = IO(new Bundle {
251    val start_addr = Input(UInt(VAddrBits.W))
252    val old_entry = Input(new FTBEntry)
253    val pd = Input(new Ftq_pd_Entry)
254    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
255    val target = Input(UInt(VAddrBits.W))
256    val hit = Input(Bool())
257    val mispredict_vec = Input(Vec(PredictWidth, Bool()))
258
259    val new_entry = Output(new FTBEntry)
260    val new_br_insert_pos = Output(Vec(numBr, Bool()))
261    val taken_mask = Output(Vec(numBr, Bool()))
262    val mispred_mask = Output(Vec(numBr+1, Bool()))
263
264    // for perf counters
265    val is_init_entry = Output(Bool())
266    val is_old_entry = Output(Bool())
267    val is_new_br = Output(Bool())
268    val is_jalr_target_modified = Output(Bool())
269    val is_always_taken_modified = Output(Bool())
270    val is_br_full = Output(Bool())
271  })
272
273  // no mispredictions detected at predecode
274  val hit = io.hit
275  val pd = io.pd
276
277  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))
278
279
280  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
281  val entry_has_jmp = pd.jmpInfo.valid
282  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
283  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
284  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
285  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
286  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
287  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last
288
289  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
290  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr
291
292  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
293  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
294  // if not hit, establish a new entry
295  init_entry.valid := true.B
296  // tag is left for ftb to assign
297
298  // case br
299  val init_br_slot = init_entry.getSlotForBr(0)
300  when (cfi_is_br) {
301    init_br_slot.valid := true.B
302    init_br_slot.offset := io.cfiIndex.bits
303    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
304    init_entry.always_taken(0) := true.B // set to always taken on init
305  }
306  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br
307
308  // case jmp
309  when (entry_has_jmp) {
310    init_entry.tailSlot.offset := pd.jmpOffset
311    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
312    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
313  }
314
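  // The fall-through address is stored compressed: getLower() keeps only the
  // bits below carryPos, and a separate carry bit records crossing that
  // boundary. With a jump in the packet the fall-through is just past the
  // jump; otherwise it is startAddr plus the fetch width, extended by one slot
  // when the taken branch sits in the last slot and is not compressed
  // (last_br_rvi).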
315  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
316  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
317  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
318  init_entry.isJalr := new_jmp_is_jalr
319  init_entry.isCall := new_jmp_is_call
320  init_entry.isRet  := new_jmp_is_ret
321  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)
322
323  init_entry.oversize := last_br_rvi || last_jmp_rvi
324
325  // if hit, check whether a new cfi (only br is possible) is detected
326  val oe = io.old_entry
327  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
328  val br_recorded = br_recorded_vec.asUInt.orR
329  val is_new_br = cfi_is_br && !br_recorded
330  val new_br_offset = io.cfiIndex.bits
331  // vec(i) means new br will be inserted BEFORE old br(i)
332  val allBrSlotsVec = oe.allSlotsForBr
333  val new_br_insert_onehot = VecInit((0 until numBr).map{
334    i => i match {
335      case 0 =>
336        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
337      case idx =>
338        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
339        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
340    }
341  })
342
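  // Insert the newly discovered branch while keeping the branch slots ordered
  // by offset: the slot picked by new_br_insert_onehot takes the new branch,
  // later slots inherit the contents of their predecessor, and a pushed-out
  // last branch is handled below by shrinking pftAddr. always_taken is set for
  // the new branch and cleared for already-recorded branches in front of it,
  // since execution evidently fell through them this time.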
343  val old_entry_modified = WireInit(io.old_entry)
344  for (i <- 0 until numBr) {
345    val slot = old_entry_modified.allSlotsForBr(i)
346    when (new_br_insert_onehot(i)) {
347      slot.valid := true.B
348      slot.offset := new_br_offset
349      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
350      old_entry_modified.always_taken(i) := true.B
351    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
352      old_entry_modified.always_taken(i) := false.B
353      // all other fields remain unchanged
354    }.otherwise {
355      // case i == 0, remain unchanged
356      if (i != 0) {
357        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
358        when (!noNeedToMoveFromFormerSlot) {
359          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
360          old_entry_modified.always_taken(i) := oe.always_taken(i)
361        }
362      }
363    }
364  }
365
366  // two circumstances:
367  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
368  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
369  //        the previous last br or the new br
370  val may_have_to_replace = oe.noEmptySlotForNewBr
371  val pft_need_to_change = is_new_br && may_have_to_replace
372  // it should either be the given last br or the new br
373  when (pft_need_to_change) {
374    val new_pft_offset =
375      Mux(!new_br_insert_onehot.asUInt.orR,
376        new_br_offset, oe.allSlotsForBr.last.offset)
377
378    // set jmp to invalid
379    if (!shareTailSlot) {
380      old_entry_modified.tailSlot.valid := false.B
381    }
382    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
383    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
384    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
385    old_entry_modified.oversize := false.B
386    old_entry_modified.isCall := false.B
387    old_entry_modified.isRet := false.B
388    old_entry_modified.isJalr := false.B
389  }
390
391  val old_entry_jmp_target_modified = WireInit(oe)
392  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
393  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
394  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
395  when (jalr_target_modified) {
396    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
397    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
398  }
399
400  val old_entry_always_taken = WireInit(oe)
401  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
402  for (i <- 0 until numBr) {
403    old_entry_always_taken.always_taken(i) :=
404      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
405    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
406  }
407  val always_taken_modified = always_taken_modified_vec.reduce(_||_)
408
409
410
411  val derived_from_old_entry =
412    Mux(is_new_br, old_entry_modified,
413      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))
414
415
416  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
417
418  io.new_br_insert_pos := new_br_insert_onehot
419  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
420    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
421  })
422  for (i <- 0 until numBr) {
423    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
424  }
425  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)
426
427  // for perf counters
428  io.is_init_entry := !hit
429  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
430  io.is_new_br := hit && is_new_br
431  io.is_jalr_target_modified := hit && jalr_target_modified
432  io.is_always_taken_modified := hit && always_taken_modified
433  io.is_br_full := hit && is_new_br && may_have_to_replace
434}
435
436class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
437  with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
438  val io = IO(new Bundle {
439    val fromBpu = Flipped(new BpuToFtqIO)
440    val fromIfu = Flipped(new IfuToFtqIO)
441    val fromBackend = Flipped(new CtrlToFtqIO)
442
443    val toBpu = new FtqToBpuIO
444    val toIfu = new FtqToIfuIO
445    val toBackend = new FtqToCtrlIO
446
447    val bpuInfo = new Bundle {
448      val bpRight = Output(UInt(XLEN.W))
449      val bpWrong = Output(UInt(XLEN.W))
450    }
451  })
452  io.bpuInfo := DontCare
453
454  val robFlush = io.fromBackend.robFlush
455  val stage2Redirect = io.fromBackend.stage2Redirect
456  val stage3Redirect = io.fromBackend.stage3Redirect
457
458  val stage2Flush = stage2Redirect.valid || robFlush.valid
459  val backendFlush = stage2Flush || RegNext(stage2Flush)
460  val ifuFlush = Wire(Bool())
461
462  val flush = stage2Flush || RegNext(stage2Flush)
463
464  val allowBpuIn, allowToIfu = WireInit(false.B)
465  val flushToIfu = !allowToIfu
466  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
467  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
468
469  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
470  val validEntries = distanceBetween(bpuPtr, commPtr)
471
472  // **********************************************************************
473  // **************************** enq from bpu ****************************
474  // **********************************************************************
475  val new_entry_ready = validEntries < FtqSize.U
476  io.fromBpu.resp.ready := new_entry_ready
477
478  val bpu_s2_resp = io.fromBpu.resp.bits.s2
479  val bpu_s3_resp = io.fromBpu.resp.bits.s3
480  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
481  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect
482
483  io.toBpu.enq_ptr := bpuPtr
484  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
485  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn
486
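  // A prediction from s1 allocates a new entry at bpuPtr; a redirecting s2/s3
  // prediction instead overwrites the entry it allocated earlier, identified
  // by the ftq_idx carried in the response.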
487  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
488  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
489  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
490  val bpu_in_resp_idx = bpu_in_resp_ptr.value
491
492  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
493  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
494  // resp from uBTB
495  ftq_pc_mem.io.wen(0) := bpu_in_fire
496  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
497  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)
498
499  //                                                            ifuRedirect + backendRedirect + commit
500  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
501  // this info is intended to be enqueued at the last stage of bpu
502  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
503  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
504  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)
505
506  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
507  // this info is intended to be enqueued at the last stage of bpu
508  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
509  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
510  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
511  //                                                            ifuRedirect + backendRedirect + commit
512  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
513  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
514  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
515  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry
516
517
518  // multi-write
519  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
520  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
521  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
522  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))
523
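  // Three per-entry state machines: commitStateQueue tracks each instruction
  // slot (invalid -> valid after predecode writeback -> committed),
  // entry_fetch_status tracks whether the entry still needs to be sent to the
  // IFU, and entry_hit_status records whether the FTB hit and whether that hit
  // later turned out to be false.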
524  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
525  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
526    VecInit(Seq.fill(PredictWidth)(c_invalid))
527  }))
528
529  val f_to_send :: f_sent :: Nil = Enum(2)
530  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))
531
532  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
533  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
534
535
536  when (bpu_in_fire) {
537    entry_fetch_status(bpu_in_resp_idx) := f_to_send
538    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
539    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
540    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
541    update_target(bpu_in_resp_idx) := bpu_in_resp.target
542    pred_stage(bpu_in_resp_idx) := bpu_in_stage
543  }
544
545  bpuPtr := bpuPtr + enq_fire
546  ifuPtr := ifuPtr + io.toIfu.req.fire
547
548  // only use ftb result to assign hit status
549  when (bpu_s2_resp.valid) {
550    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
551  }
552
553
554  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
555  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
556  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
557    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
558    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
559    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
560      ifuPtr := bpu_s2_resp.ftq_idx
561    }
562  }
563
564  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
565  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
566  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
567    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
568    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
569    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
570      ifuPtr := bpu_s3_resp.ftq_idx
571    }
572    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
573  }
574
575  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr runs ahead of bpuPtr!\n")
576
577  // ****************************************************************
578  // **************************** to ifu ****************************
579  // ****************************************************************
580  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
581  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
582  val last_cycle_bpu_in = RegNext(bpu_in_fire)
583  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)
584
585  // read pc and target
586  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
587  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value
588
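  // The request sent to the IFU comes from one of three sources: the bypass
  // register, if the entry at ifuPtr was written by the BPU last cycle;
  // otherwise one of the two pre-read ftq_pc_mem ports -- the (ifuPtr+1) port
  // when a request fired last cycle (ifuPtr has just advanced), or the ifuPtr
  // port when it did not.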
589  val toIfuReq = Wire(chiselTypeOf(io.toIfu.req))
590
591  toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
592  toIfuReq.bits.ftqIdx := ifuPtr
593  toIfuReq.bits.target := update_target(ifuPtr.value)
594  toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
595  toIfuReq.bits.fallThruError  := false.B
596
597  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
598    toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf)
599  }.elsewhen (last_cycle_to_ifu_fire) {
600    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
601  }.otherwise {
602    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
603  }
604
605  io.toIfu.req <> toIfuReq
606
607  // when the fall-through address is smaller than the start address, the entry must be a false hit
608  when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) {
609    when (io.toIfu.req.fire &&
610      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
611      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
612    ) {
613      entry_hit_status(ifuPtr.value) := h_false_hit
614      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
615    }
616    io.toIfu.req.bits.fallThruAddr   := toIfuReq.bits.startAddr + (FetchWidth*4).U
617    io.toIfu.req.bits.fallThruError  := true.B
618    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
619  }
620
621  val ifu_req_should_be_flushed =
622    io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) ||
623    io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx)
624
625  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
626    entry_fetch_status(ifuPtr.value) := f_sent
627  }
628
629
630  // *********************************************************************
631  // **************************** wb from ifu ****************************
632  // *********************************************************************
633  val pdWb = io.fromIfu.pdWb
634  val pds = pdWb.bits.pd
635  val ifu_wb_valid = pdWb.valid
636  val ifu_wb_idx = pdWb.bits.ftqIdx.value
637  // read ports:                                                         commit update
638  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
639  ftq_pd_mem.io.wen(0) := ifu_wb_valid
640  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
641  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)
642
643  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
644  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
645  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
646  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
647  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
648  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)
649
650  when (ifu_wb_valid) {
651    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
652      case (v, inRange) => v && inRange
653    })
654    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
655      case (qe, v) => when (v) { qe := c_valid }
656    }
657  }
658
659  ifuWbPtr := ifuWbPtr + ifu_wb_valid
660
661  ftb_entry_mem.io.raddr.head := ifu_wb_idx
662  val has_false_hit = WireInit(false.B)
663  when (RegNext(hit_pd_valid)) {
664    // check for false hit
665    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
666    val brSlots = pred_ftb_entry.brSlots
667    val tailSlot = pred_ftb_entry.tailSlot
668    // we check cfis that bpu predicted
669
670    // branches that bpu predicted but predecode denied
671    val br_false_hit =
672      brSlots.map{
673        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
674      }.reduce(_||_) ||
675      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
676        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))
677
678    val jmpOffset = tailSlot.offset
679    val jmp_pd = pd_reg(jmpOffset)
680    val jal_false_hit = pred_ftb_entry.jmpValid &&
681      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
682       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
683       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
684       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
685      )
686
687    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
688    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))
689
690    // assert(!has_false_hit)
691  }
692
693  when (has_false_hit) {
694    entry_hit_status(wb_idx_reg) := h_false_hit
695  }
696
697
698  // **********************************************************************
699  // **************************** backend read ****************************
700  // **********************************************************************
701
702  // pc reads
703  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
704    ftq_pc_mem.io.raddr(i) := req.ptr.value
705    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
706  }
707  // target read
708  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))
709
710  // *******************************************************************************
711  // **************************** redirect from backend ****************************
712  // *******************************************************************************
713
714  // redirect read cfiInfo, couples to redirectGen s2
715  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
716  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value
717
718  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value
719
720  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
721  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
722  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
723  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)
724
725  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
726  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset
727
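  // Reconstruct the history maintenance for the redirected instruction.
  // Roughly: if the entry hit in the FTB, the shift amount is the number of
  // branches the entry records up to the redirected offset, plus one when the
  // redirected instruction is itself a branch that is not recorded yet but
  // could still be inserted; addIntoHist says whether the redirected branch
  // contributes its own history bit. On a miss, only the redirected branch
  // itself (when taken) is counted.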
728  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
729    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
730      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
731      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
732
733    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
734        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
735  }.otherwise {
736    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
737    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
738  }
739
740
741  // ***************************************************************************
742  // **************************** redirect from ifu ****************************
743  // ***************************************************************************
744  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
745  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
746  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
747  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
748  fromIfuRedirect.bits.level := RedirectLevel.flushAfter
749
750  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
751  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
752  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
753  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
754  ifuRedirectCfiUpdate.target := pdWb.bits.target
755  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
756  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid
757
758  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
759  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
760  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid
761
762  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
763  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
764
765  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value
766
767  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
768  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
769  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
770    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
771  }
772
773  // *********************************************************************
774  // **************************** wb from exu ****************************
775  // *********************************************************************
776
777  def extractRedirectInfo(wb: Valid[Redirect]) = {
778    val ftqIdx = wb.bits.ftqIdx.value
779    val ftqOffset = wb.bits.ftqOffset
780    val taken = wb.bits.cfiUpdate.taken
781    val mispred = wb.bits.cfiUpdate.isMisPred
782    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
783  }
784
785  // fix mispredict entry
786  val lastIsMispredict = RegNext(
787    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
788  )
789
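  // updateCfiInfo patches the per-entry control-flow info after a redirect: a
  // taken redirect at an offset earlier than the recorded cfi moves the cfi
  // index to that offset; a redirect landing exactly on the recorded cfi
  // re-evaluates the valid bit from its taken flag. The stored target is
  // always refreshed, and mispredict_vec is only written for backend
  // redirects.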
790  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
791    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
792    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
793    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
794    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
795      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
796    }
797    when (cfiIndex_bits_wen) {
798      cfiIndex_vec(r_idx).bits := r_offset
799    }
800    update_target(r_idx) := redirect.bits.cfiUpdate.target
801    if (isBackend) {
802      mispredict_vec(r_idx)(r_offset) := r_mispred
803    }
804  }
805
806  when(stage3Redirect.valid && lastIsMispredict) {
807    updateCfiInfo(stage3Redirect)
808  }.elsewhen (ifuRedirectToBpu.valid) {
809    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
810  }
811
812  // ***********************************************************************************
813  // **************************** flush ptr and state queue ****************************
814  // ***********************************************************************************
815
816  val redirectVec = VecInit(robFlush, stage2Redirect, fromIfuRedirect)
817
818  // when a redirect happens, reset the pointers and status queues
819  when(redirectVec.map(r => r.valid).reduce(_||_)){
820    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
821    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
822    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
823    val next = idx + 1.U
824    bpuPtr := next
825    ifuPtr := next
826    ifuWbPtr := next
827    when (notIfu) {
828      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
829        when(i.U > offset || i.U === offset && flushItSelf){
830          s := c_invalid
831        }
832      })
833    }
834  }
835
836  // only the valid bit is actually needed
837  io.toIfu.redirect.bits    := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
838  io.toIfu.redirect.valid   := stage2Flush
839
840  // commit
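  // Mark committed slots in commitStateQueue. The commitType-specific cases
  // below appear to handle fused instruction pairs (an assumption based on the
  // offsets written): types 4/5 also commit the slot one or two positions
  // after the reported offset, and types 6/7 commit slot 0 or 1 of the next
  // FTQ entry when the pair crosses a fetch-packet boundary.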
841  for (c <- io.fromBackend.rob_commits) {
842    when(c.valid) {
843      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
844      // TODO: remove this
845      // For instruction fusions, we also update the next instruction
846      when (c.bits.commitType === 4.U) {
847        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
848      }.elsewhen(c.bits.commitType === 5.U) {
849        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
850      }.elsewhen(c.bits.commitType === 6.U) {
851        val index = (c.bits.ftqIdx + 1.U).value
852        commitStateQueue(index)(0) := c_commited
853      }.elsewhen(c.bits.commitType === 7.U) {
854        val index = (c.bits.ftqIdx + 1.U).value
855        commitStateQueue(index)(1) := c_commited
856      }
857    }
858  }
859
860  // ****************************************************************
861  // **************************** to bpu ****************************
862  // ****************************************************************
863
864  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)
865
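  // An entry may commit once the IFU has written back its predecode result
  // (commPtr lags ifuWbPtr) and every instruction slot is either committed or
  // was never valid. may_have_stall_from_bpu adds one wait cycle for entries
  // that have a taken cfi but missed in the FTB, presumably to let the BPU's
  // last-stage writes land before they are read back for the update.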
866  val may_have_stall_from_bpu = RegInit(false.B)
867  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
868    Cat(commitStateQueue(commPtr.value).map(s => {
869      s === c_invalid || s === c_commited
870    })).andR()
871
872  // commit reads
873  ftq_pc_mem.io.raddr.last := commPtr.value
874  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
875  ftq_pd_mem.io.raddr.last := commPtr.value
876  val commit_pd = ftq_pd_mem.io.rdata.last
877  ftq_redirect_sram.io.ren.last := canCommit
878  ftq_redirect_sram.io.raddr.last := commPtr.value
879  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
880  ftq_meta_1r_sram.io.ren(0) := canCommit
881  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
882  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
883  ftb_entry_mem.io.raddr.last := commPtr.value
884  val commit_ftb_entry = ftb_entry_mem.io.rdata.last
885
886  // need one cycle to read mem and srams
887  val do_commit_ptr = RegNext(commPtr)
888  val do_commit = RegNext(canCommit, init=false.B)
889  when (canCommit) { commPtr := commPtr + 1.U }
890  val commit_state = RegNext(commitStateQueue(commPtr.value))
891  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
892  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
893    can_commit_cfi.valid := false.B
894  }
895  val commit_cfi = RegNext(can_commit_cfi)
896
897  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
898    case (mis, state) => mis && state === c_commited
899  })
900  val can_commit_hit = entry_hit_status(commPtr.value)
901  val commit_hit = RegNext(can_commit_hit)
902  val commit_target = RegNext(update_target(commPtr.value))
903  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken
904
905  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
906  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu
907
908  io.toBpu.update := DontCare
909  io.toBpu.update.valid := commit_valid && do_commit
910  val update = io.toBpu.update.bits
911  update.false_hit   := commit_hit === h_false_hit
912  update.pc          := commit_pc_bundle.startAddr
913  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
914  update.meta        := commit_meta.meta
915  update.full_target := commit_target
916  update.fromFtqRedirectSram(commit_spec_meta)
917
918  val commit_real_hit = commit_hit === h_hit
919  val update_ftb_entry = update.ftb_entry
920
921  val ftbEntryGen = Module(new FTBEntryGen).io
922  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
923  ftbEntryGen.old_entry      := commit_ftb_entry
924  ftbEntryGen.pd             := commit_pd
925  ftbEntryGen.cfiIndex       := commit_cfi
926  ftbEntryGen.target         := commit_target
927  ftbEntryGen.hit            := commit_real_hit
928  ftbEntryGen.mispredict_vec := commit_mispredict
929
930  update_ftb_entry         := ftbEntryGen.new_entry
931  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
932  update.mispred_mask      := ftbEntryGen.mispred_mask
933  update.old_entry         := ftbEntryGen.is_old_entry
934  update.preds.br_taken_mask  := ftbEntryGen.taken_mask
935
936  // ******************************************************************************
937  // **************************** commit perf counters ****************************
938  // ******************************************************************************
939
940  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
941  val commit_mispred_mask = commit_mispredict.asUInt
942  val commit_not_mispred_mask = ~commit_mispred_mask
943
944  val commit_br_mask = commit_pd.brMask.asUInt
945  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
946  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)
947
948  val mbpInstrs = commit_inst_mask & commit_cfi_mask
949
950  val mbpRights = mbpInstrs & commit_not_mispred_mask
951  val mbpWrongs = mbpInstrs & commit_mispred_mask
952
953  io.bpuInfo.bpRight := PopCount(mbpRights)
954  io.bpuInfo.bpWrong := PopCount(mbpWrongs)
955
956  // Cfi Info
957  for (i <- 0 until PredictWidth) {
958    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
959    val v = commit_state(i) === c_commited
960    val isBr = commit_pd.brMask(i)
961    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
962    val isCfi = isBr || isJmp
963    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
964    val misPred = commit_mispredict(i)
965    // val ghist = commit_spec_meta.ghist.predHist
966    val histPtr = commit_spec_meta.histPtr
967    val predCycle = commit_meta.meta(63, 0)
968    val target = commit_target
969
970    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
971    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
972    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
973    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
974    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
975    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
976    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
977  }
978
979  val enq = io.fromBpu.resp
980  val perf_redirect = io.fromBackend.stage2Redirect
981
982  XSPerfAccumulate("entry", validEntries)
983  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
984  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
985  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
986  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)
987
988  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)
989
990  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
991  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
992  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)
993
994  val from_bpu = io.fromBpu.resp.bits
995  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
996    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
997    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
998    val entry_len_map = (1 to PredictWidth+1).map(i =>
999      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
1000    ).foldLeft(Map[String, UInt]())(_+_)
1001    entry_len_map
1002  }
1003  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
1004  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
1005  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")
1006
1007  val to_ifu = io.toIfu.req.bits
1008  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
1009  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
1010  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
1011    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
1012  ).foldLeft(Map[String, UInt]())(_+_)
1013
1014
1015
1016  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
1017  val commit_num_inst_map = (1 to PredictWidth).map(i =>
1018    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
1019  ).foldLeft(Map[String, UInt]())(_+_)
1020
1021
1022
1023  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
1024  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
1025  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
1026  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))
1027
1028
1029  val mbpBRights = mbpRights & commit_br_mask
1030  val mbpJRights = mbpRights & commit_jal_mask
1031  val mbpIRights = mbpRights & commit_jalr_mask
1032  val mbpCRights = mbpRights & commit_call_mask
1033  val mbpRRights = mbpRights & commit_ret_mask
1034
1035  val mbpBWrongs = mbpWrongs & commit_br_mask
1036  val mbpJWrongs = mbpWrongs & commit_jal_mask
1037  val mbpIWrongs = mbpWrongs & commit_jalr_mask
1038  val mbpCWrongs = mbpWrongs & commit_call_mask
1039  val mbpRWrongs = mbpWrongs & commit_ret_mask
1040
1041  val commit_pred_stage = RegNext(pred_stage(commPtr.value))
1042
1043  def pred_stage_map(src: UInt, name: String) = {
1044    (0 until numBpStages).map(i =>
1045      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
1046    ).foldLeft(Map[String, UInt]())(_+_)
1047  }
1048
1049  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
1050  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
1051  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
1052  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
1053  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
1054  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")
1055
1056  val update_valid = io.toBpu.update.valid
1057  def u(cond: Bool) = update_valid && cond
1058  val ftb_false_hit = u(update.false_hit)
1059  // assert(!ftb_false_hit)
1060  val ftb_hit = u(commit_hit === h_hit)
1061
1062  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
1063  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
1064  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
1065  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid
1066
1067  val ftb_old_entry = u(ftbEntryGen.is_old_entry)
1068
1069  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
1070  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
1071  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
1072  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
1073  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified
1074
1075  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
1076  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
1077  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
1078    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
1079  ).foldLeft(Map[String, UInt]())(_+_)
1080  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
1081    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
1082  ).foldLeft(Map[String, UInt]())(_+_)
1083
1084  val ftq_occupancy_map = (0 to FtqSize).map(i =>
1085    f"ftq_has_entry_$i" ->( validEntries === i.U)
1086  ).foldLeft(Map[String, UInt]())(_+_)
1087
1088  val perfCountsMap = Map(
1089    "BpInstr" -> PopCount(mbpInstrs),
1090    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
1091    "BpRight"  -> PopCount(mbpRights),
1092    "BpWrong"  -> PopCount(mbpWrongs),
1093    "BpBRight" -> PopCount(mbpBRights),
1094    "BpBWrong" -> PopCount(mbpBWrongs),
1095    "BpJRight" -> PopCount(mbpJRights),
1096    "BpJWrong" -> PopCount(mbpJWrongs),
1097    "BpIRight" -> PopCount(mbpIRights),
1098    "BpIWrong" -> PopCount(mbpIWrongs),
1099    "BpCRight" -> PopCount(mbpCRights),
1100    "BpCWrong" -> PopCount(mbpCWrongs),
1101    "BpRRight" -> PopCount(mbpRRights),
1102    "BpRWrong" -> PopCount(mbpRWrongs),
1103
1104    "ftb_false_hit"                -> PopCount(ftb_false_hit),
1105    "ftb_hit"                      -> PopCount(ftb_hit),
1106    "ftb_new_entry"                -> PopCount(ftb_new_entry),
1107    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
1108    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
1109    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
1110    "ftb_old_entry"                -> PopCount(ftb_old_entry),
1111    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
1112    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
1113    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
1114    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
1115    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
1116  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
1117  s2_entry_len_map ++ s3_entry_len_map ++
1118  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
1119  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
1120  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map
1121
1122  for((key, value) <- perfCountsMap) {
1123    XSPerfAccumulate(key, value)
1124  }
1125
1126  // --------------------------- Debug --------------------------------
1127  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
1128  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
1129  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
1130  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr, [commPtr] $commPtr\n")
1131  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
1132    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
1133  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")
1134
1135  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1136  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1137  //       case (((valid, pd), ans), taken) =>
1138  //       Mux(valid && pd.isBr,
1139  //         isWrong ^ Mux(ans.hit.asBool,
1140  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1141  //           !taken),
1142  //         !taken),
1143  //       false.B)
1144  //     }
1145  //   }
1146
1147  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1148  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1149  //       case (((valid, pd), ans), taken) =>
1150  //       Mux(valid && pd.isBr,
1151  //         isWrong ^ Mux(ans.hit.asBool,
1152  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
1153  //           !taken),
1154  //         !taken),
1155  //       false.B)
1156  //     }
1157  //   }
1158
1159  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1160  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1161  //       case (((valid, pd), ans), taken) =>
1162  //       Mux(valid && pd.isBr,
1163  //         isWrong ^ (ans.taken.asBool === taken),
1164  //       false.B)
1165  //     }
1166  //   }
1167
1168  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1169  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1170  //       case (((valid, pd), ans), taken) =>
1171  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
1172  //         isWrong ^ (!taken),
1173  //           false.B)
1174  //     }
1175  //   }
1176
1177  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
1178  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
1179  //       case (((valid, pd), ans), taken) =>
1180  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
1181  //         isWrong ^ (ans.target === commitEntry.target),
1182  //           false.B)
1183  //     }
1184  //   }
1185
1186  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
1187  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
1188  //   // btb and ubtb pred jal and jalr as well
1189  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
1190  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
1191  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
1192  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)
1193
1194  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
1195  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)
1196
1197  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
1198  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
1199  val perfinfo = IO(new Bundle(){
1200    val perfEvents = Output(new PerfEventsBundle(22))
1201  })
1202  val perfEvents = Seq(
1203    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
1204    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
1205    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
1206    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
1207    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
1208    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
1209    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
1210    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
1211    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
1212    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
1213    ("BpRight                ", PopCount(mbpRights)                                                         ),
1214    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
1215    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
1216    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
1217    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
1218    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
1219    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
1220    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
1221    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
1222    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
1223    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
1224    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
1225    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
1226    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
1227  )
1228
1229  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
1230    perf_out.incr_step := RegNext(perf)
1231  }
1232}
1233