xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision c49b0e7fe673d974e1547da3c04423fb071e8910)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.backend.CtrlToFtqIO

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

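// FTQ-sized SRAM with numRead read ports and one write port. Note the scheme:
// the single-ported SRAMTemplate is simply replicated once per read port, and
// every replica sees the same write, trading area for conflict-free reads.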
class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

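// Per-entry PC information held in ftq_pc_mem. startAddr is the start PC of
// the fetch block; nextRangeAddr supplies the high PC bits whenever an offset
// or the fall-through address carries into the next fetch range.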
class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1)
    val startHigher = getHigher(startAddr)
    val nextHigher  = getHigher(nextRangeAddr)
    val higher = Mux(carry, nextHigher, startHigher)
    Cat(higher, pftAddr, 0.U(instOffsetBits.W))
  }
  def fallThroughError() = {
    val startLower        = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits))
    val endLowerwithCarry = Cat(carry,    pftAddr)
    require(startLower.getWidth == log2Ceil(PredictWidth)+2)
    require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
    startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4 * 2).U
    this.pftAddr :=
      Mux(resp.preds.hit, resp.preds.fallThroughAddr(instOffsetBits+log2Ceil(PredictWidth),instOffsetBits),
        resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U)
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := Mux(resp.preds.hit, resp.preds.oversize, false.B)
    this.carry :=
      Mux(resp.preds.hit,
        carryPos(resp.pc) ^ carryPos(resp.preds.fallThroughAddr),
        resp.pc(instOffsetBits + log2Ceil(PredictWidth)).asBool
      )
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}

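// Compressed predecode information for one FTQ entry. At most one jump is
// recorded per entry: jmpInfo.bits packs its (isJalr, isCall, isRet) flags.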
class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}



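// Speculative predictor state snapshotted at prediction time (RAS state,
// speculative counters, folded global history and path history), read back on
// a redirect to restore the predictors.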
class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

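// One combinational read port into an FTQ-indexed structure: the consumer
// drives ptr (and possibly offset) and samples data. A usage sketch with
// illustrative names: `val pc = io.toBackend.getJumpPcRead(jumpPtr, jumpOffset)`.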
class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet from bpu s3 can have reached f1 at most
    val s2 = Valid(new FtqPtr)
    // val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    // def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}


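// Derives the FTB entry to be written back at commit time: on a miss, a fresh
// entry is built from predecode results; on a hit, the old entry is patched
// (a newly taken br inserted, a jalr target corrected, or stale always-taken
// bits cleared).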
class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi (only a br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, the new br should be in front of the j, thus the addr of the j should be the new pft
  // 2. oe: | br | br |, the new br could be anywhere in between, thus the new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only the 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

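// The Fetch Target Queue: buffers prediction results from the BPU, issues
// fetch requests to the IFU, serves PC reads for the backend, and trains the
// BPU with commit-time results and redirect information.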
class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = RegNext(io.fromBackend.stage2Redirect)

  val stage2Flush = stage2Redirect.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !stage2Redirect.valid && !stage3Redirect.valid

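  // queue pointers: bpuPtr tracks BPU enqueue, ifuPtr the next request to send
  // to the IFU, ifuWbPtr predecode writeback, and commPtr commit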
  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  // val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  // val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect/*  || bpu_s3_redirect */) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // resp from uBTB
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of the bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of the bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


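  // the structures below are plain register vectors rather than SRAM because
  // they can be written by several sources, possibly in the same cycle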
  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  // io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  // io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  // when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
  //   bpuPtr := bpu_s3_resp.ftq_idx + 1.U
  //   // only when ifuPtr runs ahead of bpu s2 resp should we recover it
  //   when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
  //     ifuPtr := bpu_s3_resp.ftq_idx
  //   }
  //   XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  // }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
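  // one-entry bypass: a packet enqueued last cycle is not yet readable from
  // ftq_pc_mem, so it is forwarded from this buffer when ifuPtr points at it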
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.ftqIdx := ifuPtr
  io.toIfu.req.bits.target := update_target(ifuPtr.value)
  io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)

  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }

  // when the fall-through address is smaller in value than the start address, there must be a false hit
  when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr)/*  &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr) */
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx)/*  ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx) */

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check the cfis that the bpu predicted

    // branches predicted by the bpu but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
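  // ftq_pc_mem reads have one-cycle latency, so the offset is delayed with
  // RegNext to line up with the read data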
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

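  // a taken redirect at a smaller offset moves the recorded cfi earlier, while
  // a redirect at exactly the recorded offset rewrites its valid bit with the
  // actual taken result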
  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(stage2Redirect, fromIfuRedirect)

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := stage2Redirect.bits
  io.toIfu.redirect.valid   := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
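      // (commitType values 4-7 appear to encode where the fused pair's second
      // half lies: offset+1, offset+2, or slot 0/1 of the next entry)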
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

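  // presumably guards against committing an entry whose last-stage BPU
  // writeback (meta/FTB entry) may still be in flight: a not-hit entry with a
  // valid cfi waits one extra cycle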
  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask  := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
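  // each map entry below becomes an XSPerfAccumulate counter; entry_len is the
  // fetch-block length in instructions, from 1 to PredictWidth+1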
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  // val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ /* s3_entry_len_map ++ */
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)

  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    // ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && !enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )
  generatePerfEvent()
}