xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 0cb78322508dace56454d5d983b1c19b10997363)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, ParallelPriorityEncoder, ParallelPriorityMux, PerfBundle, PerfEventsBundle, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSError, XSPerfAccumulate}
import xiangshan._
import xiangshan.backend.CtrlToFtqIO
import firrtl.annotations.MemoryLoadFileType

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

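  // Note: to provide numRead independent read ports, the write below is simply
  // replicated into numRead single-read-port SRAM banks, one bank per read port.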
  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
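  // getPc reconstructs the PC at `offset` within this fetch packet: when adding
  // the offset carries out of the low PC field (tracked by isNextMask), the
  // upper bits are taken from nextRangeAddr instead of startAddr.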
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1)
    val startHigher = getHigher(startAddr)
    val nextHigher  = getHigher(nextRangeAddr)
    val higher = Mux(carry, nextHigher, startHigher)
    Cat(higher, pftAddr, 0.U(instOffsetBits.W))
  }
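  // Sanity check on the predicted fall-through: it is flagged as an error
  // (a false hit) if the fall-through is not strictly after startAddr, or
  // lies more than one fetch packet beyond it.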
  def fallThroughError() = {
    val startLower        = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits))
    val endLowerwithCarry = Cat(carry,    pftAddr)
    require(startLower.getWidth == log2Ceil(PredictWidth)+2)
    require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
    startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4 * 2).U
    this.pftAddr := resp.ftb_entry.pftAddr
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := resp.ftb_entry.oversize
    this.carry := resp.ftb_entry.carry
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
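  // jmpInfo.bits encodes the first jump of the packet as (isJalr, isCall, isRet)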
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the IFU pipeline is not stalled,
    // a packet sent from BPU s3 can have advanced at most to IFU stage f1
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
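  // read port layout: 1 jump PC read, numRedirect redirect reads,
  // 1 memory-violation-predictor read, and 1 ROB flush read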
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
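  // an RVI (4-byte) cfi in the last slot spills over into the next fetch
  // packet; this is what marks the generated entry as oversize below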
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi (only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })

  val old_entry_modified = WireInit(io.old_entry)
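  // In effect, the new branch is inserted into the slots in offset order:
  // slots at and after the insertion point inherit the contents of their
  // predecessor, and the (possibly shared) last slot may be displaced.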
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)


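  // Priority among the derived entries: inserting a new br dominates a modified
  // jalr target, which in turn dominates an always-taken downgrade.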
  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val robFlush = io.fromBackend.robFlush
  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = io.fromBackend.stage3Redirect

  val stage2Flush = stage2Redirect.valid || robFlush.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
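  // s1 responses enqueue at bpuPtr; s2/s3 redirect responses overwrite the
  // entry they originally enqueued at (their own ftq_idx)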
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // write the response selected among the BPU stages
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of the BPU
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of the BPU
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

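  // bpuPtr advances only on an s1 enqueue; s2/s3 redirects roll it back below.
  // ifuPtr advances whenever the IFU accepts a request.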
  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.ftqIdx := ifuPtr
  io.toIfu.req.bits.target := update_target(ifuPtr.value)
  io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)

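  // Request data source, by priority: the bypass buffer if the BPU wrote this
  // entry last cycle; otherwise the pc_mem read issued last cycle (whose
  // address depends on whether the previous request fired).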
  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }

  // if the fall-through address is smaller than the start address, the entry must be a false hit
  when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect reads cfiInfo, coupled to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(robFlush, stage2Redirect, fromIfuRedirect)

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
  io.toIfu.redirect.valid   := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

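  // if the committing entry missed in the FTB, the BPU's last-stage info for it
  // may still be in flight, so commit is presumably stalled for one extra cycle
  // here to let that write land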
  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask  := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

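  // per-slot cfi masks: branches come from predecode; the jump is a one-hot at
  // jmpOffset, gated by whether the packet actually contains a jump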
  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)

  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)

  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))

  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(22))
  })
  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )

  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}
1234