/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, ParallelPriorityEncoder, ParallelPriorityMux, PerfBundle, PerfEventsBundle, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSError, XSPerfAccumulate}
import xiangshan._
import xiangshan.backend.CtrlToFtqIO

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}
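
// Pointer-arithmetic note (an illustrative sketch, not part of the design):
// FtqPtr is a circular queue pointer whose `flag` toggles on every wrap-around,
// so two pointers with the same `value` but opposite `flag` are exactly one
// full queue apart:
//
//   val a = FtqPtr(false.B, 3.U)
//   val b = FtqPtr.inverse(a) // same value, opposite flag
//   // isFull(a, b) from HasCircularQueuePtrHelper would hold here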

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}
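
// Design note: the N read ports are obtained by replication, i.e. one 1R1W
// SRAMTemplate per read port, all fed by the same write. A hedged usage sketch
// (signal names illustrative; read data returns one cycle after ren, as with
// SRAMTemplate):
//
//   val sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 3))
//   sram.io.ren(0)   := redirect.valid
//   sram.io.raddr(0) := redirect.bits.ftqIdx.value
//   val entry = sram.io.rdata(0) // sample in the next cycle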

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1)
    val startHigher = getHigher(startAddr)
    val nextHigher  = getHigher(nextRangeAddr)
    val higher = Mux(carry, nextHigher, startHigher)
    Cat(higher, pftAddr, 0.U(instOffsetBits.W))
  }
  def fallThroughError() = {
    val startLower        = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits))
    val endLowerwithCarry = Cat(carry,    pftAddr)
    require(startLower.getWidth == log2Ceil(PredictWidth)+2)
    require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
    startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4).U
    this.pftAddr := resp.ftb_entry.pftAddr
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := resp.ftb_entry.oversize
    this.carry := resp.ftb_entry.carry
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}
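
// Worked example (hedged; assumes a typical config of PredictWidth = 16,
// instOffsetBits = 1, FetchWidth = 8, i.e. 2-byte slots over a 32-byte range):
// with startAddr = 0x8000001c the start offset field is 0xe, so getPc(3.U)
// computes 0xe + 3 = 0x11; the carry bit selects nextRangeAddr's upper part
// (isNextMask(3) is set) and the low offset wraps to 1, giving
// (0x8000003c & ~0x1f) | (1 << 1) = 0x80000022 = startAddr + 3 slots * 2 bytes.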

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}
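
// Encoding note, derived from the accessors above: jmpInfo.bits packs the
// block's (single) jump as Vec(isJalr, isCall, isRet). So a function call via
// jalr reads back as hasJalr && hasCall, while a plain jal sets jmpInfo.valid
// with bits(0) clear.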



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}
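
// Usage sketch (hedged; illustrative names): a consumer drives ptr/offset and
// samples data, which arrives one cycle later because the backing storage is
// synchronous:
//
//   val pcRead = ftqToCtrl.getJumpPcRead
//   val jumpPc = pcRead(jmpUop.ftqPtr, jmpUop.ftqOffset) // drives ptr/offset, returns data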


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet from bpu s3 can have reached f1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}
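
// Flush-range note: shouldFlushBy(src, idx) holds when src.bits is not after
// idx in queue order, i.e. a BPU s2/s3 redirect at entry k squashes the
// in-flight fetch request for entry k and anything younger; requests strictly
// older than k are left alone.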

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}
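
// Port layout of pc_reads, as carved out by the accessors above:
//   [0]                  jump PC      (getJumpPcRead)
//   [1 .. numRedirect]   redirect PCs (getRedirectPcRead)
//   [numRedirect + 1]    mem-pred PC  (getMemPredPcRead)
//   [numRedirect + 2]    rob-flush PC (getRobFlushPcRead)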


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi (only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })
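
  // Insertion example (illustrative): with numBr = 2 and old br slots at
  // offsets {4, 9}, a newly detected br at offset 2 yields
  // new_br_insert_onehot = (true, false): slot 0 takes the new br, the loop
  // below moves the old br at offset 4 into slot 1 (fromAnotherSlot), and the
  // br at offset 9 is evicted, with pftAddr shrunk to offset 9 by the
  // pft_need_to_change case so the evicted br can later be re-learned.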

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)


  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}
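
// Update-priority note (a summary of the Mux cascade above, not new behavior):
// when the old entry hit, exactly one derived entry is chosen, in the order
// new-br insertion > jalr target correction > always_taken decay; on a miss
// the generator always falls back to init_entry.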

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val robFlush = io.fromBackend.robFlush
  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = io.fromBackend.stage3Redirect

  val stage2Flush = stage2Redirect.valid || robFlush.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // resp from uBTB
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_resp.valid && bpu_s3_resp.hasRedirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.ftqIdx := ifuPtr
  io.toIfu.req.bits.target := update_target(ifuPtr.value)
  io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)

  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }

  // when fall through is smaller in value than start address, there must be a false hit
  when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }
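
  // Example of the update rule above (illustrative): if an entry predicted a
  // taken cfi at offset 7 and the backend reports a taken branch at offset 3,
  // cfiIndex_bits_wen fires (3 < 7), so bits become 3 and valid is set; if the
  // redirect instead reports the predicted cfi at offset 7 as not taken,
  // cfiIndex_valid_wen fires and valid is cleared.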

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  class RedirectInfo extends Bundle {
    val valid = Bool()
    val ftqIdx = new FtqPtr
    val ftqOffset = UInt(log2Ceil(PredictWidth).W)
    val flushItSelf = Bool()
    def apply(redirect: Valid[Redirect]) = {
      this.valid := redirect.valid
      this.ftqIdx := redirect.bits.ftqIdx
      this.ftqOffset := redirect.bits.ftqOffset
      this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level)
      this
    }
  }
  val redirectVec = Wire(Vec(3, new RedirectInfo))
  val robRedirect = robFlush

  redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map {
    case (ve, r) => ve(r)
  }

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf)
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || (i.U === offset && flushItSelf)){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
  io.toIfu.redirect.valid   := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }
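
  // Reading of the magic numbers above (hedged, inferred from the code itself):
  // commitType 4/5 appear to mark a fused pair whose second half sits 1/2
  // slots further in the same entry, while 6/7 mark a second half landing in
  // slot 0/1 of the next FTQ entry.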

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()
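
  // Commit condition in words: the entry has been written back by the IFU
  // (commPtr =/= ifuWbPtr), no late BPU-stage write may still be in flight for
  // it (may_have_stall_from_bpu), and every slot of the entry is either
  // never-valid or already committed by the ROB.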

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask  := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)


  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)


  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(22))
  })
  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )

  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}