/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.frontend.icache._
import xiangshan.backend.CtrlToFtqIO

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}
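
// A minimal illustration (not part of the design) of the flag/value encoding
// used by CircularQueuePtr: two pointers with equal values address the same
// entry, and opposite flags mean one of them has wrapped once more. E.g. for
// FtqSize = 64:
//   val a = FtqPtr(false.B, 63.U)
//   val b = a + 1.U            // value wraps to 0, flag toggles
//   // isAfter(b, a) is true; the queue is full when enq === FtqPtr.inverse(deq)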

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}
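
// FtqNRSRAM composes numRead single-read SRAMTemplates into an N-read,
// 1-write memory by replication: each read port owns a private SRAM copy,
// and every copy is written with the same data on a write. This trades area
// (numRead copies of the array) for read bandwidth without true multi-porting.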

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextLineAddr = UInt(VAddrBits.W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val fallThruError = Bool()
  // val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextLineAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    def carryPos(addr: UInt) = addr(instOffsetBits+log2Ceil(PredictWidth)+1)
    this.startAddr := resp.pc
    this.nextLineAddr := resp.pc + (FetchWidth * 4 * 2).U
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := resp.oversize
    this.fallThruError := resp.fallThruError
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}"
  }
}
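
// A worked example of getPc (concrete values assumed for illustration): with
// PredictWidth = 16, instOffsetBits = 1 and startAddr = 0x80000020, the fetch
// slots are 2 bytes apart, so getPc(4.U) = 0x80000028. When the slot offset
// plus startAddr's offset field overflows the fetch block, isNextMask(offset)
// is set and the upper bits are taken from nextLineAddr instead, so PCs that
// cross the line boundary are still reconstructed correctly.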

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}
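
// jmpInfo packs the (at most one) jump of a fetch block: valid says a
// jal/jalr exists, and bits(0)/bits(1)/bits(2) are isJalr/isCall/isRet of
// that jump. E.g. a call lowered to "jalr ra, 0(t0)" at slot 5 gives
// jmpInfo.valid = 1, bits = (1, 1, 0) and jmpOffset = 5, so hasJalr and
// hasCall hold while hasJal does not.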



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  // val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val afhob = new AllAheadFoldedHistoryOldestBits(foldedGHistInfos)
  val lastBrNumOH = UInt((numBr+1).W)

  val histPtr = new CGHPtr

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    assert(!resp.is_minimal)
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.folded_hist := resp.folded_hist
    this.afhob := resp.afhob
    this.lastBrNumOH := resp.lastBrNumOH
    this.histPtr := resp.histPtr
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}
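
// A minimal usage sketch (consumer-side names are illustrative): the backend
// owns the Flipped side and reads one entry per cycle:
//   val pc = io.ftqPcRead(uop.ftqPtr, uop.ftqOffset) // drives ptr/offset
// Ftq registers the offset before reconstructing the PC, matching the
// one-cycle latency of the underlying SyncDataModuleTemplate read.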


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a packet from bpu s3 can have reached at most f1
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}
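
// shouldFlushBy flushes a request whose ftqIdx is the redirected entry or any
// later one: !isAfter(src.bits, idx) holds when idx >= src.bits. E.g. if bpu
// s2 redirects at ftq entry 10, in-flight requests for entries 10, 11, ...
// are flushed, while a request for entry 9 may proceed.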

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}
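
// pc_reads is laid out positionally and decoded by the helpers above: index 0
// serves jump-PC reads, the next numRedirect entries serve redirect
// generation, then one read for memory dependence prediction and one for ROB
// flush; target_read returns the stored taken target or fall-through address.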


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi
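
  // A worked example of the partial fall-through address (values assumed):
  // with PredictWidth = 16 and instOffsetBits = 1, getLower takes startAddr
  // bits (5,1), the 2-byte-granular offset inside a 64-byte region. For
  // startAddr = 0x80000040 (getLower = 0) and a 4-byte jal at jmpOffset = 3,
  // jmpPft = 0 +& 3 +& 2 = 5, i.e. 0x8000004A, the instruction right after
  // the jump; the +& keeps the overflow bit, which becomes `carry` when the
  // fall-through crosses into the next aligned region.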

  // if hit, check whether a new cfi(only br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })
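
  // A worked example of the insertion one-hot for numBr = 2 (offsets
  // assumed): if the old entry records branches at offsets {4, 9} and a new
  // branch is found at offset 6, onehot(0) = (6 < 4) = false and onehot(1) =
  // (6 > 4 && 6 < 9) = true, so the new branch takes slot 1 and the old
  // offset-9 branch is displaced below when no empty slot remains.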

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents
  with HasICacheParameters {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val toPrefetch = new FtqPrefechBundle

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val backendRedirect = io.fromBackend.redirect
  val backendRedirectReg = RegNext(io.fromBackend.redirect)

  val stage2Flush = backendRedirect.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
  allowToIfu := !ifuFlush && !backendRedirect.valid && !backendRedirectReg.valid
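
  // A backend redirect takes effect over two cycles (backendRedirect now,
  // backendRedirectReg next cycle), so both must block new bpu packets and
  // requests to ifu; otherwise a stale prediction could slip in between the
  // redirect and the pointer recovery below.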

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = io.fromBpu.resp.bits.selectedResp
  val bpu_in_stage = io.fromBpu.resp.bits.selectedRespIdx
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value
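
  // s1 responses have no ftq_idx allocated yet and enqueue at bpuPtr, while
  // s2/s3 responses override an entry s1 already allocated and carry their
  // own ftq_idx: an s2 redirect for entry 7 rewrites slot 7 in place instead
  // of allocating a new entry.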

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // write the bpu resp selected among s1/s2/s3 (s1 by default)
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)
  println(f"ftq redirect SRAM: entry ${ftq_redirect_sram.io.wdata.getWidth} * ${FtqSize} * 3")
  println(f"ftq redirect SRAM: ahead fh ${ftq_redirect_sram.io.wdata.afhob.getWidth} * ${FtqSize} * 3")

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W))) // could be taken target or fallThrough
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.cfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.getTarget
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + (io.toIfu.req.fire && allowToIfu)

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.full_pred.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")
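
  // Pointer recovery sketch (values assumed): with bpuPtr = 12, an s2
  // redirect for entry 9 pulls bpuPtr back to 10 (entry 9 itself is
  // rewritten with the better prediction); ifuPtr falls back to 9 only if it
  // already ran past it, so that entry 9 is refetched, while earlier entries
  // keep their still-valid fetches.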

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  io.toIfu.req.valid := entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.ftqIdx := ifuPtr
  io.toIfu.req.bits.nextStartAddr := update_target(ifuPtr.value)
  io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)

  val toIfuPcBundle = Wire(new Ftq_RF_Components)

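  // Three-way select for the pc bundle sent to ifu, in decreasing priority:
  // (1) the entry ifu wants was written by bpu just last cycle, so the
  //     synchronous-read data is stale; take the bypass buffer instead;
  // (2) a request fired last cycle, so the (ifuPtr+1) read issued back then
  //     now holds the entry ifuPtr advanced to;
  // (3) otherwise ifuPtr stalled and the plain ifuPtr read is up to date.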
  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuPcBundle := bpu_in_bypass_buf
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuPcBundle := ftq_pc_mem.io.rdata.init.last
  }.otherwise {
    toIfuPcBundle := ftq_pc_mem.io.rdata.init.init.last
  }

  io.toIfu.req.bits.fromFtqPcBundle(toIfuPcBundle)

  // when the fall-through address is smaller than the start address, there must be a false hit
  when (toIfuPcBundle.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      // XSError(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.nextStartAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }

  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }
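
  // Timing of the false-hit check: ftb_entry_mem reads synchronously, so the
  // entry addressed with ifu_wb_idx this cycle is compared one cycle later
  // against pd_reg/wb_idx_reg, the registered predecode results of the same
  // packet. E.g. a tailSlot claiming a jal at an offset where predecode sees
  // an ordinary branch raises jal_false_hit and demotes the entry.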


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(backendRedirectReg)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }
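
  // Example of the history shift above (numBr = 2, values assumed): on a
  // branch redirect into a hit entry that already records one branch before
  // the redirected offset, shift starts at 1 for that recorded branch and
  // gains one more when the redirected instruction is a new branch the entry
  // could still accommodate; addIntoHist then marks whether the redirected
  // branch itself contributes a global-history bit.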


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    backendRedirect.valid && backendRedirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }
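
  // Update policy sketch for cfiIndex (offsets assumed): if entry 5 currently
  // records a taken cfi at offset 8, a redirect reporting a taken branch at
  // offset 3 rewrites bits to 3 (the earlier taken cfi wins); a redirect at
  // exactly offset 8 only rewrites the valid bit, clearing it when that
  // instruction turned out not taken.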

  when(backendRedirectReg.valid && lastIsMispredict) {
    updateCfiInfo(backendRedirectReg)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(backendRedirect, fromIfuRedirect)

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := backendRedirect.bits
  io.toIfu.redirect.valid   := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
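      // (encodings inferred from the indices they touch, presumably matching
      // the backend's CommitType: 4/5 also commit the next one/two slots of
      // the same fetch block, 6/7 the first/second slot of the next entry)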
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)
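
  // All commit-side reads above are synchronous, so the data returned this
  // cycle belongs to last cycle's commPtr; do_commit_ptr/do_commit and the
  // commit_* signals are RegNext'ed to line up with it. When canCommit fires
  // for entry 20, the bpu update for entry 20 is sent one cycle later under
  // do_commit.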

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_stage = RegNext(pred_stage(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.from_stage  := commit_stage
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.pred_hit          := commit_hit === h_hit || commit_hit === h_false_hit

  update.is_minimal := false.B
  update.full_pred.fromFtbEntry(ftbEntryGen.new_entry, update.pc)
  update.full_pred.br_taken_mask  := ftbEntryGen.taken_mask
  update.full_pred.jalr_target := commit_target
  update.full_pred.hit := true.B
  when (update.full_pred.is_jalr) {
    update.full_pred.targets.last := commit_target
  }

  // ****************************************************************
  // *********************** to prefetch ****************************
  // ****************************************************************

  if(cacheParams.hasPrefetch){
    val prefetchPtr = RegInit(FtqPtr(false.B, 0.U))
    prefetchPtr := prefetchPtr + io.toPrefetch.req.fire()

    when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect && !isBefore(prefetchPtr, bpu_s2_resp.ftq_idx)) {
      prefetchPtr := bpu_s2_resp.ftq_idx
    }

    when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect && !isBefore(prefetchPtr, bpu_s3_resp.ftq_idx)) {
      prefetchPtr := bpu_s3_resp.ftq_idx
      // XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
    }

    io.toPrefetch.req.valid := prefetchPtr =/= bpuPtr && entry_fetch_status(prefetchPtr.value) === f_to_send
    io.toPrefetch.req.bits.target := update_target(prefetchPtr.value)

    when(redirectVec.map(r => r.valid).reduce(_||_)){
      val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
      val next = r.ftqIdx + 1.U
      prefetchPtr := next
    }

    XSError(isBefore(bpuPtr, prefetchPtr) && !isFull(bpuPtr, prefetchPtr), "\nprefetchPtr is before bpuPtr!\n")
  }
  else {
    io.toPrefetch.req <> DontCare
  }

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ifu_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    assert(!resp.is_minimal)
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits



  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update_ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update_ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update_ftb_entry.brValids(0) && update_ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s2_entry_len_map ++
  s3_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)

  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )
  generatePerfEvent()
}