// /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 1ccea2498ffb5d5d23d1701497d16a061da1b1fd)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.{AsyncDataModuleTemplate, CircularQueuePtr, DataModuleTemplate, HasCircularQueuePtrHelper, ParallelPriorityEncoder, ParallelPriorityMux, PerfBundle, PerfEventsBundle, SRAMTemplate, SyncDataModuleTemplate, XSDebug, XSError, XSPerfAccumulate}
import xiangshan._
import xiangshan.backend.CtrlToFtqIO
import firrtl.annotations.MemoryLoadFileType

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
) {
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}
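
// A hedged usage sketch (hypothetical values, not part of this file's logic):
// FtqPtr is a circular-queue pointer whose flag toggles on wrap-around, so
// ordering checks survive the wrap:
//   val p     = FtqPtr(false.B, (FtqSize - 1).U)
//   val pNext = p + 1.U            // value wraps to 0, flag flips to true
//   val older = isBefore(p, pNext) // true: p was allocated before pNext
// isBefore/isAfter come from HasCircularQueuePtrHelper.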

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for (i <- 0 until numRead) {
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}
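
// Note: FtqNRSRAM emulates numRead read ports by keeping one SRAM copy per
// read port and broadcasting every write to all copies. A hedged
// instantiation sketch (element type and port count are made up):
//   val sram = Module(new FtqNRSRAM(UInt(8.W), numRead = 2))
//   sram.io.ren(0)   := doRead        // assumed caller-side signals
//   sram.io.raddr(0) := readAddr
//   val data = sram.io.rdata(0)       // data returns one cycle after the request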

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits-1, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
  def getFallThrough() = {
    getFallThroughAddr(this.startAddr, this.carry, this.pftAddr)
  }
  def fallThroughError() = {
    !carry && startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits) > pftAddr
  }
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4).U
    this.pftAddr :=
      Mux(resp.preds.hit, resp.ftb_entry.pftAddr,
        resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U)
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := Mux(resp.preds.hit, resp.ftb_entry.oversize, false.B)
    this.carry := Mux(resp.preds.hit, resp.ftb_entry.carry, resp.pc(instOffsetBits + log2Ceil(PredictWidth)).asBool)
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}
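
// A worked example of getPc (assuming PredictWidth = 16, instOffsetBits = 1):
// the packet-offset field is startAddr(4, 1) and the higher field is
// startAddr(VAddrBits-1, 5). With startAddr = 0x801C the offset field is 0xE,
// so getPc(4.U) overflows the field (0xE + 4 = 0x12); isNextMask(4) is then
// true and the higher bits are taken from nextRangeAddr instead:
//   getPc(4.U) = Cat(getHigher(nextRangeAddr), 0x2.U(4.W), 0.U(1.W))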

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
}
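
// jmpInfo encoding sketch: jmpInfo.valid marks that the packet contains a
// jal/jalr, and bits = (isJalr, isCall, isRet) describe that jump. For a
// call lowered to `jalr ra, ...` the bits are (1, 1, 0), so:
//   hasJalr = jmpInfo.valid && bits(0)   -> true
//   hasCall = jmpInfo.valid && bits(1)   -> true
//   hasJal  = jmpInfo.valid && !bits(0)  -> false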



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the IFU pipeline is not stalled,
    // a packet from BPU s3 can have reached stage f1 at most
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}
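
// Flush-comparison sketch: shouldFlushBy(src, idx) is src.valid &&
// !isAfter(src.bits, idx), so a BPU s2/s3 redirect at FTQ entry N flushes any
// in-flight IFU request whose index is N or later. For example, a request at
// ftqIdx = 5 is flushed by an s2 redirect carrying ftqIdx = 5 (its prediction
// is being replaced), but not by one carrying ftqIdx = 6.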

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
}
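
// Read-port layout sketch for pc_reads (1 + numRedirect + 1 + 1 entries):
//   index 0                -> getJumpPcRead
//   indices 1..numRedirect -> getRedirectPcRead
//   index numRedirect + 1  -> getMemPredPcRead (init.last)
//   index numRedirect + 2  -> getRobFlushPcRead (last)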


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
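  // A worked example (assuming PredictWidth = 16, instOffsetBits = 1):
  // carryPos = 4 + 1 + 1 = 6, so getLower(pc) = pc(5, 1), one bit wider than
  // a packet offset. For a packet at 0x80000010 ending in an RVI jump at
  // offset 14, jmpPft = getLower(0x80000010) +& 14.U +& 2.U = 24.U, and its
  // bit carryPos - instOffsetBits = 5 feeds init_entry.carry below.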
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi (only a br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means the new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })
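
  // Insertion one-hot sketch (assuming numBr = 2, hypothetical offsets):
  // with recorded br offsets {3, 7} and a newly detected br at offset 5,
  // case 0 fails (5 > 3) while case 1 holds, so new_br_insert_onehot =
  // (false, true): slot 1 is overwritten with the new br, and the evicted
  // br at offset 7 later supplies the new pftAddr in the pft_need_to_change
  // block below.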

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, the new br must come before j, so the address of j
  //    becomes the new pft
  // 2. oe: | br | br |, the new br could land anywhere between them, so the
  //    new pft is the address of either the previous last br or the new br
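  // For example (hypothetical offsets): old entry | br@2 | br@6 | with a new
  // br at offset 4 gives new_br_insert_onehot = (false, true), so the evicted
  // br@6 supplies new_pft_offset = 6; if instead no slot qualifies (one-hot
  // all false, the new br lies beyond both), the new br itself becomes the pft.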
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only the 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val robFlush = io.fromBackend.robFlush
  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = io.fromBackend.stage3Redirect

  val stage2Flush = stage2Redirect.valid || robFlush.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
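  // Port-index sketch (matching the tally above): raddr(0) serves the jumpPc
  // read and raddr(1..numRedirect) the redirect reads, followed by one port
  // each for the loadPred pc and the robFlush pc, two ports for the ifu
  // requests at ifuPtr and ifuPtr+1 (raddr.init.init.last / raddr.init.last),
  // and finally the commit-update read (raddr.last).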
  // resp from uBTB
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_redirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of the bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_redirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of the bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  val toIfuReq = Wire(chiselTypeOf(io.toIfu.req))

  toIfuReq.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  toIfuReq.bits.ftqIdx := ifuPtr
  toIfuReq.bits.target := update_target(ifuPtr.value)
  toIfuReq.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)
  toIfuReq.bits.fallThruError  := false.B

  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    toIfuReq.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    toIfuReq.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }

  io.toIfu.req <> toIfuReq

  // when the fall-through address is smaller than the start address, there must be a false hit
  when (toIfuReq.bits.fallThroughError() && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
    }
    io.toIfu.req.bits.fallThruAddr   := toIfuReq.bits.startAddr + (FetchWidth*4).U
    io.toIfu.req.bits.fallThruError  := true.B
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", toIfuReq.bits.startAddr, toIfuReq.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(toIfuReq.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(toIfuReq.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check the cfis that bpu predicted

    // branches predicted by bpu but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }
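
  // Update-semantics sketch (hypothetical offsets): if entry r_idx recorded a
  // taken cfi at offset 6 and a redirect reports a taken branch at offset 2,
  // cfiIndex_bits_wen fires (2 < 6) and the entry now points at offset 2.
  // If the redirect instead reports offset 6 as not taken, cfiIndex_valid_wen
  // fires with r_taken = false and the valid bit is cleared.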

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  class RedirectInfo extends Bundle {
    val valid = Bool()
    val ftqIdx = new FtqPtr
    val ftqOffset = UInt(log2Ceil(PredictWidth).W)
    val flushItSelf = Bool()
    def apply(redirect: Valid[Redirect]) = {
      this.valid := redirect.valid
      this.ftqIdx := redirect.bits.ftqIdx
      this.ftqOffset := redirect.bits.ftqOffset
      this.flushItSelf := RedirectLevel.flushItself(redirect.bits.level)
      this
    }
  }
  val redirectVec = Wire(Vec(3, new RedirectInfo))
  val robRedirect = robFlush

  redirectVec.zip(Seq(robRedirect, stage2Redirect, fromIfuRedirect)).map {
    case (ve, r) => ve(r)
  }

  // when a redirect comes, we should reset the ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, r.flushItSelf)
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
  io.toIfu.redirect.valid   := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask  := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for ((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)
  val perfinfo = IO(new Bundle() {
    val perfEvents = Output(new PerfEventsBundle(22))
  })
  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && !enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )

  // note: perfEvents lists 24 events but PerfEventsBundle(22) exposes 22 slots;
  // zip truncates to the shorter side, so the last two events are not connected
  for (((perf_out, (perf_name, perf)), i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}