// xref: /XiangShan/src/main/scala/xiangshan/frontend/NewFtq.scala (revision 1ca0e4f33f402f31daec0e57d270079d2db13562)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.backend.CtrlToFtqIO

class FtqPtr(implicit p: Parameters) extends CircularQueuePtr[FtqPtr](
  p => p(XSCoreParamsKey).FtqSize
){
  override def cloneType = (new FtqPtr).asInstanceOf[this.type]
}

object FtqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): FtqPtr = {
    val ptr = Wire(new FtqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
  def inverse(ptr: FtqPtr)(implicit p: Parameters): FtqPtr = {
    apply(!ptr.flag, ptr.value)
  }
}
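
// A minimal usage sketch of the pointer arithmetic above (illustrative only;
// it relies on the CircularQueuePtr semantics from utils, where `value` wraps
// modulo FtqSize and `flag` flips on each wrap):
//   val ptr    = FtqPtr(false.B, (FtqSize - 1).U)
//   val next   = ptr + 1.U             // wraps to (flag = true, value = 0)
//   val lapped = FtqPtr.inverse(ptr)   // same value, opposite flag: exactly
//                                      // one full queue lap away from ptr,
//                                      // which is how fullness is detected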

class FtqNRSRAM[T <: Data](gen: T, numRead: Int)(implicit p: Parameters) extends XSModule {

  val io = IO(new Bundle() {
    val raddr = Input(Vec(numRead, UInt(log2Up(FtqSize).W)))
    val ren = Input(Vec(numRead, Bool()))
    val rdata = Output(Vec(numRead, gen))
    val waddr = Input(UInt(log2Up(FtqSize).W))
    val wen = Input(Bool())
    val wdata = Input(gen)
  })

  for(i <- 0 until numRead){
    val sram = Module(new SRAMTemplate(gen, FtqSize))
    sram.io.r.req.valid := io.ren(i)
    sram.io.r.req.bits.setIdx := io.raddr(i)
    io.rdata(i) := sram.io.r.resp.data(0)
    sram.io.w.req.valid := io.wen
    sram.io.w.req.bits.setIdx := io.waddr
    sram.io.w.req.bits.data := VecInit(io.wdata)
  }

}
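
// Design note on FtqNRSRAM: one SRAMTemplate is instantiated per read port and
// every instance is written with the same data, so each port reads its own
// identical copy (trading area for read bandwidth). A hypothetical two-port
// instantiation sketch (the consumer signal names are illustrative, not from
// this file):
//   val sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, numRead = 2))
//   sram.io.ren(0)   := redirectValid
//   sram.io.raddr(0) := redirectFtqIdx      // request in cycle n
//   val rdata0 = sram.io.rdata(0)           // data in cycle n+1 (sync read)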

class Ftq_RF_Components(implicit p: Parameters) extends XSBundle with BPUUtils {
  // TODO: move pftAddr, oversize, carry to another mem
  val startAddr = UInt(VAddrBits.W)
  val nextRangeAddr = UInt(VAddrBits.W)
  val pftAddr = UInt((log2Ceil(PredictWidth)+1).W)
  val isNextMask = Vec(PredictWidth, Bool())
  val oversize = Bool()
  val carry = Bool()
  def getPc(offset: UInt) = {
    def getHigher(pc: UInt) = pc(VAddrBits-1, log2Ceil(PredictWidth)+instOffsetBits+1)
    def getOffset(pc: UInt) = pc(log2Ceil(PredictWidth)+instOffsetBits, instOffsetBits)
    Cat(getHigher(Mux(isNextMask(offset) && startAddr(log2Ceil(PredictWidth)+instOffsetBits), nextRangeAddr, startAddr)),
        getOffset(startAddr)+offset, 0.U(instOffsetBits.W))
  }
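  // Worked example for getPc (illustrative numbers, assuming PredictWidth = 16
  // and instOffsetBits = 1): for startAddr = 0x1000 and offset = 3, the offset
  // field startAddr(5,1) = 0 plus 3 gives 3, so the returned pc is 0x1006.
  // When the sum crosses into the next fetch range, isNextMask selects the
  // upper bits of nextRangeAddr instead of startAddr.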
  def getFallThrough() = {
    def getHigher(pc: UInt) = pc.head(VAddrBits-log2Ceil(PredictWidth)-instOffsetBits-1)
    val startHigher = getHigher(startAddr)
    val nextHigher  = getHigher(nextRangeAddr)
    val higher = Mux(carry, nextHigher, startHigher)
    Cat(higher, pftAddr, 0.U(instOffsetBits.W))
  }
  def fallThroughError() = {
    val startLower        = Cat(0.U(1.W), startAddr(instOffsetBits+log2Ceil(PredictWidth), instOffsetBits))
    val endLowerwithCarry = Cat(carry,    pftAddr)
    require(startLower.getWidth == log2Ceil(PredictWidth)+2)
    require(endLowerwithCarry.getWidth == log2Ceil(PredictWidth)+2)
    startLower >= endLowerwithCarry || (endLowerwithCarry - startLower) > (PredictWidth+1).U
  }
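  // Worked example for fallThroughError (illustrative, PredictWidth = 16):
  // startLower and endLowerwithCarry are compared as (log2Ceil(16)+2)-bit
  // offsets within the fetch range. With startLower = 5, an entry recording
  // endLowerwithCarry = 5 errors out (fall-through not strictly after start),
  // and endLowerwithCarry = 23 also errors out since 23 - 5 = 18 > 17, i.e.
  // the range would span more than PredictWidth + 1 slots.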
  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.startAddr := resp.pc
    this.nextRangeAddr := resp.pc + (FetchWidth * 4 * 2).U
    this.pftAddr :=
      Mux(resp.preds.hit, resp.ftb_entry.pftAddr,
        resp.pc(instOffsetBits + log2Ceil(PredictWidth), instOffsetBits) ^ (1 << log2Ceil(PredictWidth)).U)
    this.isNextMask := VecInit((0 until PredictWidth).map(i =>
      (resp.pc(log2Ceil(PredictWidth), 1) +& i.U)(log2Ceil(PredictWidth)).asBool()
    ))
    this.oversize := Mux(resp.preds.hit, resp.ftb_entry.oversize, false.B)
    this.carry := Mux(resp.preds.hit, resp.ftb_entry.carry, resp.pc(instOffsetBits + log2Ceil(PredictWidth)).asBool)
    this
  }
  override def toPrintable: Printable = {
    p"startAddr:${Hexadecimal(startAddr)}, fallThru:${Hexadecimal(getFallThrough())}"
  }
}

class Ftq_pd_Entry(implicit p: Parameters) extends XSBundle {
  val brMask = Vec(PredictWidth, Bool())
  val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
  val jmpOffset = UInt(log2Ceil(PredictWidth).W)
  val jalTarget = UInt(VAddrBits.W)
  val rvcMask = Vec(PredictWidth, Bool())
  def hasJal  = jmpInfo.valid && !jmpInfo.bits(0)
  def hasJalr = jmpInfo.valid && jmpInfo.bits(0)
  def hasCall = jmpInfo.valid && jmpInfo.bits(1)
  def hasRet  = jmpInfo.valid && jmpInfo.bits(2)

  def fromPdWb(pdWb: PredecodeWritebackBundle) = {
    val pds = pdWb.pd
    this.brMask := VecInit(pds.map(pd => pd.isBr && pd.valid))
    this.jmpInfo.valid := VecInit(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid)).asUInt.orR
    this.jmpInfo.bits := ParallelPriorityMux(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid),
                                             pds.map(pd => VecInit(pd.isJalr, pd.isCall, pd.isRet)))
    this.jmpOffset := ParallelPriorityEncoder(pds.map(pd => (pd.isJal || pd.isJalr) && pd.valid))
    this.rvcMask := VecInit(pds.map(pd => pd.isRVC))
    this.jalTarget := pdWb.jalTarget
  }

  def toPd(offset: UInt) = {
    require(offset.getWidth == log2Ceil(PredictWidth))
    val pd = Wire(new PreDecodeInfo)
    pd.valid := true.B
    pd.isRVC := rvcMask(offset)
    val isBr = brMask(offset)
    val isJalr = offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(0)
    pd.brType := Cat(offset === jmpOffset && jmpInfo.valid, isJalr || isBr)
    pd.isCall := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(1)
    pd.isRet  := offset === jmpOffset && jmpInfo.valid && jmpInfo.bits(2)
    pd
  }
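  // Note on the brType encoding produced by toPd above: Cat(isJmp, isJalr || isBr)
  // yields b00 for non-CFI, b01 for a conditional branch, b10 for jal and b11
  // for jalr, matching the PreDecodeInfo convention.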
}



class Ftq_Redirect_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val rasSp = UInt(log2Ceil(RasSize).W)
  val rasEntry = new RASEntry
  val specCnt = Vec(numBr, UInt(10.W))
  // val ghist = new ShiftingGlobalHistory
  val folded_hist = new AllFoldedHistories(foldedGHistInfos)
  val histPtr = new CGHPtr
  val phist = UInt(PathHistoryLength.W)
  val phNewBit = UInt(1.W)

  def fromBranchPrediction(resp: BranchPredictionBundle) = {
    this.rasSp := resp.rasSp
    this.rasEntry := resp.rasTop
    this.specCnt := resp.specCnt
    // this.ghist := resp.ghist
    this.folded_hist := resp.folded_hist
    this.histPtr := resp.histPtr
    this.phist := resp.phist
    this.phNewBit := resp.pc(instOffsetBits)
    this
  }
}

class Ftq_1R_SRAMEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
  val meta = UInt(MaxMetaLength.W)
}

class Ftq_Pred_Info(implicit p: Parameters) extends XSBundle {
  val target = UInt(VAddrBits.W)
  val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
}

// class FtqEntry(implicit p: Parameters) extends XSBundle with HasBPUConst {
//   val startAddr = UInt(VAddrBits.W)
//   val fallThruAddr = UInt(VAddrBits.W)
//   val isNextMask = Vec(PredictWidth, Bool())

//   val meta = UInt(MaxMetaLength.W)

//   val rasSp = UInt(log2Ceil(RasSize).W)
//   val rasEntry = new RASEntry
//   val hist = new ShiftingGlobalHistory
//   val specCnt = Vec(numBr, UInt(10.W))

//   val valids = Vec(PredictWidth, Bool())
//   val brMask = Vec(PredictWidth, Bool())
//   // isJalr, isCall, isRet
//   val jmpInfo = ValidUndirectioned(Vec(3, Bool()))
//   val jmpOffset = UInt(log2Ceil(PredictWidth).W)

//   val mispredVec = Vec(PredictWidth, Bool())
//   val cfiIndex = ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))
//   val target = UInt(VAddrBits.W)
// }

class FtqRead[T <: Data](private val gen: T)(implicit p: Parameters) extends XSBundle {
  val ptr = Output(new FtqPtr)
  val offset = Output(UInt(log2Ceil(PredictWidth).W))
  val data = Input(gen)
  def apply(ptr: FtqPtr, offset: UInt) = {
    this.ptr := ptr
    this.offset := offset
    this.data
  }
  override def cloneType = (new FtqRead(gen)).asInstanceOf[this.type]
}
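
// Usage sketch for FtqRead (illustrative; `uop` and `io.ftqPcRead` are
// hypothetical consumer names, not from this file): drive ptr/offset in
// cycle n and sample data in cycle n+1, because the backing mem in Ftq is
// synchronous-read and the offset is RegNext-aligned to match:
//   val pc = io.ftqPcRead(uop.cf.ftqPtr, uop.cf.ftqOffset)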


class FtqToBpuIO(implicit p: Parameters) extends XSBundle {
  val redirect = Valid(new BranchPredictionRedirect)
  val update = Valid(new BranchPredictionUpdate)
  val enq_ptr = Output(new FtqPtr)
}

class FtqToIfuIO(implicit p: Parameters) extends XSBundle with HasCircularQueuePtrHelper {
  val req = Decoupled(new FetchRequestBundle)
  val redirect = Valid(new Redirect)
  val flushFromBpu = new Bundle {
    // when the ifu pipeline is not stalled,
    // a fetch packet from bpu s3 can have advanced at most to f1
    val s2 = Valid(new FtqPtr)
    val s3 = Valid(new FtqPtr)
    def shouldFlushBy(src: Valid[FtqPtr], idx_to_flush: FtqPtr) = {
      src.valid && !isAfter(src.bits, idx_to_flush)
    }
    def shouldFlushByStage2(idx: FtqPtr) = shouldFlushBy(s2, idx)
    def shouldFlushByStage3(idx: FtqPtr) = shouldFlushBy(s3, idx)
  }
}

trait HasBackendRedirectInfo extends HasXSParameter {
  def numRedirect = exuParameters.JmpCnt + exuParameters.AluCnt + 1
  def isLoadReplay(r: Valid[Redirect]) = r.bits.flushItself()
}

class FtqToCtrlIO(implicit p: Parameters) extends XSBundle with HasBackendRedirectInfo {
  val pc_reads = Vec(1 + numRedirect + 1 + 1, Flipped(new FtqRead(UInt(VAddrBits.W))))
  val target_read = Flipped(new FtqRead(UInt(VAddrBits.W)))
  def getJumpPcRead = pc_reads.head
  def getRedirectPcRead = VecInit(pc_reads.tail.dropRight(2))
  def getMemPredPcRead = pc_reads.init.last
  def getRobFlushPcRead = pc_reads.last
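  // Read-port layout, as exposed by the accessors above:
  //   pc_reads(0)                 -> jump pc
  //   pc_reads(1 .. numRedirect)  -> redirect pcs
  //   pc_reads(numRedirect + 1)   -> memory-violation-predictor pc
  //   pc_reads(numRedirect + 2)   -> rob-flush pc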
}


class FTBEntryGen(implicit p: Parameters) extends XSModule with HasBackendRedirectInfo with HasBPUParameter {
  val io = IO(new Bundle {
    val start_addr = Input(UInt(VAddrBits.W))
    val old_entry = Input(new FTBEntry)
    val pd = Input(new Ftq_pd_Entry)
    val cfiIndex = Flipped(Valid(UInt(log2Ceil(PredictWidth).W)))
    val target = Input(UInt(VAddrBits.W))
    val hit = Input(Bool())
    val mispredict_vec = Input(Vec(PredictWidth, Bool()))

    val new_entry = Output(new FTBEntry)
    val new_br_insert_pos = Output(Vec(numBr, Bool()))
    val taken_mask = Output(Vec(numBr, Bool()))
    val mispred_mask = Output(Vec(numBr+1, Bool()))

    // for perf counters
    val is_init_entry = Output(Bool())
    val is_old_entry = Output(Bool())
    val is_new_br = Output(Bool())
    val is_jalr_target_modified = Output(Bool())
    val is_always_taken_modified = Output(Bool())
    val is_br_full = Output(Bool())
  })

  // no mispredictions detected at predecode
  val hit = io.hit
  val pd = io.pd

  val init_entry = WireInit(0.U.asTypeOf(new FTBEntry))


  val cfi_is_br = pd.brMask(io.cfiIndex.bits) && io.cfiIndex.valid
  val entry_has_jmp = pd.jmpInfo.valid
  val new_jmp_is_jal  = entry_has_jmp && !pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_jalr = entry_has_jmp &&  pd.jmpInfo.bits(0) && io.cfiIndex.valid
  val new_jmp_is_call = entry_has_jmp &&  pd.jmpInfo.bits(1) && io.cfiIndex.valid
  val new_jmp_is_ret  = entry_has_jmp &&  pd.jmpInfo.bits(2) && io.cfiIndex.valid
  val last_jmp_rvi = entry_has_jmp && pd.jmpOffset === (PredictWidth-1).U && !pd.rvcMask.last
  val last_br_rvi = cfi_is_br && io.cfiIndex.bits === (PredictWidth-1).U && !pd.rvcMask.last

  val cfi_is_jal = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jal
  val cfi_is_jalr = io.cfiIndex.bits === pd.jmpOffset && new_jmp_is_jalr

  def carryPos = log2Ceil(PredictWidth)+instOffsetBits+1
  def getLower(pc: UInt) = pc(carryPos-1, instOffsetBits)
  // if not hit, establish a new entry
  init_entry.valid := true.B
  // tag is left for ftb to assign

  // case br
  val init_br_slot = init_entry.getSlotForBr(0)
  when (cfi_is_br) {
    init_br_slot.valid := true.B
    init_br_slot.offset := io.cfiIndex.bits
    init_br_slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && numBr == 1)
    init_entry.always_taken(0) := true.B // set to always taken on init
  }
  // init_entry.isBrSharing := shareTailSlot.B && (numBr == 1).B && cfi_is_br

  // case jmp
  when (entry_has_jmp) {
    init_entry.tailSlot.offset := pd.jmpOffset
    init_entry.tailSlot.valid := new_jmp_is_jal || new_jmp_is_jalr
    init_entry.tailSlot.setLowerStatByTarget(io.start_addr, Mux(cfi_is_jalr, io.target, pd.jalTarget), isShare=false)
  }

  val jmpPft = getLower(io.start_addr) +& pd.jmpOffset +& Mux(pd.rvcMask(pd.jmpOffset), 1.U, 2.U)
  init_entry.pftAddr := Mux(entry_has_jmp, jmpPft, getLower(io.start_addr) + ((FetchWidth*4)>>instOffsetBits).U + Mux(last_br_rvi, 1.U, 0.U))
  init_entry.carry   := Mux(entry_has_jmp, jmpPft(carryPos-instOffsetBits), io.start_addr(carryPos-1) || (io.start_addr(carryPos-2, instOffsetBits).andR && last_br_rvi))
  init_entry.isJalr := new_jmp_is_jalr
  init_entry.isCall := new_jmp_is_call
  init_entry.isRet  := new_jmp_is_ret
  init_entry.last_is_rvc := Mux(entry_has_jmp, pd.rvcMask(pd.jmpOffset), pd.rvcMask.last)

  init_entry.oversize := last_br_rvi || last_jmp_rvi

  // if hit, check whether a new cfi (only a br is possible) is detected
  val oe = io.old_entry
  val br_recorded_vec = oe.getBrRecordedVec(io.cfiIndex.bits)
  val br_recorded = br_recorded_vec.asUInt.orR
  val is_new_br = cfi_is_br && !br_recorded
  val new_br_offset = io.cfiIndex.bits
  // vec(i) means new br will be inserted BEFORE old br(i)
  val allBrSlotsVec = oe.allSlotsForBr
  val new_br_insert_onehot = VecInit((0 until numBr).map{
    i => i match {
      case 0 =>
        !allBrSlotsVec(0).valid || new_br_offset < allBrSlotsVec(0).offset
      case idx =>
        allBrSlotsVec(idx-1).valid && new_br_offset > allBrSlotsVec(idx-1).offset &&
        (!allBrSlotsVec(idx).valid || new_br_offset < allBrSlotsVec(idx).offset)
    }
  })
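  // Worked example (illustrative, numBr = 2): if the old entry holds one br at
  // offset 6, a new br at offset 3 yields insert_onehot = b01 (insert at slot 0,
  // the old br is shifted towards the tail below), while a new br at offset 9
  // yields b10 (append at slot 1).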

  val old_entry_modified = WireInit(io.old_entry)
  for (i <- 0 until numBr) {
    val slot = old_entry_modified.allSlotsForBr(i)
    when (new_br_insert_onehot(i)) {
      slot.valid := true.B
      slot.offset := new_br_offset
      slot.setLowerStatByTarget(io.start_addr, io.target, shareTailSlot && i == numBr-1)
      old_entry_modified.always_taken(i) := true.B
    }.elsewhen (new_br_offset > oe.allSlotsForBr(i).offset) {
      old_entry_modified.always_taken(i) := false.B
      // all other fields remain unchanged
    }.otherwise {
      // case i == 0, remain unchanged
      if (i != 0) {
        val noNeedToMoveFromFormerSlot = (shareTailSlot && i == numBr-1).B && !oe.brSlots.last.valid
        when (!noNeedToMoveFromFormerSlot) {
          slot.fromAnotherSlot(oe.allSlotsForBr(i-1))
          old_entry_modified.always_taken(i) := oe.always_taken(i)
        }
      }
    }
  }

  // two circumstances:
  // 1. oe: | br | j  |, new br should be in front of j, thus addr of j should be new pft
  // 2. oe: | br | br |, new br could be anywhere between, thus new pft is the addr of either
  //        the previous last br or the new br
  val may_have_to_replace = oe.noEmptySlotForNewBr
  val pft_need_to_change = is_new_br && may_have_to_replace
  // it should either be the given last br or the new br
  when (pft_need_to_change) {
    val new_pft_offset =
      Mux(!new_br_insert_onehot.asUInt.orR,
        new_br_offset, oe.allSlotsForBr.last.offset)

    // set jmp to invalid
    if (!shareTailSlot) {
      old_entry_modified.tailSlot.valid := false.B
    }
    old_entry_modified.pftAddr := getLower(io.start_addr) + new_pft_offset
    old_entry_modified.last_is_rvc := pd.rvcMask(new_pft_offset - 1.U) // TODO: fix this
    old_entry_modified.carry := (getLower(io.start_addr) +& new_pft_offset).head(1).asBool
    old_entry_modified.oversize := false.B
    old_entry_modified.isCall := false.B
    old_entry_modified.isRet := false.B
    old_entry_modified.isJalr := false.B
  }

  val old_entry_jmp_target_modified = WireInit(oe)
  val old_target = oe.tailSlot.getTarget(io.start_addr) // may be wrong because we store only 20 lowest bits
  val old_tail_is_jmp = !oe.tailSlot.sharing || !shareTailSlot.B
  val jalr_target_modified = cfi_is_jalr && (old_target =/= io.target) && old_tail_is_jmp // TODO: pass full jalr target
  when (jalr_target_modified) {
    old_entry_jmp_target_modified.setByJmpTarget(io.start_addr, io.target)
    old_entry_jmp_target_modified.always_taken := 0.U.asTypeOf(Vec(numBr, Bool()))
  }

  val old_entry_always_taken = WireInit(oe)
  val always_taken_modified_vec = Wire(Vec(numBr, Bool())) // whether modified or not
  for (i <- 0 until numBr) {
    old_entry_always_taken.always_taken(i) :=
      oe.always_taken(i) && io.cfiIndex.valid && oe.brValids(i) && io.cfiIndex.bits === oe.brOffset(i)
    always_taken_modified_vec(i) := oe.always_taken(i) && !old_entry_always_taken.always_taken(i)
  }
  val always_taken_modified = always_taken_modified_vec.reduce(_||_)



  val derived_from_old_entry =
    Mux(is_new_br, old_entry_modified,
      Mux(jalr_target_modified, old_entry_jmp_target_modified, old_entry_always_taken))


  io.new_entry := Mux(!hit, init_entry, derived_from_old_entry)
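  // Selection priority when the prediction hit (an illustrative restatement of
  // the muxes above): a newly detected br rewrites the slots first; otherwise
  // a changed jalr target rewrites the tail slot; otherwise only the
  // always_taken bits are downgraded. On a miss, init_entry is built from
  // predecode info alone.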

  io.new_br_insert_pos := new_br_insert_onehot
  io.taken_mask := VecInit((io.new_entry.brOffset zip io.new_entry.brValids).map{
    case (off, v) => io.cfiIndex.bits === off && io.cfiIndex.valid && v
  })
  for (i <- 0 until numBr) {
    io.mispred_mask(i) := io.new_entry.brValids(i) && io.mispredict_vec(io.new_entry.brOffset(i))
  }
  io.mispred_mask.last := io.new_entry.jmpValid && io.mispredict_vec(pd.jmpOffset)

  // for perf counters
  io.is_init_entry := !hit
  io.is_old_entry := hit && !is_new_br && !jalr_target_modified && !always_taken_modified
  io.is_new_br := hit && is_new_br
  io.is_jalr_target_modified := hit && jalr_target_modified
  io.is_always_taken_modified := hit && always_taken_modified
  io.is_br_full := hit && is_new_br && may_have_to_replace
}

class Ftq(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper
  with HasBackendRedirectInfo with BPUUtils with HasBPUConst with HasPerfEvents {
  val io = IO(new Bundle {
    val fromBpu = Flipped(new BpuToFtqIO)
    val fromIfu = Flipped(new IfuToFtqIO)
    val fromBackend = Flipped(new CtrlToFtqIO)

    val toBpu = new FtqToBpuIO
    val toIfu = new FtqToIfuIO
    val toBackend = new FtqToCtrlIO

    val bpuInfo = new Bundle {
      val bpRight = Output(UInt(XLEN.W))
      val bpWrong = Output(UInt(XLEN.W))
    }
  })
  io.bpuInfo := DontCare

  val robFlush = io.fromBackend.robFlush
  val stage2Redirect = io.fromBackend.stage2Redirect
  val stage3Redirect = io.fromBackend.stage3Redirect

  val stage2Flush = stage2Redirect.valid || robFlush.valid
  val backendFlush = stage2Flush || RegNext(stage2Flush)
  val ifuFlush = Wire(Bool())

  val flush = stage2Flush || RegNext(stage2Flush)

  val allowBpuIn, allowToIfu = WireInit(false.B)
  val flushToIfu = !allowToIfu
  allowBpuIn := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid
  allowToIfu := !ifuFlush && !robFlush.valid && !stage2Redirect.valid && !stage3Redirect.valid

  val bpuPtr, ifuPtr, ifuWbPtr, commPtr = RegInit(FtqPtr(false.B, 0.U))
  val validEntries = distanceBetween(bpuPtr, commPtr)

  // **********************************************************************
  // **************************** enq from bpu ****************************
  // **********************************************************************
  val new_entry_ready = validEntries < FtqSize.U
  io.fromBpu.resp.ready := new_entry_ready

  val bpu_s2_resp = io.fromBpu.resp.bits.s2
  val bpu_s3_resp = io.fromBpu.resp.bits.s3
  val bpu_s2_redirect = bpu_s2_resp.valid && bpu_s2_resp.hasRedirect
  val bpu_s3_redirect = bpu_s3_resp.valid && bpu_s3_resp.hasRedirect

  io.toBpu.enq_ptr := bpuPtr
  val enq_fire = io.fromBpu.resp.fire() && allowBpuIn // from bpu s1
  val bpu_in_fire = (io.fromBpu.resp.fire() || bpu_s2_redirect || bpu_s3_redirect) && allowBpuIn

  val bpu_in_resp = WireInit(io.fromBpu.resp.bits.selectedResp)
  val bpu_in_stage = WireInit(io.fromBpu.resp.bits.selectedRespIdx)
  val bpu_in_resp_ptr = Mux(bpu_in_stage === BP_S1, bpuPtr, bpu_in_resp.ftq_idx)
  val bpu_in_resp_idx = bpu_in_resp_ptr.value

  // read ports:                            jumpPc + redirects + loadPred + robFlush + ifuReq1 + ifuReq2 + commitUpdate
  val ftq_pc_mem = Module(new SyncDataModuleTemplate(new Ftq_RF_Components, FtqSize, 1+numRedirect+2+1+1+1, 1))
  // write the selected bpu response (s1 by default, overridden by s2/s3 redirects)
  ftq_pc_mem.io.wen(0) := bpu_in_fire
  ftq_pc_mem.io.waddr(0) := bpu_in_resp_idx
  ftq_pc_mem.io.wdata(0).fromBranchPrediction(bpu_in_resp)

  //                                                            ifuRedirect + backendRedirect + commit
  val ftq_redirect_sram = Module(new FtqNRSRAM(new Ftq_Redirect_SRAMEntry, 1+1+1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_redirect_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_redirect_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_redirect_sram.io.wdata.fromBranchPrediction(io.fromBpu.resp.bits.lastStage)

  val ftq_meta_1r_sram = Module(new FtqNRSRAM(new Ftq_1R_SRAMEntry, 1))
  // this info is intended to be enqueued at the last stage of bpu
  ftq_meta_1r_sram.io.wen := io.fromBpu.resp.bits.lastStage.valid
  ftq_meta_1r_sram.io.waddr := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftq_meta_1r_sram.io.wdata.meta := io.fromBpu.resp.bits.meta
  //                                                            ifuRedirect + backendRedirect + commit
  val ftb_entry_mem = Module(new SyncDataModuleTemplate(new FTBEntry, FtqSize, 1+1+1, 1))
  ftb_entry_mem.io.wen(0) := io.fromBpu.resp.bits.lastStage.valid
  ftb_entry_mem.io.waddr(0) := io.fromBpu.resp.bits.lastStage.ftq_idx.value
  ftb_entry_mem.io.wdata(0) := io.fromBpu.resp.bits.lastStage.ftb_entry


  // multi-write
  val update_target = Reg(Vec(FtqSize, UInt(VAddrBits.W)))
  val cfiIndex_vec = Reg(Vec(FtqSize, ValidUndirectioned(UInt(log2Ceil(PredictWidth).W))))
  val mispredict_vec = Reg(Vec(FtqSize, Vec(PredictWidth, Bool())))
  val pred_stage = Reg(Vec(FtqSize, UInt(2.W)))

  val c_invalid :: c_valid :: c_commited :: Nil = Enum(3)
  val commitStateQueue = RegInit(VecInit(Seq.fill(FtqSize) {
    VecInit(Seq.fill(PredictWidth)(c_invalid))
  }))

  val f_to_send :: f_sent :: Nil = Enum(2)
  val entry_fetch_status = RegInit(VecInit(Seq.fill(FtqSize)(f_sent)))

  val h_not_hit :: h_false_hit :: h_hit :: Nil = Enum(3)
  val entry_hit_status = RegInit(VecInit(Seq.fill(FtqSize)(h_not_hit)))
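  // Per-entry status summary (illustrative; the transitions are driven below):
  //   commitStateQueue  : c_invalid -> c_valid (ifu writeback) -> c_commited (rob commit)
  //   entry_fetch_status: f_to_send (on bpu enq) -> f_sent (ifu req fired)
  //   entry_hit_status  : h_hit / h_not_hit (bpu s2) -> h_false_hit (predecode check)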


  when (bpu_in_fire) {
    entry_fetch_status(bpu_in_resp_idx) := f_to_send
    commitStateQueue(bpu_in_resp_idx) := VecInit(Seq.fill(PredictWidth)(c_invalid))
    cfiIndex_vec(bpu_in_resp_idx) := bpu_in_resp.genCfiIndex
    mispredict_vec(bpu_in_resp_idx) := WireInit(VecInit(Seq.fill(PredictWidth)(false.B)))
    update_target(bpu_in_resp_idx) := bpu_in_resp.target
    pred_stage(bpu_in_resp_idx) := bpu_in_stage
  }

  bpuPtr := bpuPtr + enq_fire
  ifuPtr := ifuPtr + io.toIfu.req.fire

  // only use ftb result to assign hit status
  when (bpu_s2_resp.valid) {
    entry_hit_status(bpu_s2_resp.ftq_idx.value) := Mux(bpu_s2_resp.preds.hit, h_hit, h_not_hit)
  }


  io.toIfu.flushFromBpu.s2.valid := bpu_s2_redirect
  io.toIfu.flushFromBpu.s2.bits := bpu_s2_resp.ftq_idx
  when (bpu_s2_resp.valid && bpu_s2_resp.hasRedirect) {
    bpuPtr := bpu_s2_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s2 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s2_resp.ftq_idx)) {
      ifuPtr := bpu_s2_resp.ftq_idx
    }
  }

  io.toIfu.flushFromBpu.s3.valid := bpu_s3_redirect
  io.toIfu.flushFromBpu.s3.bits := bpu_s3_resp.ftq_idx
  when (bpu_s3_resp.valid && bpu_s3_resp.hasRedirect) {
    bpuPtr := bpu_s3_resp.ftq_idx + 1.U
    // only when ifuPtr runs ahead of bpu s3 resp should we recover it
    when (!isBefore(ifuPtr, bpu_s3_resp.ftq_idx)) {
      ifuPtr := bpu_s3_resp.ftq_idx
    }
    XSError(true.B, "\ns3_redirect mechanism not implemented!\n")
  }

  XSError(isBefore(bpuPtr, ifuPtr) && !isFull(bpuPtr, ifuPtr), "\nifuPtr is before bpuPtr!\n")

  // ****************************************************************
  // **************************** to ifu ****************************
  // ****************************************************************
  val bpu_in_bypass_buf = RegEnable(ftq_pc_mem.io.wdata(0), enable=bpu_in_fire)
  val bpu_in_bypass_ptr = RegNext(bpu_in_resp_ptr)
  val last_cycle_bpu_in = RegNext(bpu_in_fire)
  val last_cycle_to_ifu_fire = RegNext(io.toIfu.req.fire)

  // read pc and target
  ftq_pc_mem.io.raddr.init.init.last := ifuPtr.value
  ftq_pc_mem.io.raddr.init.last := (ifuPtr+1.U).value

  io.toIfu.req.valid := allowToIfu && entry_fetch_status(ifuPtr.value) === f_to_send && ifuPtr =/= bpuPtr
  io.toIfu.req.bits.ftqIdx := ifuPtr
  io.toIfu.req.bits.target := update_target(ifuPtr.value)
  io.toIfu.req.bits.ftqOffset := cfiIndex_vec(ifuPtr.value)

  when (last_cycle_bpu_in && bpu_in_bypass_ptr === ifuPtr) {
    io.toIfu.req.bits.fromFtqPcBundle(bpu_in_bypass_buf)
  }.elsewhen (last_cycle_to_ifu_fire) {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.last)
  }.otherwise {
    io.toIfu.req.bits.fromFtqPcBundle(ftq_pc_mem.io.rdata.init.init.last)
  }
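  // The mux above picks among three sources for the outgoing fetch bundle:
  // a same-entry bypass of last cycle's bpu write, the pre-read of ifuPtr+1
  // (used when a request fired last cycle and the pointer advanced), or the
  // pre-read of ifuPtr. Both addresses are read every cycle because the
  // backing mem is synchronous-read and needs its address one cycle early.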

  // if the fall-through address is smaller than the start address, the entry must be a false hit
  when (io.toIfu.req.bits.fallThruError && entry_hit_status(ifuPtr.value) === h_hit) {
    when (io.toIfu.req.fire &&
      !(bpu_s2_redirect && bpu_s2_resp.ftq_idx === ifuPtr) &&
      !(bpu_s3_redirect && bpu_s3_resp.ftq_idx === ifuPtr)
    ) {
      entry_hit_status(ifuPtr.value) := h_false_hit
      XSDebug(true.B, "FTB false hit by fallThroughError, startAddr: %x, fallThru: %x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
    }
    XSDebug(true.B, "fallThruError! start:%x, fallThru:%x\n", io.toIfu.req.bits.startAddr, io.toIfu.req.bits.fallThruAddr)
  }

  val ifu_req_should_be_flushed =
    io.toIfu.flushFromBpu.shouldFlushByStage2(io.toIfu.req.bits.ftqIdx) ||
    io.toIfu.flushFromBpu.shouldFlushByStage3(io.toIfu.req.bits.ftqIdx)

  when (io.toIfu.req.fire && !ifu_req_should_be_flushed) {
    entry_fetch_status(ifuPtr.value) := f_sent
  }


  // *********************************************************************
  // **************************** wb from ifu ****************************
  // *********************************************************************
  val pdWb = io.fromIfu.pdWb
  val pds = pdWb.bits.pd
  val ifu_wb_valid = pdWb.valid
  val ifu_wb_idx = pdWb.bits.ftqIdx.value
  // read ports:                                                         commit update
  val ftq_pd_mem = Module(new SyncDataModuleTemplate(new Ftq_pd_Entry, FtqSize, 1, 1))
  ftq_pd_mem.io.wen(0) := ifu_wb_valid
  ftq_pd_mem.io.waddr(0) := pdWb.bits.ftqIdx.value
  ftq_pd_mem.io.wdata(0).fromPdWb(pdWb.bits)

  val hit_pd_valid = entry_hit_status(ifu_wb_idx) === h_hit && ifu_wb_valid
  val hit_pd_mispred = hit_pd_valid && pdWb.bits.misOffset.valid
  val hit_pd_mispred_reg = RegNext(hit_pd_mispred, init=false.B)
  val pd_reg       = RegEnable(pds,             enable = pdWb.valid)
  val start_pc_reg = RegEnable(pdWb.bits.pc(0), enable = pdWb.valid)
  val wb_idx_reg   = RegEnable(ifu_wb_idx,      enable = pdWb.valid)

  when (ifu_wb_valid) {
    val comm_stq_wen = VecInit(pds.map(_.valid).zip(pdWb.bits.instrRange).map{
      case (v, inRange) => v && inRange
    })
    (commitStateQueue(ifu_wb_idx) zip comm_stq_wen).map{
      case (qe, v) => when (v) { qe := c_valid }
    }
  }

  ifuWbPtr := ifuWbPtr + ifu_wb_valid

  ftb_entry_mem.io.raddr.head := ifu_wb_idx
  val has_false_hit = WireInit(false.B)
  when (RegNext(hit_pd_valid)) {
    // check for false hit
    val pred_ftb_entry = ftb_entry_mem.io.rdata.head
    val brSlots = pred_ftb_entry.brSlots
    val tailSlot = pred_ftb_entry.tailSlot
    // we check cfis that bpu predicted

    // bpu predicted branches but denied by predecode
    val br_false_hit =
      brSlots.map{
        s => s.valid && !(pd_reg(s.offset).valid && pd_reg(s.offset).isBr)
      }.reduce(_||_) ||
      (shareTailSlot.B && tailSlot.valid && pred_ftb_entry.tailSlot.sharing &&
        !(pd_reg(tailSlot.offset).valid && pd_reg(tailSlot.offset).isBr))

    val jmpOffset = tailSlot.offset
    val jmp_pd = pd_reg(jmpOffset)
    val jal_false_hit = pred_ftb_entry.jmpValid &&
      ((pred_ftb_entry.isJal  && !(jmp_pd.valid && jmp_pd.isJal)) ||
       (pred_ftb_entry.isJalr && !(jmp_pd.valid && jmp_pd.isJalr)) ||
       (pred_ftb_entry.isCall && !(jmp_pd.valid && jmp_pd.isCall)) ||
       (pred_ftb_entry.isRet  && !(jmp_pd.valid && jmp_pd.isRet))
      )

    has_false_hit := br_false_hit || jal_false_hit || hit_pd_mispred_reg
    XSDebug(has_false_hit, "FTB false hit by br or jal or hit_pd, startAddr: %x\n", pdWb.bits.pc(0))

    // assert(!has_false_hit)
  }

  when (has_false_hit) {
    entry_hit_status(wb_idx_reg) := h_false_hit
  }


  // **********************************************************************
  // **************************** backend read ****************************
  // **********************************************************************

  // pc reads
  for ((req, i) <- io.toBackend.pc_reads.zipWithIndex) {
    ftq_pc_mem.io.raddr(i) := req.ptr.value
    req.data := ftq_pc_mem.io.rdata(i).getPc(RegNext(req.offset))
  }
  // target read
  io.toBackend.target_read.data := RegNext(update_target(io.toBackend.target_read.ptr.value))

  // *******************************************************************************
  // **************************** redirect from backend ****************************
  // *******************************************************************************

  // redirect read cfiInfo, couples to redirectGen s2
  ftq_redirect_sram.io.ren.init.last := io.fromBackend.stage2Redirect.valid
  ftq_redirect_sram.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.init.last := io.fromBackend.stage2Redirect.bits.ftqIdx.value

  val stage3CfiInfo = ftq_redirect_sram.io.rdata.init.last
  val fromBackendRedirect = WireInit(io.fromBackend.stage3Redirect)
  val backendRedirectCfi = fromBackendRedirect.bits.cfiUpdate
  backendRedirectCfi.fromFtqRedirectSram(stage3CfiInfo)

  val r_ftb_entry = ftb_entry_mem.io.rdata.init.last
  val r_ftqOffset = fromBackendRedirect.bits.ftqOffset

  when (entry_hit_status(fromBackendRedirect.bits.ftqIdx.value) === h_hit) {
    backendRedirectCfi.shift := PopCount(r_ftb_entry.getBrMaskByOffset(r_ftqOffset)) +&
      (backendRedirectCfi.pd.isBr && !r_ftb_entry.brIsSaved(r_ftqOffset) &&
      !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))

    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr && (r_ftb_entry.brIsSaved(r_ftqOffset) ||
        !r_ftb_entry.newBrCanNotInsert(r_ftqOffset))
  }.otherwise {
    backendRedirectCfi.shift := (backendRedirectCfi.pd.isBr && backendRedirectCfi.taken).asUInt
    backendRedirectCfi.addIntoHist := backendRedirectCfi.pd.isBr.asUInt
  }
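  // Illustrative reading of the logic above (no new behavior): `shift` is the
  // number of history bits this entry contributed up to the redirected
  // instruction. On an FTB hit, that is the branches the entry records up to
  // the redirect offset, plus one if the mispredicted br is new but still
  // insertable; on a miss, a single bit iff the redirected instruction is a
  // taken br.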


  // ***************************************************************************
  // **************************** redirect from ifu ****************************
  // ***************************************************************************
  val fromIfuRedirect = WireInit(0.U.asTypeOf(Valid(new Redirect)))
  fromIfuRedirect.valid := pdWb.valid && pdWb.bits.misOffset.valid && !backendFlush
  fromIfuRedirect.bits.ftqIdx := pdWb.bits.ftqIdx
  fromIfuRedirect.bits.ftqOffset := pdWb.bits.misOffset.bits
  fromIfuRedirect.bits.level := RedirectLevel.flushAfter

  val ifuRedirectCfiUpdate = fromIfuRedirect.bits.cfiUpdate
  ifuRedirectCfiUpdate.pc := pdWb.bits.pc(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.pd := pdWb.bits.pd(pdWb.bits.misOffset.bits)
  ifuRedirectCfiUpdate.predTaken := cfiIndex_vec(pdWb.bits.ftqIdx.value).valid
  ifuRedirectCfiUpdate.target := pdWb.bits.target
  ifuRedirectCfiUpdate.taken := pdWb.bits.cfiOffset.valid
  ifuRedirectCfiUpdate.isMisPred := pdWb.bits.misOffset.valid

  val ifuRedirectReg = RegNext(fromIfuRedirect, init=0.U.asTypeOf(Valid(new Redirect)))
  val ifuRedirectToBpu = WireInit(ifuRedirectReg)
  ifuFlush := fromIfuRedirect.valid || ifuRedirectToBpu.valid

  ftq_redirect_sram.io.ren.head := fromIfuRedirect.valid
  ftq_redirect_sram.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  ftb_entry_mem.io.raddr.head := fromIfuRedirect.bits.ftqIdx.value

  val toBpuCfi = ifuRedirectToBpu.bits.cfiUpdate
  toBpuCfi.fromFtqRedirectSram(ftq_redirect_sram.io.rdata.head)
  when (ifuRedirectReg.bits.cfiUpdate.pd.isRet) {
    toBpuCfi.target := toBpuCfi.rasEntry.retAddr
  }

  // *********************************************************************
  // **************************** wb from exu ****************************
  // *********************************************************************

  def extractRedirectInfo(wb: Valid[Redirect]) = {
    val ftqIdx = wb.bits.ftqIdx.value
    val ftqOffset = wb.bits.ftqOffset
    val taken = wb.bits.cfiUpdate.taken
    val mispred = wb.bits.cfiUpdate.isMisPred
    (wb.valid, ftqIdx, ftqOffset, taken, mispred)
  }

  // fix mispredict entry
  val lastIsMispredict = RegNext(
    stage2Redirect.valid && stage2Redirect.bits.level === RedirectLevel.flushAfter, init = false.B
  )

  def updateCfiInfo(redirect: Valid[Redirect], isBackend: Boolean = true) = {
    val (r_valid, r_idx, r_offset, r_taken, r_mispred) = extractRedirectInfo(redirect)
    val cfiIndex_bits_wen = r_valid && r_taken && r_offset < cfiIndex_vec(r_idx).bits
    val cfiIndex_valid_wen = r_valid && r_offset === cfiIndex_vec(r_idx).bits
    when (cfiIndex_bits_wen || cfiIndex_valid_wen) {
      cfiIndex_vec(r_idx).valid := cfiIndex_bits_wen || cfiIndex_valid_wen && r_taken
    }
    when (cfiIndex_bits_wen) {
      cfiIndex_vec(r_idx).bits := r_offset
    }
    update_target(r_idx) := redirect.bits.cfiUpdate.target
    if (isBackend) {
      mispredict_vec(r_idx)(r_offset) := r_mispred
    }
  }
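  // Update rule sketch (restating updateCfiInfo above): a taken redirect at an
  // earlier offset than the recorded cfi overwrites both its offset and valid
  // bit; a redirect at exactly the recorded offset re-evaluates the valid bit
  // from the redirect's taken flag; the mispredict bits are only written for
  // backend redirects.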

  when(stage3Redirect.valid && lastIsMispredict) {
    updateCfiInfo(stage3Redirect)
  }.elsewhen (ifuRedirectToBpu.valid) {
    updateCfiInfo(ifuRedirectToBpu, isBackend=false)
  }

  // ***********************************************************************************
  // **************************** flush ptr and state queue ****************************
  // ***********************************************************************************

  val redirectVec = VecInit(robFlush, stage2Redirect, fromIfuRedirect)

  // when redirect, we should reset ptrs and status queues
  when(redirectVec.map(r => r.valid).reduce(_||_)){
    val r = PriorityMux(redirectVec.map(r => (r.valid -> r.bits)))
    val notIfu = redirectVec.dropRight(1).map(r => r.valid).reduce(_||_)
    val (idx, offset, flushItSelf) = (r.ftqIdx, r.ftqOffset, RedirectLevel.flushItself(r.level))
    val next = idx + 1.U
    bpuPtr := next
    ifuPtr := next
    ifuWbPtr := next
    when (notIfu) {
      commitStateQueue(idx.value).zipWithIndex.foreach({ case (s, i) =>
        when(i.U > offset || i.U === offset && flushItSelf){
          s := c_invalid
        }
      })
    }
  }

  // only the valid bit is actually needed
  io.toIfu.redirect.bits    := Mux(robFlush.valid, robFlush.bits, stage2Redirect.bits)
  io.toIfu.redirect.valid   := stage2Flush

  // commit
  for (c <- io.fromBackend.rob_commits) {
    when(c.valid) {
      commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset) := c_commited
      // TODO: remove this
      // For instruction fusions, we also update the next instruction
      when (c.bits.commitType === 4.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 1.U) := c_commited
      }.elsewhen(c.bits.commitType === 5.U) {
        commitStateQueue(c.bits.ftqIdx.value)(c.bits.ftqOffset + 2.U) := c_commited
      }.elsewhen(c.bits.commitType === 6.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(0) := c_commited
      }.elsewhen(c.bits.commitType === 7.U) {
        val index = (c.bits.ftqIdx + 1.U).value
        commitStateQueue(index)(1) := c_commited
      }
    }
  }
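  // Fusion handling above (mirrors the commitType cases): type 4 also marks
  // ftqOffset+1 and type 5 marks ftqOffset+2 in the same entry, while types
  // 6/7 mark offsets 0/1 of the next ftq entry, so the second half of a fused
  // pair is committed as well.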

  // ****************************************************************
  // **************************** to bpu ****************************
  // ****************************************************************

  io.toBpu.redirect <> Mux(fromBackendRedirect.valid, fromBackendRedirect, ifuRedirectToBpu)

  val may_have_stall_from_bpu = RegInit(false.B)
  val canCommit = commPtr =/= ifuWbPtr && !may_have_stall_from_bpu &&
    Cat(commitStateQueue(commPtr.value).map(s => {
      s === c_invalid || s === c_commited
    })).andR()

  // commit reads
  ftq_pc_mem.io.raddr.last := commPtr.value
  val commit_pc_bundle = ftq_pc_mem.io.rdata.last
  ftq_pd_mem.io.raddr.last := commPtr.value
  val commit_pd = ftq_pd_mem.io.rdata.last
  ftq_redirect_sram.io.ren.last := canCommit
  ftq_redirect_sram.io.raddr.last := commPtr.value
  val commit_spec_meta = ftq_redirect_sram.io.rdata.last
  ftq_meta_1r_sram.io.ren(0) := canCommit
  ftq_meta_1r_sram.io.raddr(0) := commPtr.value
  val commit_meta = ftq_meta_1r_sram.io.rdata(0)
  ftb_entry_mem.io.raddr.last := commPtr.value
  val commit_ftb_entry = ftb_entry_mem.io.rdata.last

  // need one cycle to read mem and srams
  val do_commit_ptr = RegNext(commPtr)
  val do_commit = RegNext(canCommit, init=false.B)
  when (canCommit) { commPtr := commPtr + 1.U }
  val commit_state = RegNext(commitStateQueue(commPtr.value))
  val can_commit_cfi = WireInit(cfiIndex_vec(commPtr.value))
  when (commitStateQueue(commPtr.value)(can_commit_cfi.bits) =/= c_commited) {
    can_commit_cfi.valid := false.B
  }
  val commit_cfi = RegNext(can_commit_cfi)

  val commit_mispredict = VecInit((RegNext(mispredict_vec(commPtr.value)) zip commit_state).map {
    case (mis, state) => mis && state === c_commited
  })
  val can_commit_hit = entry_hit_status(commPtr.value)
  val commit_hit = RegNext(can_commit_hit)
  val commit_target = RegNext(update_target(commPtr.value))
  val commit_valid = commit_hit === h_hit || commit_cfi.valid // hit or taken

  val to_bpu_hit = can_commit_hit === h_hit || can_commit_hit === h_false_hit
  may_have_stall_from_bpu := can_commit_cfi.valid && !to_bpu_hit && !may_have_stall_from_bpu
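  // may_have_stall_from_bpu sketch: if the entry to commit has a valid cfi but
  // its hit status has not yet been written by bpu's last stage, commit is
  // held off for one cycle; the `&& !may_have_stall_from_bpu` term makes the
  // flag self-clear the next cycle so commit cannot deadlock.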

  io.toBpu.update := DontCare
  io.toBpu.update.valid := commit_valid && do_commit
  val update = io.toBpu.update.bits
  update.false_hit   := commit_hit === h_false_hit
  update.pc          := commit_pc_bundle.startAddr
  update.preds.hit   := commit_hit === h_hit || commit_hit === h_false_hit
  update.meta        := commit_meta.meta
  update.full_target := commit_target
  update.fromFtqRedirectSram(commit_spec_meta)

  val commit_real_hit = commit_hit === h_hit
  val update_ftb_entry = update.ftb_entry

  val ftbEntryGen = Module(new FTBEntryGen).io
  ftbEntryGen.start_addr     := commit_pc_bundle.startAddr
  ftbEntryGen.old_entry      := commit_ftb_entry
  ftbEntryGen.pd             := commit_pd
  ftbEntryGen.cfiIndex       := commit_cfi
  ftbEntryGen.target         := commit_target
  ftbEntryGen.hit            := commit_real_hit
  ftbEntryGen.mispredict_vec := commit_mispredict

  update_ftb_entry         := ftbEntryGen.new_entry
  update.new_br_insert_pos := ftbEntryGen.new_br_insert_pos
  update.mispred_mask      := ftbEntryGen.mispred_mask
  update.old_entry         := ftbEntryGen.is_old_entry
  update.preds.br_taken_mask  := ftbEntryGen.taken_mask

  // ******************************************************************************
  // **************************** commit perf counters ****************************
  // ******************************************************************************

  val commit_inst_mask    = VecInit(commit_state.map(c => c === c_commited && do_commit)).asUInt
  val commit_mispred_mask = commit_mispredict.asUInt
  val commit_not_mispred_mask = ~commit_mispred_mask

  val commit_br_mask = commit_pd.brMask.asUInt
  val commit_jmp_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.jmpInfo.valid.asTypeOf(UInt(1.W)))
  val commit_cfi_mask = (commit_br_mask | commit_jmp_mask)

  val mbpInstrs = commit_inst_mask & commit_cfi_mask

  val mbpRights = mbpInstrs & commit_not_mispred_mask
  val mbpWrongs = mbpInstrs & commit_mispred_mask

  io.bpuInfo.bpRight := PopCount(mbpRights)
  io.bpuInfo.bpWrong := PopCount(mbpWrongs)

  // Cfi Info
  for (i <- 0 until PredictWidth) {
    val pc = commit_pc_bundle.startAddr + (i * instBytes).U
    val v = commit_state(i) === c_commited
    val isBr = commit_pd.brMask(i)
    val isJmp = commit_pd.jmpInfo.valid && commit_pd.jmpOffset === i.U
    val isCfi = isBr || isJmp
    val isTaken = commit_cfi.valid && commit_cfi.bits === i.U
    val misPred = commit_mispredict(i)
    // val ghist = commit_spec_meta.ghist.predHist
    val histPtr = commit_spec_meta.histPtr
    val predCycle = commit_meta.meta(63, 0)
    val target = commit_target

    val brIdx = OHToUInt(Reverse(Cat(update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U})))
    val inFtbEntry = update_ftb_entry.brValids.zip(update_ftb_entry.brOffset).map{case(v, offset) => v && offset === i.U}.reduce(_||_)
    val addIntoHist = ((commit_hit === h_hit) && inFtbEntry) || ((!(commit_hit === h_hit) && i.U === commit_cfi.bits && isBr && commit_cfi.valid))
    XSDebug(v && do_commit && isCfi, p"cfi_update: isBr(${isBr}) pc(${Hexadecimal(pc)}) " +
    p"taken(${isTaken}) mispred(${misPred}) cycle($predCycle) hist(${histPtr.value}) " +
    p"startAddr(${Hexadecimal(commit_pc_bundle.startAddr)}) AddIntoHist(${addIntoHist}) " +
    p"brInEntry(${inFtbEntry}) brIdx(${brIdx}) target(${Hexadecimal(target)})\n")
  }

  val enq = io.fromBpu.resp
  val perf_redirect = io.fromBackend.stage2Redirect

  XSPerfAccumulate("entry", validEntries)
  XSPerfAccumulate("bpu_to_ftq_stall", enq.valid && !enq.ready)
  XSPerfAccumulate("mispredictRedirect", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level)
  XSPerfAccumulate("replayRedirect", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level))
  XSPerfAccumulate("predecodeRedirect", fromIfuRedirect.valid)

  XSPerfAccumulate("to_ifu_bubble", io.toIfu.req.ready && !io.toIfu.req.valid)

  XSPerfAccumulate("to_ifu_stall", io.toIfu.req.valid && !io.toIfu.req.ready)
  XSPerfAccumulate("from_bpu_real_bubble", !enq.valid && enq.ready && allowBpuIn)
  XSPerfAccumulate("bpu_to_ftq_bubble", bpuPtr === ifuPtr)

  val from_bpu = io.fromBpu.resp.bits
  def in_entry_len_map_gen(resp: BranchPredictionBundle)(stage: String) = {
    val entry_len = (resp.ftb_entry.getFallThrough(resp.pc) - resp.pc) >> instOffsetBits
    val entry_len_recording_vec = (1 to PredictWidth+1).map(i => entry_len === i.U)
    val entry_len_map = (1 to PredictWidth+1).map(i =>
      f"${stage}_ftb_entry_len_$i" -> (entry_len_recording_vec(i-1) && resp.valid)
    ).foldLeft(Map[String, UInt]())(_+_)
    entry_len_map
  }
  val s1_entry_len_map = in_entry_len_map_gen(from_bpu.s1)("s1")
  val s2_entry_len_map = in_entry_len_map_gen(from_bpu.s2)("s2")
  val s3_entry_len_map = in_entry_len_map_gen(from_bpu.s3)("s3")

  val to_ifu = io.toIfu.req.bits
  val to_ifu_entry_len = (to_ifu.fallThruAddr - to_ifu.startAddr) >> instOffsetBits
  val to_ifu_entry_len_recording_vec = (1 to PredictWidth+1).map(i => to_ifu_entry_len === i.U)
  val to_ifu_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"to_ifu_ftb_entry_len_$i" -> (to_ifu_entry_len_recording_vec(i-1) && io.toIfu.req.fire)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_num_inst_recording_vec = (1 to PredictWidth).map(i => PopCount(commit_inst_mask) === i.U)
  val commit_num_inst_map = (1 to PredictWidth).map(i =>
    f"commit_num_inst_$i" -> (commit_num_inst_recording_vec(i-1) && do_commit)
  ).foldLeft(Map[String, UInt]())(_+_)



  val commit_jal_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJal.asTypeOf(UInt(1.W)))
  val commit_jalr_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasJalr.asTypeOf(UInt(1.W)))
  val commit_call_mask = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasCall.asTypeOf(UInt(1.W)))
  val commit_ret_mask  = UIntToOH(commit_pd.jmpOffset) & Fill(PredictWidth, commit_pd.hasRet.asTypeOf(UInt(1.W)))


  val mbpBRights = mbpRights & commit_br_mask
  val mbpJRights = mbpRights & commit_jal_mask
  val mbpIRights = mbpRights & commit_jalr_mask
  val mbpCRights = mbpRights & commit_call_mask
  val mbpRRights = mbpRights & commit_ret_mask

  val mbpBWrongs = mbpWrongs & commit_br_mask
  val mbpJWrongs = mbpWrongs & commit_jal_mask
  val mbpIWrongs = mbpWrongs & commit_jalr_mask
  val mbpCWrongs = mbpWrongs & commit_call_mask
  val mbpRWrongs = mbpWrongs & commit_ret_mask

  val commit_pred_stage = RegNext(pred_stage(commPtr.value))

  def pred_stage_map(src: UInt, name: String) = {
    (0 until numBpStages).map(i =>
      f"${name}_stage_${i+1}" -> PopCount(src.asBools.map(_ && commit_pred_stage === BP_STAGES(i)))
    ).foldLeft(Map[String, UInt]())(_+_)
  }

  val mispred_stage_map      = pred_stage_map(mbpWrongs,  "mispredict")
  val br_mispred_stage_map   = pred_stage_map(mbpBWrongs, "br_mispredict")
  val jalr_mispred_stage_map = pred_stage_map(mbpIWrongs, "jalr_mispredict")
  val correct_stage_map      = pred_stage_map(mbpRights,  "correct")
  val br_correct_stage_map   = pred_stage_map(mbpBRights, "br_correct")
  val jalr_correct_stage_map = pred_stage_map(mbpIRights, "jalr_correct")

  val update_valid = io.toBpu.update.valid
  def u(cond: Bool) = update_valid && cond
  val ftb_false_hit = u(update.false_hit)
  // assert(!ftb_false_hit)
  val ftb_hit = u(commit_hit === h_hit)

  val ftb_new_entry = u(ftbEntryGen.is_init_entry)
  val ftb_new_entry_only_br = ftb_new_entry && !update.ftb_entry.jmpValid
  val ftb_new_entry_only_jmp = ftb_new_entry && !update.ftb_entry.brValids(0)
  val ftb_new_entry_has_br_and_jmp = ftb_new_entry && update.ftb_entry.brValids(0) && update.ftb_entry.jmpValid

  val ftb_old_entry = u(ftbEntryGen.is_old_entry)

  val ftb_modified_entry = u(ftbEntryGen.is_new_br || ftbEntryGen.is_jalr_target_modified || ftbEntryGen.is_always_taken_modified)
  val ftb_modified_entry_new_br = u(ftbEntryGen.is_new_br)
  val ftb_modified_entry_jalr_target_modified = u(ftbEntryGen.is_jalr_target_modified)
  val ftb_modified_entry_br_full = ftb_modified_entry && ftbEntryGen.is_br_full
  val ftb_modified_entry_always_taken = ftb_modified_entry && ftbEntryGen.is_always_taken_modified

  val ftb_entry_len = (ftbEntryGen.new_entry.getFallThrough(update.pc) - update.pc) >> instOffsetBits
  val ftb_entry_len_recording_vec = (1 to PredictWidth+1).map(i => ftb_entry_len === i.U)
  val ftb_init_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_init_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_new_entry)
  ).foldLeft(Map[String, UInt]())(_+_)
  val ftb_modified_entry_len_map = (1 to PredictWidth+1).map(i =>
    f"ftb_modified_entry_len_$i" -> (ftb_entry_len_recording_vec(i-1) && ftb_modified_entry)
  ).foldLeft(Map[String, UInt]())(_+_)

  val ftq_occupancy_map = (0 to FtqSize).map(i =>
    f"ftq_has_entry_$i" -> (validEntries === i.U)
  ).foldLeft(Map[String, UInt]())(_+_)

  val perfCountsMap = Map(
    "BpInstr" -> PopCount(mbpInstrs),
    "BpBInstr" -> PopCount(mbpBRights | mbpBWrongs),
    "BpRight"  -> PopCount(mbpRights),
    "BpWrong"  -> PopCount(mbpWrongs),
    "BpBRight" -> PopCount(mbpBRights),
    "BpBWrong" -> PopCount(mbpBWrongs),
    "BpJRight" -> PopCount(mbpJRights),
    "BpJWrong" -> PopCount(mbpJWrongs),
    "BpIRight" -> PopCount(mbpIRights),
    "BpIWrong" -> PopCount(mbpIWrongs),
    "BpCRight" -> PopCount(mbpCRights),
    "BpCWrong" -> PopCount(mbpCWrongs),
    "BpRRight" -> PopCount(mbpRRights),
    "BpRWrong" -> PopCount(mbpRWrongs),

    "ftb_false_hit"                -> PopCount(ftb_false_hit),
    "ftb_hit"                      -> PopCount(ftb_hit),
    "ftb_new_entry"                -> PopCount(ftb_new_entry),
    "ftb_new_entry_only_br"        -> PopCount(ftb_new_entry_only_br),
    "ftb_new_entry_only_jmp"       -> PopCount(ftb_new_entry_only_jmp),
    "ftb_new_entry_has_br_and_jmp" -> PopCount(ftb_new_entry_has_br_and_jmp),
    "ftb_old_entry"                -> PopCount(ftb_old_entry),
    "ftb_modified_entry"           -> PopCount(ftb_modified_entry),
    "ftb_modified_entry_new_br"    -> PopCount(ftb_modified_entry_new_br),
    "ftb_jalr_target_modified"     -> PopCount(ftb_modified_entry_jalr_target_modified),
    "ftb_modified_entry_br_full"   -> PopCount(ftb_modified_entry_br_full),
    "ftb_modified_entry_always_taken" -> PopCount(ftb_modified_entry_always_taken)
  ) ++ ftb_init_entry_len_map ++ ftb_modified_entry_len_map ++ s1_entry_len_map ++
  s2_entry_len_map ++ s3_entry_len_map ++
  to_ifu_entry_len_map ++ commit_num_inst_map ++ ftq_occupancy_map ++
  mispred_stage_map ++ br_mispred_stage_map ++ jalr_mispred_stage_map ++
  correct_stage_map ++ br_correct_stage_map ++ jalr_correct_stage_map

  for((key, value) <- perfCountsMap) {
    XSPerfAccumulate(key, value)
  }

  // --------------------------- Debug --------------------------------
  // XSDebug(enq_fire, p"enq! " + io.fromBpu.resp.bits.toPrintable)
  XSDebug(io.toIfu.req.fire, p"fire to ifu " + io.toIfu.req.bits.toPrintable)
  XSDebug(do_commit, p"deq! [ptr] $do_commit_ptr\n")
  XSDebug(true.B, p"[bpuPtr] $bpuPtr, [ifuPtr] $ifuPtr, [ifuWbPtr] $ifuWbPtr [commPtr] $commPtr\n")
  XSDebug(true.B, p"[in] v:${io.fromBpu.resp.valid} r:${io.fromBpu.resp.ready} " +
    p"[out] v:${io.toIfu.req.valid} r:${io.toIfu.req.ready}\n")
  XSDebug(do_commit, p"[deq info] cfiIndex: $commit_cfi, $commit_pc_bundle, target: ${Hexadecimal(commit_target)}\n")

  //   def ubtbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def btbCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ Mux(ans.hit.asBool,
  //           Mux(ans.taken.asBool, taken && ans.target === commitEntry.target,
  //           !taken),
  //         !taken),
  //       false.B)
  //     }
  //   }

  //   def tageCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isBr,
  //         isWrong ^ (ans.taken.asBool === taken),
  //       false.B)
  //     }
  //   }

  //   def loopCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && (pd.isBr) && ans.hit.asBool,
  //         isWrong ^ (!taken),
  //           false.B)
  //     }
  //   }

  //   def rasCheck(commit: FtqEntry, predAns: Seq[PredictorAnswer], isWrong: Bool) = {
  //     commit.valids.zip(commit.pd).zip(predAns).zip(commit.takens).map {
  //       case (((valid, pd), ans), taken) =>
  //       Mux(valid && pd.isRet.asBool /*&& taken*/ && ans.hit.asBool,
  //         isWrong ^ (ans.target === commitEntry.target),
  //           false.B)
  //     }
  //   }

  //   val ubtbRights = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), false.B)
  //   val ubtbWrongs = ubtbCheck(commitEntry, commitEntry.metas.map(_.ubtbAns), true.B)
  //   // btb and ubtb pred jal and jalr as well
  //   val btbRights = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), false.B)
  //   val btbWrongs = btbCheck(commitEntry, commitEntry.metas.map(_.btbAns), true.B)
  //   val tageRights = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), false.B)
  //   val tageWrongs = tageCheck(commitEntry, commitEntry.metas.map(_.tageAns), true.B)

  //   val loopRights = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), false.B)
  //   val loopWrongs = loopCheck(commitEntry, commitEntry.metas.map(_.loopAns), true.B)

  //   val rasRights = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), false.B)
  //   val rasWrongs = rasCheck(commitEntry, commitEntry.metas.map(_.rasAns), true.B)

  val perfEvents = Seq(
    ("bpu_s2_redirect        ", bpu_s2_redirect                                                             ),
    ("bpu_s3_redirect        ", bpu_s3_redirect                                                             ),
    ("bpu_to_ftq_stall       ", enq.valid && ~enq.ready                                                     ),
    ("mispredictRedirect     ", perf_redirect.valid && RedirectLevel.flushAfter === perf_redirect.bits.level),
    ("replayRedirect         ", perf_redirect.valid && RedirectLevel.flushItself(perf_redirect.bits.level)  ),
    ("predecodeRedirect      ", fromIfuRedirect.valid                                                       ),
    ("to_ifu_bubble          ", io.toIfu.req.ready && !io.toIfu.req.valid                                   ),
    ("from_bpu_real_bubble   ", !enq.valid && enq.ready && allowBpuIn                                       ),
    ("BpInstr                ", PopCount(mbpInstrs)                                                         ),
    ("BpBInstr               ", PopCount(mbpBRights | mbpBWrongs)                                           ),
    ("BpRight                ", PopCount(mbpRights)                                                         ),
    ("BpWrong                ", PopCount(mbpWrongs)                                                         ),
    ("BpBRight               ", PopCount(mbpBRights)                                                        ),
    ("BpBWrong               ", PopCount(mbpBWrongs)                                                        ),
    ("BpJRight               ", PopCount(mbpJRights)                                                        ),
    ("BpJWrong               ", PopCount(mbpJWrongs)                                                        ),
    ("BpIRight               ", PopCount(mbpIRights)                                                        ),
    ("BpIWrong               ", PopCount(mbpIWrongs)                                                        ),
    ("BpCRight               ", PopCount(mbpCRights)                                                        ),
    ("BpCWrong               ", PopCount(mbpCWrongs)                                                        ),
    ("BpRRight               ", PopCount(mbpRRights)                                                        ),
    ("BpRWrong               ", PopCount(mbpRWrongs)                                                        ),
    ("ftb_false_hit          ", PopCount(ftb_false_hit)                                                     ),
    ("ftb_hit                ", PopCount(ftb_hit)                                                           ),
  )
  generatePerfEvent()
}