// xref: /XiangShan/src/main/scala/xiangshan/frontend/IBuffer.scala (revision 45f43e6e5f88874a7573ff096d1e5c2855bd16c7)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
16
17package xiangshan.frontend
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import xiangshan._
23import utils._
24import utility._
25import xiangshan.ExceptionNO._
26
/** Pointer into the whole IBuffer register file (range 0 until IBufSize).
  * Wrap-around and full/empty flag handling come from [[CircularQueuePtr]].
  * Used for the global enqueue/dequeue positions of the queue.
  */
class IBufPtr(implicit p: Parameters) extends CircularQueuePtr[IBufPtr](
  p => p(XSCoreParamsKey).IBufSize
) {
}
31
/** Pointer within one bank of the IBuffer (range 0 until IBufSize / IBufNBank).
  * Each bank keeps its own in-bank read pointer (see `deqInBankPtr` in IBuffer),
  * so a dequeue reads at most one entry per bank.
  */
class IBufInBankPtr(implicit p: Parameters) extends CircularQueuePtr[IBufInBankPtr](
  p => p(XSCoreParamsKey).IBufSize / p(XSCoreParamsKey).IBufNBank
) {
}
36
/** Pointer selecting a bank of the IBuffer (range 0 until IBufNBank).
  * One such pointer exists per decode slot (see `deqBankPtrVec` in IBuffer),
  * rotating across banks as entries are dequeued.
  */
class IBufBankPtr(implicit p: Parameters) extends CircularQueuePtr[IBufBankPtr](
  p => p(XSCoreParamsKey).IBufNBank
) {
}
41
/** IO bundle of the instruction buffer sitting between the IFU and Decode. */
class IBufferIO(implicit p: Parameters) extends XSBundle {
  // Pipeline flush request: drop all buffered instructions this cycle.
  val flush = Input(Bool())
  // Redirect-cause hints, consumed only by the top-down performance counters
  // below to attribute post-flush bubbles to a stall reason.
  val ControlRedirect = Input(Bool())
  val ControlBTBMissBubble = Input(Bool())
  val TAGEMissBubble = Input(Bool())
  val SCMissBubble = Input(Bool())
  val ITTAGEMissBubble = Input(Bool())
  val RASMissBubble = Input(Bool())
  val MemVioRedirect = Input(Bool())
  // Fetch packet from the IFU (up to PredictWidth instructions per beat).
  val in = Flipped(DecoupledIO(new FetchToIBuffer))
  // Up to DecodeWidth instructions per cycle toward Decode.
  val out = Vec(DecodeWidth, DecoupledIO(new CtrlFlow))
  // Asserted when the buffer can no longer accept a full fetch packet.
  val full = Output(Bool())
  // Stall-reason reporting channel for top-down analysis.
  val stallReason = new StallReasonIO(DecodeWidth)
}
56
/** One instruction's worth of payload held in the IBuffer.
  *
  * Field declaration order is part of the hardware bit layout and must not change.
  */
class IBufEntry(implicit p: Parameters) extends XSBundle {
  val inst = UInt(32.W)
  val pc = UInt(VAddrBits.W)
  val foldpc = UInt(MemPredPCWidth.W)
  val pd = new PreDecodeInfo
  val pred_taken = Bool()
  val ftqPtr = new FtqPtr
  val ftqOffset = UInt(log2Ceil(PredictWidth).W)
  val ipf = Bool()
  val acf = Bool()
  val crossPageIPFFix = Bool()
  val triggered = new TriggerCf

  /** Fill this entry from lane `i` of an incoming fetch packet.
    *
    * @param fetch fetch packet delivered by the IFU
    * @param i     lane index within the packet (0 until PredictWidth)
    * @return this entry, so the call can be used as an expression
    */
  def fromFetch(fetch: FetchToIBuffer, i: Int): IBufEntry = {
    // FTQ bookkeeping: the pointer is shared by the whole packet,
    // offset and predicted-taken are per lane.
    this.ftqPtr     := fetch.ftqPtr
    this.ftqOffset  := fetch.ftqOffset(i).bits
    this.pred_taken := fetch.ftqOffset(i).valid
    // Per-lane instruction payload.
    this.inst   := fetch.instrs(i)
    this.pc     := fetch.pc(i)
    this.foldpc := fetch.foldpc(i)
    this.pd     := fetch.pd(i)
    // Fetch-side fault and trigger flags.
    this.ipf             := fetch.ipf(i)
    this.acf             := fetch.acf(i)
    this.crossPageIPFFix := fetch.crossPageIPFFix(i)
    this.triggered       := fetch.triggered(i)
    this
  }

  /** Expand this entry into the [[CtrlFlow]] bundle consumed by Decode. */
  def toCtrlFlow: CtrlFlow = {
    val flow = Wire(new CtrlFlow)
    // Default the exception vector to "no exception", then raise the two
    // faults that can be detected at fetch time (order matters: last connect wins).
    flow.exceptionVec := 0.U.asTypeOf(ExceptionVec())
    flow.exceptionVec(instrPageFault) := ipf
    flow.exceptionVec(instrAccessFault) := acf
    // Straight pass-through of the buffered payload.
    flow.instr           := inst
    flow.pc              := pc
    flow.foldpc          := foldpc
    flow.trigger         := triggered
    flow.pd              := pd
    flow.pred_taken      := pred_taken
    flow.crossPageIPFFix := crossPageIPFFix
    flow.ftqPtr          := ftqPtr
    flow.ftqOffset       := ftqOffset
    // Memory-dependence-prediction fields are filled later in the pipeline.
    flow.storeSetHit    := DontCare
    flow.waitForRobIdx  := DontCare
    flow.loadWaitBit    := DontCare
    flow.loadWaitStrict := DontCare
    flow.ssid           := DontCare
    flow
  }
}
107
/** Instruction buffer: a banked circular queue decoupling fetch (PredictWidth
  * instructions per beat) from decode (DecodeWidth instructions per cycle),
  * with a same-cycle bypass path used when the buffer is empty.
  */
class IBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper with HasPerfEvents {
  val io = IO(new IBufferIO)

  // Parameter Check
  private val bankSize = IBufSize / IBufNBank
  require(IBufSize % IBufNBank == 0, s"IBufNBank should divide IBufSize, IBufNBank: $IBufNBank, IBufSize: $IBufSize")
  require(IBufNBank >= DecodeWidth,
    s"IBufNBank should be equal or larger than DecodeWidth, IBufNBank: $IBufNBank, DecodeWidth: $DecodeWidth")

  // IBuffer is organized as raw registers
  // This is due to IBuffer is a huge queue, read & write port logic should be precisely controlled
  //                             . + + E E E - .
  //                             . + + E E E - .
  //                             . . + E E E - .
  //                             . . + E E E E -
  // As shown above, + means enqueue, - means dequeue, E is current content
  // When dequeue, read port is organized like a banked FIFO
  // Dequeue reads no more than 1 entry from each bank sequentially, this can be exploit to reduce area
  // Enqueue writes cannot benefit from this characteristic unless use a SRAM
  // For detail see Enqueue and Dequeue below
  private val ibuf: Vec[IBufEntry] = RegInit(VecInit.fill(IBufSize)(0.U.asTypeOf(new IBufEntry)))
  // Re-index ibuf as IBufNBank banks of bankSize entries each; entry k lives in
  // bank (k % IBufNBank) at in-bank offset (k / IBufNBank).
  private val bankedIBufView: Vec[Vec[IBufEntry]] = VecInit.tabulate(IBufNBank)(
    bankID => VecInit.tabulate(bankSize)(
      inBankOffset => ibuf(bankID + inBankOffset * IBufNBank)
    )
  )


  // Bypass wire: fetch data forwarded straight to the output register when empty
  private val bypassEntries = WireDefault(VecInit.fill(DecodeWidth)(0.U.asTypeOf(Valid(new IBufEntry))))
  // Normal read wire: data read out of the banked register file
  private val deqEntries = WireDefault(VecInit.fill(DecodeWidth)(0.U.asTypeOf(Valid(new IBufEntry))))
  // Output register: one-stage output buffer toward Decode
  private val outputEntries = RegInit(VecInit.fill(DecodeWidth)(0.U.asTypeOf(Valid(new IBufEntry))))

  // Between Bank: per-decode-slot bank pointer, initialized to 0..DecodeWidth-1
  private val deqBankPtrVec: Vec[IBufBankPtr] = RegInit(VecInit.tabulate(DecodeWidth)(_.U.asTypeOf(new IBufBankPtr)))
  private val deqBankPtr: IBufBankPtr = deqBankPtrVec(0)
  // Inside Bank: one read offset per bank
  private val deqInBankPtr: Vec[IBufInBankPtr] = RegInit(VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr)))

  // Global dequeue pointer; invariant checked by the XSError at the bottom:
  // deqPtr.value == deqBankPtr.value + deqInBankPtr(deqBankPtr).value * IBufNBank
  val deqPtr = RegInit(0.U.asTypeOf(new IBufPtr))

  // PredictWidth staggered enqueue pointers (pointer i = enqPtr + i)
  val enqPtrVec = RegInit(VecInit.tabulate(PredictWidth)(_.U.asTypeOf(new IBufPtr)))
  val enqPtr = enqPtrVec(0)

  val validEntries = distanceBetween(enqPtr, deqPtr)
  val allowEnq = RegInit(true.B)
  // Bypass when the queue is empty and the output register will be (re)loaded
  // this cycle. NOTE(review): the condition tests `ready`, not `fire`.
  val useBypass = enqPtr === deqPtr && io.out.head.ready

  val numFromFetch = PopCount(io.in.bits.enqEnable)
  val numTryEnq = WireDefault(0.U)
  val numEnq = Mux(io.in.fire, numTryEnq, 0.U)
  val numBypass = PopCount(bypassEntries.map(_.valid))
  val numTryDeq = Mux(validEntries >= DecodeWidth.U, DecodeWidth.U, validEntries)
  val numDeq = Mux(io.out.head.ready, numTryDeq, 0.U)
  val numAfterEnq = validEntries +& numEnq

  val nextValidEntries = Mux(io.out(0).ready, numAfterEnq - numTryDeq, numAfterEnq)
  // Disable when almost full: always keep PredictWidth free slots so a whole
  // fetch packet can be accepted without partial writes.
  allowEnq := (IBufSize - PredictWidth).U >= nextValidEntries

  // enqOffset(i): compacted slot index of fetch lane i, i.e. the number of valid
  // lanes before lane i. NOTE(review): counts io.in.bits.valid, while lane
  // selection below also requires enqEnable — presumably enqEnable implies
  // valid; confirm against the IFU.
  val enqOffset = VecInit.tabulate(PredictWidth)(i => PopCount(io.in.bits.valid.asBools.take(i)))
  val enqData = VecInit.tabulate(PredictWidth)(i => Wire(new IBufEntry).fromFetch(io.in.bits, i))

  // when using bypass, bypassed entries do not enqueue
  when(useBypass) {
    when(numFromFetch >= DecodeWidth.U) {
      numTryEnq := numFromFetch - DecodeWidth.U
    } .otherwise {
      numTryEnq := 0.U
    }
  } .otherwise {
    numTryEnq := numFromFetch
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Bypass
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // The first DecodeWidth compacted fetch lanes are forwarded directly to the
  // output register, skipping the queue entirely.
  bypassEntries.zipWithIndex.foreach {
    case (entry, idx) =>
      // Select: lane i feeds bypass slot idx iff it is the idx-th valid lane
      val validOH = Range(0, PredictWidth).map {
        i =>
          io.in.bits.valid(i) &&
            io.in.bits.enqEnable(i) &&
            enqOffset(i) === idx.asUInt
      } // Should be OneHot
      entry.valid := validOH.reduce(_ || _) && io.in.fire && !io.flush
      entry.bits := Mux1H(validOH, enqData)

      // Debug Assertion
      XSError(PopCount(validOH) > 1.asUInt, "validOH is not OneHot")
  }

  // => Decode Output
  // clean register output
  io.out zip outputEntries foreach {
    case (io, reg) =>
      io.valid := reg.valid
      io.bits := reg.bits.toCtrlFlow
  }
  // Refill the output register whenever Decode consumes it; bypass data takes
  // priority over queue data (last connect wins inside the when).
  outputEntries zip bypassEntries zip deqEntries foreach {
    case ((out, bypass), deq) =>
      when(io.out.head.ready) {
        out := deq
        when(useBypass) {
          out := bypass
        }
      }
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Enqueue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  io.in.ready := allowEnq
  // Data
  ibuf.zipWithIndex.foreach {
    case (entry, idx) => {
      // Select: find the fetch lane (if any) whose target slot is this entry.
      // Under bypass, the first DecodeWidth lanes went straight to the output
      // register, so lane i targets enqPtrVec(enqOffset(i) - DecodeWidth).
      val validOH = Range(0, PredictWidth).map {
        i =>
          val useBypassMatch = enqOffset(i) >= DecodeWidth.U &&
            enqPtrVec(enqOffset(i) - DecodeWidth.U).value === idx.asUInt
          val normalMatch = enqPtrVec(enqOffset(i)).value === idx.asUInt
          val m = Mux(useBypass, useBypassMatch, normalMatch) // when using bypass, bypassed entries do not enqueue

          io.in.bits.valid(i) && io.in.bits.enqEnable(i) && m
      } // Should be OneHot
      val wen = validOH.reduce(_ || _) && io.in.fire && !io.flush

      // Write port
      // Each IBuffer entry has a PredictWidth -> 1 Mux
      val writeEntry = Mux1H(validOH, enqData)
      entry := Mux(wen, writeEntry, entry)

      // Debug Assertion
      XSError(PopCount(validOH) > 1.asUInt, "validOH is not OneHot")
    }
  }
  // Pointer maintenance
  when (io.in.fire && !io.flush) {
    enqPtrVec := VecInit(enqPtrVec.map(_ + numTryEnq))
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Dequeue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // validVec(k) == "decode slot k has a valid entry to read"
  val validVec = Mux(validEntries >= DecodeWidth.U,
    ((1 << DecodeWidth) - 1).U,
    UIntToMask(validEntries(log2Ceil(DecodeWidth) - 1, 0), DecodeWidth)
  )
  // Data
  // Read port
  // 2-stage, IBufNBank * (bankSize -> 1) + IBufNBank -> 1
  // Should be better than IBufSize -> 1 in area, with no significant latency increase
  private val readStage1: Vec[IBufEntry] = VecInit.tabulate(IBufNBank)(
    bankID => Mux1H(UIntToOH(deqInBankPtr(bankID).value), bankedIBufView(bankID))
  )
  for (i <- 0 until DecodeWidth) {
    deqEntries(i).valid := validVec(i)
    deqEntries(i).bits := Mux1H(UIntToOH(deqBankPtrVec(i).value), readStage1)
  }
  // Pointer maintenance
  deqBankPtrVec := Mux(io.out.head.ready, VecInit(deqBankPtrVec.map(_ + numTryDeq)), deqBankPtrVec)
  deqPtr := Mux(io.out.head.ready, deqPtr + numTryDeq, deqPtr)
  // Each bank's in-bank pointer advances iff its current head entry is dequeued
  deqInBankPtr.zipWithIndex.foreach {
    case (ptr, idx) => {
      // validVec[k] == bankValid[deqBankPtr + k]
      // So bankValid[n] == validVec[n - deqBankPtr]
      val validIdx = Mux(idx.asUInt >= deqBankPtr.value,
        idx.asUInt - deqBankPtr.value,
        ((idx + IBufNBank).asUInt - deqBankPtr.value)(log2Ceil(IBufNBank) - 1, 0)
      )
      val bankAdvance = Mux(validIdx >= DecodeWidth.U,
        false.B,
        validVec(validIdx(log2Ceil(DecodeWidth) - 1, 0))
      ) && io.out.head.ready
      ptr := Mux(bankAdvance , ptr + 1.U, ptr)
    }
  }

  // Flush: reset all pointers and invalidate the output register.
  // These connects come last, so they override the maintenance logic above.
  when (io.flush) {
    allowEnq := true.B
    enqPtrVec := enqPtrVec.indices.map(_.U.asTypeOf(new IBufPtr))
    deqBankPtrVec := deqBankPtrVec.indices.map(_.U.asTypeOf(new IBufBankPtr))
    deqInBankPtr := VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr))
    deqPtr := 0.U.asTypeOf(new IBufPtr())
    outputEntries.foreach(_.valid := false.B)
  }
  io.full := !allowEnq

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // TopDown
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Registered copy of the incoming top-down bubble reasons; on a flush, the
  // redirect-cause inputs tag the appropriate reason bit.
  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  topdown_stage := io.in.bits.topdown_info
  when(io.flush) {
    when(io.ControlRedirect) {
      when(io.ControlBTBMissBubble) {
        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
      }.elsewhen(io.TAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
      }.elsewhen(io.SCMissBubble) {
        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
      }.elsewhen(io.ITTAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
      }.elsewhen(io.RASMissBubble) {
        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
      }
    }.elsewhen(io.MemVioRedirect) {
      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
    }.otherwise {
      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
    }
  }


  val dequeueInsufficient = Wire(Bool())
  val matchBubble = Wire(UInt(log2Up(TopDownCounters.NumStallReasons.id).W))
  val deqValidCount = PopCount(validVec.asBools)
  val deqWasteCount = DecodeWidth.U - deqValidCount
  dequeueInsufficient := deqValidCount < DecodeWidth.U
  // Index of the highest-set reason bit (priority-encode from the top)
  matchBubble := (TopDownCounters.NumStallReasons.id - 1).U - PriorityEncoder(topdown_stage.reasons.reverse)

  // Attribute the deqWasteCount empty slots (the trailing output lanes) to the
  // matched bubble reason. foreach: connections are side effects.
  io.stallReason.reason.foreach(_ := 0.U)
  for (i <- 0 until DecodeWidth) {
    when(i.U < deqWasteCount) {
      io.stallReason.reason(DecodeWidth - i - 1) := matchBubble
    }
  }

  // No recorded reason but still under-delivering: fetch fragmentation
  when(!(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR)) {
    // should set reason for FetchFragmentationStall
    // topdown_stage.reasons(TopDownCounters.FetchFragmentationStall.id) := true.B
    for (i <- 0 until DecodeWidth) {
      when(i.U < deqWasteCount) {
        io.stallReason.reason(DecodeWidth - i - 1) := TopDownCounters.FetchFragBubble.id.U
      }
    }
  }

  // A backend-reported reason overrides everything (last connect wins)
  when(io.stallReason.backReason.valid) {
    io.stallReason.reason.foreach(_ := io.stallReason.backReason.bits)
  }

  // Debug info
  XSError(
    deqPtr.value =/= deqBankPtr.value + deqInBankPtr(deqBankPtr.value).value * IBufNBank.asUInt,
    "Dequeue PTR mismatch"
  )
  XSDebug(io.flush, "IBuffer Flushed\n")

  when(io.in.fire) {
    XSDebug("Enque:\n")
    XSDebug(p"MASK=${Binary(io.in.bits.valid)}\n")
    for(i <- 0 until PredictWidth){
      XSDebug(p"PC=${Hexadecimal(io.in.bits.pc(i))} ${Hexadecimal(io.in.bits.instrs(i))}\n")
    }
  }

  for (i <- 0 until DecodeWidth) {
    XSDebug(io.out(i).fire,
      p"deq: ${Hexadecimal(io.out(i).bits.instr)} PC=${Hexadecimal(io.out(i).bits.pc)}" +
      p"v=${io.out(i).valid} r=${io.out(i).ready} " +
      p"excpVec=${Binary(io.out(i).bits.exceptionVec.asUInt)} crossPageIPF=${io.out(i).bits.crossPageIPFFix}\n")
  }

  XSDebug(p"ValidEntries: ${validEntries}\n")
  XSDebug(p"EnqNum: ${numEnq}\n")
  XSDebug(p"DeqNum: ${numDeq}\n")

  // instrHungry: the buffer has been used at least once, is now empty, and the
  // emptiness is not the immediate result of a flush bubble.
  val afterInit = RegInit(false.B)
  val headBubble = RegInit(false.B)
  when (io.in.fire) { afterInit := true.B }
  when (io.flush) {
    headBubble := true.B
  } .elsewhen(validEntries =/= 0.U) {
    headBubble := false.B
  }
  val instrHungry = afterInit && (validEntries === 0.U) && !headBubble

  QueuePerf(IBufSize, validEntries, !allowEnq)
  XSPerfAccumulate("flush", io.flush)
  XSPerfAccumulate("hungry", instrHungry)

  val ibuffer_IDWidth_hvButNotFull = afterInit && (validEntries =/= 0.U) && (validEntries < DecodeWidth.U) && !headBubble
  XSPerfAccumulate("ibuffer_IDWidth_hvButNotFull", ibuffer_IDWidth_hvButNotFull)
  /*
  XSPerfAccumulate("ICacheMissBubble", Mux(matchBubbleVec(TopDownCounters.ICacheMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITLBMissBubble", Mux(matchBubbleVec(TopDownCounters.ITLBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ControlRedirectBubble", Mux(matchBubbleVec(TopDownCounters.ControlRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("MemVioRedirectBubble", Mux(matchBubbleVec(TopDownCounters.MemVioRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OtherRedirectBubble", Mux(matchBubbleVec(TopDownCounters.OtherRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("BTBMissBubble", Mux(matchBubbleVec(TopDownCounters.BTBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OverrideBubble", Mux(matchBubbleVec(TopDownCounters.OverrideBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqUpdateBubble", Mux(matchBubbleVec(TopDownCounters.FtqUpdateBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqFullStall", Mux(matchBubbleVec(TopDownCounters.FtqFullStall.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FetchFragmentBubble",
  Mux(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR, 0.U, deqWasteCount))
  XSPerfAccumulate("TAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.TAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("SCMissBubble", Mux(matchBubbleVec(TopDownCounters.SCMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITTAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.ITTAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("RASMissBubble", Mux(matchBubbleVec(TopDownCounters.RASMissBubble.id), deqWasteCount, 0.U))
  */

  val perfEvents = Seq(
    ("IBuffer_Flushed  ", io.flush                                                                     ),
    ("IBuffer_hungry   ", instrHungry                                                                  ),
    ("IBuffer_1_4_valid", (validEntries >  (0*(IBufSize/4)).U) & (validEntries < (1*(IBufSize/4)).U)   ),
    ("IBuffer_2_4_valid", (validEntries >= (1*(IBufSize/4)).U) & (validEntries < (2*(IBufSize/4)).U)   ),
    ("IBuffer_3_4_valid", (validEntries >= (2*(IBufSize/4)).U) & (validEntries < (3*(IBufSize/4)).U)   ),
    ("IBuffer_4_4_valid", (validEntries >= (3*(IBufSize/4)).U) & (validEntries < (4*(IBufSize/4)).U)   ),
    ("IBuffer_full     ",  validEntries.andR                                                           ),
    ("Front_Bubble     ", PopCount((0 until DecodeWidth).map(i => io.out(i).ready && !io.out(i).valid)))
  )
  generatePerfEvent()
}
426