xref: /XiangShan/src/main/scala/xiangshan/frontend/IBuffer.scala (revision 68de2c3d93763015ac0793019cd4f8dba6f3bbad)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import xiangshan._
23import utils._
24import utility._
25import xiangshan.ExceptionNO._
26
/** Circular-queue pointer whose range is the whole instruction buffer (`IBufSize` entries). */
class IBufPtr(implicit p: Parameters)
  extends CircularQueuePtr[IBufPtr](params => params(XSCoreParamsKey).IBufSize)
31
/** Circular-queue pointer addressing one slot inside a single bank (`IBufSize / IBufNBank` slots). */
class IBufInBankPtr(implicit p: Parameters)
  extends CircularQueuePtr[IBufInBankPtr](
    params => params(XSCoreParamsKey).IBufSize / params(XSCoreParamsKey).IBufNBank
  )
36
/** Circular-queue pointer selecting one of the `IBufNBank` banks. */
class IBufBankPtr(implicit p: Parameters)
  extends CircularQueuePtr[IBufBankPtr](params => params(XSCoreParamsKey).IBufNBank)
41
/** Port bundle of the instruction buffer.
  *
  * Field order is kept exactly as declared, since it determines the port order of the bundle.
  */
class IBufferIO(implicit p: Parameters) extends XSBundle {
  // Flush request: resets the queue pointers and re-opens the enqueue side.
  val flush = Input(Bool())

  // Flush-cause flags; the buffer only consults them while `flush` is asserted,
  // to pick which top-down stall reason gets recorded.
  val ControlRedirect = Input(Bool())
  val ControlBTBMissBubble = Input(Bool())
  val TAGEMissBubble = Input(Bool())
  val SCMissBubble = Input(Bool())
  val ITTAGEMissBubble = Input(Bool())
  val RASMissBubble = Input(Bool())
  val MemVioRedirect = Input(Bool())

  // Enqueue side: one fetch packet per handshake.
  val in = Flipped(Decoupled(new FetchToIBuffer))

  // Dequeue side: one decoupled control-flow record per decode slot.
  val out = Vec(DecodeWidth, Decoupled(new CtrlFlow))

  // High whenever the buffer is refusing new fetch packets.
  val full = Output(Bool())

  // Per-decode-slot stall reason reporting for top-down analysis.
  val stallReason = new StallReasonIO(DecodeWidth)
}
56
/** Payload stored in one instruction-buffer slot.
  *
  * Holds what is captured from a fetch packet for a single instruction and can be
  * expanded into the [[CtrlFlow]] record presented on the dequeue ports.
  */
class IBufEntry(implicit p: Parameters) extends XSBundle {
  val inst = UInt(32.W)
  val pc = UInt(VAddrBits.W)
  val foldpc = UInt(MemPredPCWidth.W)
  val pd = new PreDecodeInfo
  val pred_taken = Bool()
  val ftqPtr = new FtqPtr
  val ftqOffset = UInt(log2Ceil(PredictWidth).W)
  val ipf = Bool()
  val acf = Bool()
  val crossPageIPFFix = Bool()
  val triggered = new TriggerCf

  /** Drives every field of this entry from slot `i` of a fetch packet.
    *
    * @param fetch fetch packet to read from
    * @param i     index of the instruction slot inside the packet
    * @return this entry, so the call can be chained onto a freshly created wire
    */
  def fromFetch(fetch: FetchToIBuffer, i: Int): IBufEntry = {
    // Instruction bits, addresses and pre-decode information
    inst := fetch.instrs(i)
    pc := fetch.pc(i)
    foldpc := fetch.foldpc(i)
    pd := fetch.pd(i)

    // FTQ bookkeeping: the valid bit of the slot's ftqOffset is stored as `pred_taken`
    ftqPtr := fetch.ftqPtr
    ftqOffset := fetch.ftqOffset(i).bits
    pred_taken := fetch.ftqOffset(i).valid

    // Exception and trigger information
    ipf := fetch.ipf(i)
    acf := fetch.acf(i)
    crossPageIPFFix := fetch.crossPageIPFFix(i)
    triggered := fetch.triggered(i)

    this
  }

  /** Builds a [[CtrlFlow]] wire carrying the content of this entry.
    *
    * Fields that this buffer has no data for are connected to `DontCare`.
    *
    * @return a new wire driven from this entry
    */
  def toCtrlFlow: CtrlFlow = {
    val cf = Wire(new CtrlFlow)

    // Instruction, addresses and prediction information
    cf.instr := inst
    cf.pc := pc
    cf.foldpc := foldpc
    cf.pd := pd
    cf.pred_taken := pred_taken
    cf.ftqPtr := ftqPtr
    cf.ftqOffset := ftqOffset

    // Clear the whole exception vector first, then override the two fetch-side
    // exceptions (a later connection takes precedence over an earlier one).
    cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
    cf.exceptionVec(instrPageFault) := ipf
    cf.exceptionVec(instrAccessFault) := acf
    cf.crossPageIPFFix := crossPageIPFFix
    cf.trigger := triggered

    // Not produced by the instruction buffer
    Seq(cf.storeSetHit, cf.waitForRobIdx, cf.loadWaitBit, cf.loadWaitStrict, cf.ssid)
      .foreach(_ := DontCare)

    cf
  }
}
107
/** Instruction buffer: a register-based FIFO between fetch and decode.
  *
  * Up to `PredictWidth` instructions are written per accepted fetch packet and up to
  * `DecodeWidth` instructions are offered to decode per cycle. Storage is a flat register
  * vector that is additionally viewed as `IBufNBank` interleaved banks, so that each
  * dequeue port only needs a two-stage (in-bank, then between-bank) read mux.
  *
  * Dequeue is all-or-nothing: only `io.out.head.ready` is consulted, and when it is high
  * every currently valid output slot is consumed.
  *
  * `io.flush` resets all pointers; the stored entries themselves are not cleared.
  */
class IBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper with HasPerfEvents {
  val io = IO(new IBufferIO)

  // Parameter Check
  private val bankSize = IBufSize / IBufNBank
  require(IBufSize % IBufNBank == 0, s"IBufNBank should divide IBufSize, IBufNBank: $IBufNBank, IBufSize: $IBufSize")
  require(IBufNBank >= DecodeWidth,
    s"IBufNBank should be equal or larger than DecodeWidth, IBufNBank: $IBufNBank, DecodeWidth: $DecodeWidth")

  // IBuffer is organized as raw registers
  // This is because IBuffer is a huge queue, so read & write port logic should be precisely controlled
  //                             . + + E E E - .
  //                             . + + E E E - .
  //                             . . + E E E - .
  //                             . . + E E E E -
  // As shown above, + means enqueue, - means dequeue, E is current content
  // When dequeuing, the read port is organized like a banked FIFO
  // Dequeue reads no more than 1 entry from each bank sequentially, this can be exploited to reduce area
  // Enqueue writes cannot benefit from this characteristic unless an SRAM is used
  // For detail see Enqueue and Dequeue below
  private val ibuf: Vec[IBufEntry] = RegInit(VecInit.fill(IBufSize)(0.U.asTypeOf(new IBufEntry)))
  // Entry k of the flat buffer lives in bank (k % IBufNBank) at in-bank offset (k / IBufNBank)
  private val bankedIBufView: Vec[Vec[IBufEntry]] = VecInit.tabulate(IBufNBank)(
    bankID => VecInit.tabulate(bankSize)(
      inBankOffset => ibuf(bankID + inBankOffset * IBufNBank)
    )
  )

  // Between Bank
  // One bank pointer per decode slot; slot i starts at bank i
  private val deqBankPtrVec: Vec[IBufBankPtr] = RegInit(VecInit.tabulate(DecodeWidth)(_.U.asTypeOf(new IBufBankPtr)))
  private val deqBankPtr: IBufBankPtr = deqBankPtrVec(0)
  // Inside Bank
  // One read offset per bank
  private val deqInBankPtr: Vec[IBufInBankPtr] = RegInit(VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr)))

  // Flat dequeue pointer, used for occupancy computation and for the consistency check below
  val deqPtr = RegInit(0.U.asTypeOf(new IBufPtr))

  // One enqueue pointer per possible write offset; element i starts i entries ahead of enqPtr
  val enqPtrVec = RegInit(VecInit.tabulate(PredictWidth)(_.U.asTypeOf(new IBufPtr)))
  val enqPtr = enqPtrVec(0)

  val validEntries = distanceBetween(enqPtr, deqPtr)
  // Registered enqueue permission, recomputed every cycle from the predicted next occupancy
  val allowEnq = RegInit(true.B)

  val numEnq = Mux(io.in.fire, PopCount(io.in.bits.valid), 0.U)
  // Number of entries offered to decode this cycle: min(validEntries, DecodeWidth)
  val numTryDeq = Mux(validEntries >= DecodeWidth.U, DecodeWidth.U, validEntries)
  val numDeq = Mux(io.out.head.ready, numTryDeq, 0.U)

  val numAfterEnq = validEntries +& numEnq
  val nextValidEntries = Mux(io.out(0).ready, numAfterEnq - numTryDeq, numAfterEnq)
  allowEnq := (IBufSize - PredictWidth).U >= nextValidEntries // Disable when almost full

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Enqueue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  io.in.ready := allowEnq
  // Data
  // enqOffset(i) = number of valid instructions in the packet before slot i
  val enqOffset = VecInit.tabulate(PredictWidth)(i => PopCount(io.in.bits.valid.asBools.take(i)))
  val enqData = VecInit.tabulate(PredictWidth)(i => Wire(new IBufEntry).fromFetch(io.in.bits, i))
  ibuf.zipWithIndex.foreach {
    case (entry, idx) => {
      // Select
      // Packet slot i targets this entry when it is valid, enabled, and its enqueue pointer equals idx
      val validOH = Range(0, PredictWidth).map {
        i => io.in.bits.valid(i) &&
          io.in.bits.enqEnable(i) &&
          enqPtrVec(enqOffset(i)).value === idx.asUInt
      } // Should be OneHot
      val wen = validOH.reduce(_ || _) && io.in.fire && !io.flush

      // Write port
      // Each IBuffer entry has a PredictWidth -> 1 Mux
      val writeEntry = Mux1H(validOH, enqData)
      entry := Mux(wen, writeEntry, entry)

      // Debug Assertion
      XSError(PopCount(validOH) > 1.asUInt, "validOH is not OneHot")
    }
  }
  // Pointer maintenance
  when (io.in.fire && !io.flush) {
    enqPtrVec := VecInit(enqPtrVec.map(_ + PopCount(io.in.bits.enqEnable)))
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Dequeue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Bit i is set when output slot i carries a valid entry (the low numTryDeq bits are set)
  val validVec = Mux(validEntries >= DecodeWidth.U,
    ((1 << DecodeWidth) - 1).U,
    UIntToMask(validEntries(log2Ceil(DecodeWidth) - 1, 0), DecodeWidth)
  )
  // Data
  // Read port
  // 2-stage, IBufNBank * (bankSize -> 1) + IBufNBank -> 1
  // Should be better than IBufSize -> 1 in area, with no significant latency increase
  private val readStage1: Vec[IBufEntry] = VecInit.tabulate(IBufNBank)(
    bankID => Mux1H(UIntToOH(deqInBankPtr(bankID).value), bankedIBufView(bankID))
  )
  for (i <- 0 until DecodeWidth) {
    io.out(i).valid := validVec(i)
    io.out(i).bits := Mux1H(UIntToOH(deqBankPtrVec(i).value), readStage1).toCtrlFlow
  }
  // Pointer maintenance
  // Only the ready of output slot 0 is consulted: all offered entries leave together
  deqBankPtrVec := Mux(io.out.head.ready, VecInit(deqBankPtrVec.map(_ + numTryDeq)), deqBankPtrVec)
  deqPtr := Mux(io.out.head.ready, deqPtr + numTryDeq, deqPtr)
  deqInBankPtr.zipWithIndex.foreach {
    case (ptr, idx) => {
      // validVec[k] == bankValid[deqBankPtr + k]
      // So bankValid[n] == validVec[n - deqBankPtr]
      val validIdx = Mux(idx.asUInt >= deqBankPtr.value,
        idx.asUInt - deqBankPtr.value,
        ((idx + IBufNBank).asUInt - deqBankPtr.value)(log2Ceil(IBufNBank) - 1, 0)
      )
      // A bank advances its read offset only if the slot mapped to it is valid and dequeue happens
      val bankAdvance = Mux(validIdx >= DecodeWidth.U,
        false.B,
        validVec(validIdx(log2Ceil(DecodeWidth) - 1, 0))
      ) && io.out.head.ready
      ptr := Mux(bankAdvance , ptr + 1.U, ptr)
    }
  }

  // Flush
  // Placed after the regular pointer updates so that these connections take precedence
  when (io.flush) {
    allowEnq := true.B
    enqPtrVec := enqPtrVec.indices.map(_.U.asTypeOf(new IBufPtr))
    deqBankPtrVec := deqBankPtrVec.indices.map(_.U.asTypeOf(new IBufBankPtr))
    deqInBankPtr := VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr))
    deqPtr := 0.U.asTypeOf(new IBufPtr())
  }
  io.full := !allowEnq

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // TopDown
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Registered copy of the incoming top-down info; on flush, one redirect reason is additionally set
  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  topdown_stage := io.in.bits.topdown_info
  when(io.flush) {
    when(io.ControlRedirect) {
      when(io.ControlBTBMissBubble) {
        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
      }.elsewhen(io.TAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
      }.elsewhen(io.SCMissBubble) {
        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
      }.elsewhen(io.ITTAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
      }.elsewhen(io.RASMissBubble) {
        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
      }
    }.elsewhen(io.MemVioRedirect) {
      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
    }.otherwise {
      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
    }
  }


  val dequeueInsufficient = Wire(Bool())
  val matchBubble = Wire(UInt(log2Up(TopDownCounters.NumStallReasons.id).W))
  val deqValidCount = PopCount(validVec.asBools)
  val deqWasteCount = DecodeWidth.U - deqValidCount
  dequeueInsufficient := deqValidCount < DecodeWidth.U
  // Index of the highest-numbered reason that is currently set
  matchBubble := (TopDownCounters.NumStallReasons.id - 1).U - PriorityEncoder(topdown_stage.reasons.reverse)

  // Default: no stall reason; the connections below override it in order
  io.stallReason.reason.foreach(_ := 0.U)
  // The unused output slots are the topmost deqWasteCount ones; tag them with the matched reason
  for (i <- 0 until DecodeWidth) {
    when(i.U < deqWasteCount) {
      io.stallReason.reason(DecodeWidth - i - 1) := matchBubble
    }
  }

  // Some but not all slots are wasted and no recorded reason applies: report fetch fragmentation
  when(!(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR)) {
    // should set reason for FetchFragmentationStall
    // topdown_stage.reasons(TopDownCounters.FetchFragmentationStall.id) := true.B
    for (i <- 0 until DecodeWidth) {
      when(i.U < deqWasteCount) {
        io.stallReason.reason(DecodeWidth - i - 1) := TopDownCounters.FetchFragBubble.id.U
      }
    }
  }

  // A valid backReason overrides every slot's reason
  when(io.stallReason.backReason.valid) {
    io.stallReason.reason.foreach(_ := io.stallReason.backReason.bits)
  }

  // Debug info
  // The flat dequeue pointer must agree with the (bank, in-bank) pointer pair
  XSError(
    deqPtr.value =/= deqBankPtr.value + deqInBankPtr(deqBankPtr.value).value * IBufNBank.asUInt,
    "Dequeue PTR mismatch"
  )
  XSDebug(io.flush, "IBuffer Flushed\n")

  when(io.in.fire) {
    XSDebug("Enque:\n")
    XSDebug(p"MASK=${Binary(io.in.bits.valid)}\n")
    for(i <- 0 until PredictWidth){
      XSDebug(p"PC=${Hexadecimal(io.in.bits.pc(i))} ${Hexadecimal(io.in.bits.instrs(i))}\n")
    }
  }

  for (i <- 0 until DecodeWidth) {
    XSDebug(io.out(i).fire,
      p"deq: ${Hexadecimal(io.out(i).bits.instr)} PC=${Hexadecimal(io.out(i).bits.pc)}" +
      p"v=${io.out(i).valid} r=${io.out(i).ready} " +
      p"excpVec=${Binary(io.out(i).bits.exceptionVec.asUInt)} crossPageIPF=${io.out(i).bits.crossPageIPFFix}\n")
  }

  XSDebug(p"ValidEntries: ${validEntries}\n")
  XSDebug(p"EnqNum: ${numEnq}\n")
  XSDebug(p"DeqNum: ${numDeq}\n")

  // afterInit: set once the first fetch packet has been accepted
  // headBubble: set by a flush, cleared once the buffer holds entries again
  val afterInit = RegInit(false.B)
  val headBubble = RegInit(false.B)
  when (io.in.fire) { afterInit := true.B }
  when (io.flush) {
    headBubble := true.B
  } .elsewhen(validEntries =/= 0.U) {
    headBubble := false.B
  }
  // Empty buffer that is neither before the first packet nor right after a flush
  val instrHungry = afterInit && (validEntries === 0.U) && !headBubble

  QueuePerf(IBufSize, validEntries, !allowEnq)
  XSPerfAccumulate("flush", io.flush)
  XSPerfAccumulate("hungry", instrHungry)

  val ibuffer_IDWidth_hvButNotFull = afterInit && (validEntries =/= 0.U) && (validEntries < DecodeWidth.U) && !headBubble
  XSPerfAccumulate("ibuffer_IDWidth_hvButNotFull", ibuffer_IDWidth_hvButNotFull)
  /*
  XSPerfAccumulate("ICacheMissBubble", Mux(matchBubbleVec(TopDownCounters.ICacheMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITLBMissBubble", Mux(matchBubbleVec(TopDownCounters.ITLBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ControlRedirectBubble", Mux(matchBubbleVec(TopDownCounters.ControlRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("MemVioRedirectBubble", Mux(matchBubbleVec(TopDownCounters.MemVioRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OtherRedirectBubble", Mux(matchBubbleVec(TopDownCounters.OtherRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("BTBMissBubble", Mux(matchBubbleVec(TopDownCounters.BTBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OverrideBubble", Mux(matchBubbleVec(TopDownCounters.OverrideBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqUpdateBubble", Mux(matchBubbleVec(TopDownCounters.FtqUpdateBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqFullStall", Mux(matchBubbleVec(TopDownCounters.FtqFullStall.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FetchFragmentBubble",
  Mux(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR, 0.U, deqWasteCount))
  XSPerfAccumulate("TAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.TAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("SCMissBubble", Mux(matchBubbleVec(TopDownCounters.SCMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITTAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.ITTAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("RASMissBubble", Mux(matchBubbleVec(TopDownCounters.RASMissBubble.id), deqWasteCount, 0.U))
  */

  val perfEvents = Seq(
    ("IBuffer_Flushed  ", io.flush                                                                     ),
    ("IBuffer_hungry   ", instrHungry                                                                  ),
    ("IBuffer_1_4_valid", (validEntries >  (0*(IBufSize/4)).U) & (validEntries < (1*(IBufSize/4)).U)   ),
    ("IBuffer_2_4_valid", (validEntries >= (1*(IBufSize/4)).U) & (validEntries < (2*(IBufSize/4)).U)   ),
    ("IBuffer_3_4_valid", (validEntries >= (2*(IBufSize/4)).U) & (validEntries < (3*(IBufSize/4)).U)   ),
    ("IBuffer_4_4_valid", (validEntries >= (3*(IBufSize/4)).U) & (validEntries < (4*(IBufSize/4)).U)   ),
    // Full means every entry is occupied. The previous `validEntries.andR` only fired when the
    // occupancy count was all-ones, which is not the same value as IBufSize.
    ("IBuffer_full     ",  validEntries === IBufSize.U                                                 ),
    ("Front_Bubble     ", PopCount((0 until DecodeWidth).map(i => io.out(i).ready && !io.out(i).valid)))
  )
  generatePerfEvent()
}
361