xref: /XiangShan/src/main/scala/xiangshan/frontend/IBuffer.scala (revision e4d4d30585412eb8ac83b5c75599a348356342a2)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import xiangshan._
23import utils._
24import utility._
25import xiangshan.ExceptionNO._
26
/** Circular-queue pointer spanning the whole instruction buffer (`IBufSize` entries). */
class IBufPtr(implicit p: Parameters)
  extends CircularQueuePtr[IBufPtr](params => params(XSCoreParamsKey).IBufSize)
31
/** Circular-queue pointer addressing one entry inside a single bank (`IBufSize / IBufNBank` entries). */
class IBufInBankPtr(implicit p: Parameters)
  extends CircularQueuePtr[IBufInBankPtr](params => {
    val core = params(XSCoreParamsKey)
    core.IBufSize / core.IBufNBank
  })
36
/** Circular-queue pointer selecting one of the `IBufNBank` banks. */
class IBufBankPtr(implicit p: Parameters)
  extends CircularQueuePtr[IBufBankPtr](params => params(XSCoreParamsKey).IBufNBank)
41
/**
  * Port list of the instruction buffer.
  *
  * Field declaration order is kept unchanged because it defines the bundle layout.
  */
class IBufferIO(implicit p: Parameters) extends XSBundle {
  // Flush request: resets the buffer pointers.
  val flush = Input(Bool())

  // Redirect classification, only consumed by the top-down bookkeeping while `flush` is asserted.
  val ControlRedirect = Input(Bool())
  val ControlBTBMissBubble = Input(Bool())
  val TAGEMissBubble = Input(Bool())
  val SCMissBubble = Input(Bool())
  val ITTAGEMissBubble = Input(Bool())
  val RASMissBubble = Input(Bool())
  val MemVioRedirect = Input(Bool())

  // Enqueue side: one fetch packet per handshake.
  val in = Flipped(Decoupled(new FetchToIBuffer))

  // Dequeue side: up to DecodeWidth instructions per cycle.
  val out = Vec(DecodeWidth, Decoupled(new CtrlFlow))

  // Asserted while the buffer refuses new fetch packets.
  val full = Output(Bool())

  // Per-slot stall reason reporting for top-down analysis.
  val stallReason = new StallReasonIO(DecodeWidth)
}
56
/**
  * Payload stored in one instruction-buffer slot.
  *
  * An entry is filled from lane `i` of a [[FetchToIBuffer]] packet via [[fromFetch]]
  * and turned into a [[CtrlFlow]] for the dequeue ports via [[toCtrlFlow]].
  */
class IBufEntry(implicit p: Parameters) extends XSBundle {
  // Instruction bits and addresses
  val inst = UInt(32.W)
  val pc = UInt(VAddrBits.W)
  val foldpc = UInt(MemPredPCWidth.W)
  // Pre-decode and prediction bookkeeping
  val pd = new PreDecodeInfo
  val pred_taken = Bool()
  val ftqPtr = new FtqPtr
  val ftqOffset = UInt(log2Ceil(PredictWidth).W)
  // Fetch exceptions (page fault / access fault) and cross-page fix-up flag
  val ipf = Bool()
  val acf = Bool()
  val crossPageIPFFix = Bool()
  // Trigger information
  val triggered = new TriggerCf

  /**
    * Drives every field of this entry from lane `i` of `fetch`.
    *
    * @param fetch fetch packet to read from
    * @param i     lane index inside the packet
    * @return this entry, so the call can be chained on a freshly created wire
    */
  def fromFetch(fetch: FetchToIBuffer, i: Int): IBufEntry = {
    // Per-lane FTQ offset: `valid` is stored as the taken prediction, `bits` as the offset.
    val laneOffset = fetch.ftqOffset(i)

    // Instruction and addresses
    inst := fetch.instrs(i)
    pc := fetch.pc(i)
    foldpc := fetch.foldpc(i)

    // Pre-decode / prediction
    pd := fetch.pd(i)
    pred_taken := laneOffset.valid
    ftqOffset := laneOffset.bits
    ftqPtr := fetch.ftqPtr // shared by all lanes of the packet

    // Exceptions and triggers
    ipf := fetch.ipf(i)
    acf := fetch.acf(i)
    crossPageIPFFix := fetch.crossPageIPFFix(i)
    triggered := fetch.triggered(i)

    this
  }

  /**
    * Builds a [[CtrlFlow]] wire carrying the content of this entry.
    *
    * The exception vector is cleared first and then only the instruction page-fault
    * and instruction access-fault bits are set from `ipf` / `acf`.
    * Memory-dependence fields are left as `DontCare` here.
    */
  def toCtrlFlow: CtrlFlow = {
    val cf = Wire(new CtrlFlow)

    // Exception vector: zero everything, then override the two fetch-related bits.
    cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
    cf.exceptionVec(instrPageFault) := ipf
    cf.exceptionVec(instrAccessFault) := acf

    // Straight copies from the entry
    cf.instr := inst
    cf.pc := pc
    cf.foldpc := foldpc
    cf.pd := pd
    cf.pred_taken := pred_taken
    cf.ftqPtr := ftqPtr
    cf.ftqOffset := ftqOffset
    cf.crossPageIPFFix := crossPageIPFFix
    cf.trigger := triggered

    // Fields not produced by the instruction buffer
    Seq(cf.storeSetHit, cf.waitForRobIdx, cf.loadWaitBit, cf.loadWaitStrict, cf.ssid)
      .foreach(_ := DontCare)

    cf
  }
}
107
/**
  * Instruction buffer: a register-based circular queue of [[IBufEntry]].
  *
  * - Enqueue: on `io.in.fire` (and no flush) the lanes of the fetch packet that are both
  *   `valid` and `enqEnable` are written to the entries addressed by `enqPtrVec`.
  * - Dequeue: up to `DecodeWidth` entries are presented on `io.out`; the read path is banked
  *   (`IBufNBank` banks) so that each output reads at most one entry per bank.
  * - Flush: resets all pointers; entry contents are not cleared.
  *
  * The module also produces top-down stall reasons and performance events.
  */
class IBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper with HasPerfEvents {
  val io = IO(new IBufferIO)

  // Parameter Check
  private val bankSize = IBufSize / IBufNBank
  require(IBufSize % IBufNBank == 0, s"IBufNBank should divide IBufSize, IBufNBank: $IBufNBank, IBufSize: $IBufSize")
  require(IBufNBank >= DecodeWidth,
    s"IBufNBank should be equal or larger than DecodeWidth, IBufNBank: $IBufNBank, DecodeWidth: $DecodeWidth")

  // IBuffer is organized as raw registers
  // This is because IBuffer is a huge queue, so read & write port logic should be precisely controlled
  //                             . + + E E E - .
  //                             . + + E E E - .
  //                             . . + E E E - .
  //                             . . + E E E E -
  // As shown above, + means enqueue, - means dequeue, E is current content
  // When dequeuing, the read port is organized like a banked FIFO
  // Dequeue reads no more than 1 entry from each bank sequentially, this can be exploited to reduce area
  // Enqueue writes cannot benefit from this characteristic unless an SRAM is used
  // For details see Enqueue and Dequeue below
  private val ibuf: Vec[IBufEntry] = RegInit(VecInit.fill(IBufSize)(0.U.asTypeOf(new IBufEntry)))
  // Banked view of the same registers: entry k lives in bank (k % IBufNBank) at offset (k / IBufNBank).
  private val bankedIBufView: Vec[Vec[IBufEntry]] = VecInit.tabulate(IBufNBank)(
    bankID => VecInit.tabulate(bankSize)(
      inBankOffset => ibuf(bankID + inBankOffset * IBufNBank)
    )
  )

  // Between Bank
  // One bank pointer per dequeue port; port i starts from the literal i.
  private val deqBankPtrVec: Vec[IBufBankPtr] = RegInit(VecInit.tabulate(DecodeWidth)(_.U.asTypeOf(new IBufBankPtr)))
  private val deqBankPtr: IBufBankPtr = deqBankPtrVec(0)
  // Inside Bank
  // One read offset per bank.
  private val deqInBankPtr: Vec[IBufInBankPtr] = RegInit(VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr)))

  // Flat dequeue pointer, used for occupancy and for the consistency check against the banked pointers.
  val deqPtr = RegInit(0.U.asTypeOf(new IBufPtr))

  // One enqueue pointer per possible lane of a fetch packet; pointer i starts from the literal i.
  val enqPtrVec = RegInit(VecInit.tabulate(PredictWidth)(_.U.asTypeOf(new IBufPtr)))
  val enqPtr = enqPtrVec(0)

  val validEntries = distanceBetween(enqPtr, deqPtr)
  val allowEnq = RegInit(true.B)

  // NOTE(review): numEnq counts `valid` lanes while the enqueue pointers below advance by the number of
  // `enqEnable` lanes. If enqEnable can be a strict subset of valid this only makes allowEnq more
  // conservative for one cycle — confirm against the producer of FetchToIBuffer.
  val numEnq = Mux(io.in.fire, PopCount(io.in.bits.valid), 0.U)
//  val numTryDeq = Mux(validEntries >= DecodeWidth.U, DecodeWidth.U, validEntries)
  val numTryDeq = PopCount(io.out.map(_.fire))
  val numDeq = Mux(io.out.head.ready, numTryDeq, 0.U)

  // `+&` keeps the carry so the sum cannot wrap.
  val numAfterEnq = validEntries +& numEnq
  val nextValidEntries = Mux(io.out(0).ready, numAfterEnq - numTryDeq, numAfterEnq)
  allowEnq := (IBufSize - PredictWidth).U >= nextValidEntries // Disable when almost full

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Enqueue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  io.in.ready := allowEnq
  // Data
  // enqOffset(i): number of valid lanes before lane i, i.e. which enqueue pointer lane i uses.
  val enqOffset = VecInit.tabulate(PredictWidth)(i => PopCount(io.in.bits.valid.asBools.take(i)))
  val enqData = VecInit.tabulate(PredictWidth)(i => Wire(new IBufEntry).fromFetch(io.in.bits, i))
  ibuf.zipWithIndex.foreach {
    case (entry, idx) => {
      // Select
      // Lane i targets this entry when it is valid, enabled and its enqueue pointer equals idx.
      val validOH = Range(0, PredictWidth).map {
        i => io.in.bits.valid(i) &&
          io.in.bits.enqEnable(i) &&
          enqPtrVec(enqOffset(i)).value === idx.asUInt
      } // Should be OneHot
      val wen = validOH.reduce(_ || _) && io.in.fire && !io.flush

      // Write port
      // Each IBuffer entry has a PredictWidth -> 1 Mux
      val writeEntry = Mux1H(validOH, enqData)
      entry := Mux(wen, writeEntry, entry)

      // Debug Assertion
      XSError(PopCount(validOH) > 1.asUInt, "validOH is not OneHot")
    }
  }
  // Pointer maintenance
  when (io.in.fire && !io.flush) {
    enqPtrVec := VecInit(enqPtrVec.map(_ + PopCount(io.in.bits.enqEnable)))
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Dequeue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Bit i of validVec marks dequeue port i as holding a valid entry:
  // all ports when at least DecodeWidth entries are buffered, otherwise a mask derived from validEntries.
  val validVec = Mux(validEntries >= DecodeWidth.U,
    ((1 << DecodeWidth) - 1).U,
    UIntToMask(validEntries(log2Ceil(DecodeWidth) - 1, 0), DecodeWidth)
  )
  // Data
  // Read port
  // 2-stage, IBufNBank * (bankSize -> 1) + IBufNBank -> 1
  // Should be better than IBufSize -> 1 in area, with no significant latency increase
  private val readStage1: Vec[IBufEntry] = VecInit.tabulate(IBufNBank)(
    bankID => Mux1H(UIntToOH(deqInBankPtr(bankID).value), bankedIBufView(bankID))
  )
  for (i <- 0 until DecodeWidth) {
    io.out(i).valid := validVec(i)
    io.out(i).bits := Mux1H(UIntToOH(deqBankPtrVec(i).value), readStage1).toCtrlFlow
  }
  // Pointer maintenance
  // Pointers only move when the first dequeue port is ready.
  deqBankPtrVec := Mux(io.out.head.ready, VecInit(deqBankPtrVec.map(_ + numTryDeq)), deqBankPtrVec)
  deqPtr := Mux(io.out.head.ready, deqPtr + numTryDeq, deqPtr)
  deqInBankPtr.zipWithIndex.foreach {
    case (ptr, idx) => {
      // validVec[k] == bankValid[deqBankPtr + k]
      // So bankValid[n] == validVec[n - deqBankPtr]
      // (the subtraction is done modulo IBufNBank)
      val validIdx = Mux(idx.asUInt >= deqBankPtr.value,
        idx.asUInt - deqBankPtr.value,
        ((idx + IBufNBank).asUInt - deqBankPtr.value)(log2Ceil(IBufNBank) - 1, 0)
      )
      // A bank advances its read offset when the dequeue port mapped onto it is valid.
      val bankAdvance = Mux(validIdx >= DecodeWidth.U,
        false.B,
        validVec(validIdx(log2Ceil(DecodeWidth) - 1, 0))
      ) && io.out.head.ready
      ptr := Mux(bankAdvance , ptr + 1.U, ptr)
    }
  }

  // Flush
  // Declared after the regular updates so these connections take priority (last connect wins).
  when (io.flush) {
    allowEnq := true.B
    enqPtrVec := enqPtrVec.indices.map(_.U.asTypeOf(new IBufPtr))
    deqBankPtrVec := deqBankPtrVec.indices.map(_.U.asTypeOf(new IBufBankPtr))
    deqInBankPtr := VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr))
    deqPtr := 0.U.asTypeOf(new IBufPtr())
  }
  io.full := !allowEnq

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // TopDown
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Registered copy of the incoming top-down info; on flush the matching redirect reason is set on top of it.
  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  topdown_stage := io.in.bits.topdown_info
  when(io.flush) {
    when(io.ControlRedirect) {
      when(io.ControlBTBMissBubble) {
        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
      }.elsewhen(io.TAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
      }.elsewhen(io.SCMissBubble) {
        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
      }.elsewhen(io.ITTAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
      }.elsewhen(io.RASMissBubble) {
        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
      }
    }.elsewhen(io.MemVioRedirect) {
      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
    }.otherwise {
      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
    }
  }


  val dequeueInsufficient = Wire(Bool())
  val matchBubble = Wire(UInt(log2Up(TopDownCounters.NumStallReasons.id).W))
  val deqValidCount = PopCount(validVec.asBools)
  val deqWasteCount = DecodeWidth.U - deqValidCount
  dequeueInsufficient := deqValidCount < DecodeWidth.U
  // Priority-encoding the reversed vector and subtracting from the last index
  // yields the highest-indexed reason that is set.
  matchBubble := (TopDownCounters.NumStallReasons.id - 1).U - PriorityEncoder(topdown_stage.reasons.reverse)

  // Default: no stall reason. `foreach` is used because the connection is a pure side effect.
  io.stallReason.reason.foreach(_ := 0.U)
  // The wasted (invalid) dequeue slots are the highest-numbered ones.
  for (i <- 0 until DecodeWidth) {
    when(i.U < deqWasteCount) {
      io.stallReason.reason(DecodeWidth - i - 1) := matchBubble
    }
  }

  when(!(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR)) {
    // should set reason for FetchFragmentationStall
    // topdown_stage.reasons(TopDownCounters.FetchFragmentationStall.id) := true.B
    for (i <- 0 until DecodeWidth) {
      when(i.U < deqWasteCount) {
        io.stallReason.reason(DecodeWidth - i - 1) := TopDownCounters.FetchFragBubble.id.U
      }
    }
  }

  // A valid back-pressure reason overrides everything computed above.
  when(io.stallReason.backReason.valid) {
    io.stallReason.reason.foreach(_ := io.stallReason.backReason.bits)
  }

  // Debug info
  // The flat dequeue pointer must always agree with the banked (bank, in-bank offset) pair.
  XSError(
    deqPtr.value =/= deqBankPtr.value + deqInBankPtr(deqBankPtr.value).value * IBufNBank.asUInt,
    "Dequeue PTR mismatch"
  )
  XSDebug(io.flush, "IBuffer Flushed\n")

  when(io.in.fire) {
    XSDebug("Enque:\n")
    XSDebug(p"MASK=${Binary(io.in.bits.valid)}\n")
    for(i <- 0 until PredictWidth){
      XSDebug(p"PC=${Hexadecimal(io.in.bits.pc(i))} ${Hexadecimal(io.in.bits.instrs(i))}\n")
    }
  }

  for (i <- 0 until DecodeWidth) {
    // A space separates the PC value from "v=" so the two fields do not run together in the log.
    XSDebug(io.out(i).fire,
      p"deq: ${Hexadecimal(io.out(i).bits.instr)} PC=${Hexadecimal(io.out(i).bits.pc)} " +
      p"v=${io.out(i).valid} r=${io.out(i).ready} " +
      p"excpVec=${Binary(io.out(i).bits.exceptionVec.asUInt)} crossPageIPF=${io.out(i).bits.crossPageIPFFix}\n")
  }

  XSDebug(p"ValidEntries: ${validEntries}\n")
  XSDebug(p"EnqNum: ${numEnq}\n")
  XSDebug(p"DeqNum: ${numDeq}\n")

  // afterInit: set once the first fetch packet has been accepted.
  // headBubble: set by a flush and cleared once the buffer holds entries again.
  val afterInit = RegInit(false.B)
  val headBubble = RegInit(false.B)
  when (io.in.fire) { afterInit := true.B }
  when (io.flush) {
    headBubble := true.B
  } .elsewhen(validEntries =/= 0.U) {
    headBubble := false.B
  }
  // Empty buffer that is neither in the initial state nor the direct consequence of a flush.
  val instrHungry = afterInit && (validEntries === 0.U) && !headBubble

  QueuePerf(IBufSize, validEntries, !allowEnq)
  XSPerfAccumulate("flush", io.flush)
  XSPerfAccumulate("hungry", instrHungry)

  val ibuffer_IDWidth_hvButNotFull = afterInit && (validEntries =/= 0.U) && (validEntries < DecodeWidth.U) && !headBubble
  XSPerfAccumulate("ibuffer_IDWidth_hvButNotFull", ibuffer_IDWidth_hvButNotFull)
  /*
  XSPerfAccumulate("ICacheMissBubble", Mux(matchBubbleVec(TopDownCounters.ICacheMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITLBMissBubble", Mux(matchBubbleVec(TopDownCounters.ITLBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ControlRedirectBubble", Mux(matchBubbleVec(TopDownCounters.ControlRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("MemVioRedirectBubble", Mux(matchBubbleVec(TopDownCounters.MemVioRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OtherRedirectBubble", Mux(matchBubbleVec(TopDownCounters.OtherRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("BTBMissBubble", Mux(matchBubbleVec(TopDownCounters.BTBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OverrideBubble", Mux(matchBubbleVec(TopDownCounters.OverrideBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqUpdateBubble", Mux(matchBubbleVec(TopDownCounters.FtqUpdateBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqFullStall", Mux(matchBubbleVec(TopDownCounters.FtqFullStall.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FetchFragmentBubble",
  Mux(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR, 0.U, deqWasteCount))
  XSPerfAccumulate("TAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.TAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("SCMissBubble", Mux(matchBubbleVec(TopDownCounters.SCMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITTAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.ITTAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("RASMissBubble", Mux(matchBubbleVec(TopDownCounters.RASMissBubble.id), deqWasteCount, 0.U))
  */

  val perfEvents = Seq(
    ("IBuffer_Flushed  ", io.flush                                                                     ),
    ("IBuffer_hungry   ", instrHungry                                                                  ),
    ("IBuffer_1_4_valid", (validEntries >  (0*(IBufSize/4)).U) & (validEntries < (1*(IBufSize/4)).U)   ),
    ("IBuffer_2_4_valid", (validEntries >= (1*(IBufSize/4)).U) & (validEntries < (2*(IBufSize/4)).U)   ),
    ("IBuffer_3_4_valid", (validEntries >= (2*(IBufSize/4)).U) & (validEntries < (3*(IBufSize/4)).U)   ),
    ("IBuffer_4_4_valid", (validEntries >= (3*(IBufSize/4)).U) & (validEntries < (4*(IBufSize/4)).U)   ),
    // Full means the occupancy equals IBufSize. An and-reduction of the count only fires on an
    // all-ones value, which is not IBufSize unless IBufSize happens to be 2^w - 1.
    ("IBuffer_full     ",  validEntries === IBufSize.U                                                 ),
    ("Front_Bubble     ", PopCount((0 until DecodeWidth).map(i => io.out(i).ready && !io.out(i).valid)))
  )
  generatePerfEvent()
}
362