// xref: /XiangShan/src/main/scala/xiangshan/frontend/IBuffer.scala (revision d0de7e4a4bcd4633260dda99dfedc2a5e543b8b4)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import xiangshan._
23import utils._
24import utility._
25import xiangshan.ExceptionNO._
26
/** Circular pointer over the whole IBuffer (IBufSize entries). */
class IBufPtr(implicit p: Parameters) extends CircularQueuePtr[IBufPtr](
  p => p(XSCoreParamsKey).IBufSize
) {
}
31
/** Circular pointer over the entries inside one bank (IBufSize / IBufNBank entries). */
class IBufInBankPtr(implicit p: Parameters) extends CircularQueuePtr[IBufInBankPtr](
  p => p(XSCoreParamsKey).IBufSize / p(XSCoreParamsKey).IBufNBank
) {
}
36
/** Circular pointer selecting one of the IBufNBank banks. */
class IBufBankPtr(implicit p: Parameters) extends CircularQueuePtr[IBufBankPtr](
  p => p(XSCoreParamsKey).IBufNBank
) {
}
41
/** IO bundle of the instruction buffer between IFU (fetch) and the decode stage. */
class IBufferIO(implicit p: Parameters) extends XSBundle {
  // Pipeline flush request: clears all buffered instructions and resets pointers.
  val flush = Input(Bool())
  // Redirect-cause indicators, used only for top-down stall-reason accounting.
  val ControlRedirect = Input(Bool())
  val ControlBTBMissBubble = Input(Bool())
  val TAGEMissBubble = Input(Bool())
  val SCMissBubble = Input(Bool())
  val ITTAGEMissBubble = Input(Bool())
  val RASMissBubble = Input(Bool())
  val MemVioRedirect = Input(Bool())
  // Enqueue side: one fetch packet (up to PredictWidth instructions) per fire.
  val in = Flipped(DecoupledIO(new FetchToIBuffer))
  // Dequeue side: up to DecodeWidth instructions per cycle to decode.
  val out = Vec(DecodeWidth, DecoupledIO(new CtrlFlow))
  // High when the buffer cannot accept a full fetch packet (enqueue blocked).
  val full = Output(Bool())
  // Top-down performance-analysis interface reporting why decode slots are empty.
  val stallReason = new StallReasonIO(DecodeWidth)
}
56
/**
 * One instruction's worth of state held in the IBuffer.
 *
 * Captures the instruction bits plus pre-decode, prediction, exception and
 * trigger information produced by the fetch pipeline, so it can later be
 * expanded into a full [[CtrlFlow]] for decode.
 */
class IBufEntry(implicit p: Parameters) extends XSBundle {
  val inst = UInt(32.W)                          // raw instruction bits
  val pc = UInt(VAddrBits.W)                     // virtual PC of this instruction
  val foldpc = UInt(MemPredPCWidth.W)            // folded PC used by memory dependence prediction
  val pd = new PreDecodeInfo                     // pre-decode result (branch type, RVC, etc.)
  val pred_taken = Bool()                        // branch predicted taken (ftqOffset valid)
  val ftqPtr = new FtqPtr                        // FTQ entry this instruction belongs to
  val ftqOffset = UInt(log2Ceil(PredictWidth).W) // position inside the FTQ entry
  val ipf = Bool()                               // instruction page fault
  val igpf = Bool()                              // instruction guest page fault
  val acf = Bool()                               // instruction access fault
  val crossPageIPFFix = Bool()                   // page fault on the second half of a cross-page instruction
  val triggered = new TriggerCf                  // debug-trigger hit information
  val gpaddr = UInt(GPAddrBits.W)                // guest physical address (for guest page faults)

  /**
   * Fill this entry from lane `i` of a fetch packet.
   * Returns `this` so the call can be used as an expression (see `enqData`).
   */
  def fromFetch(fetch: FetchToIBuffer, i: Int): IBufEntry = {
    inst   := fetch.instrs(i)
    pc     := fetch.pc(i)
    foldpc := fetch.foldpc(i)
    gpaddr := fetch.gpaddr(i)
    pd     := fetch.pd(i)
    pred_taken := fetch.ftqOffset(i).valid
    ftqPtr := fetch.ftqPtr
    ftqOffset := fetch.ftqOffset(i).bits
    ipf := fetch.ipf(i)
    igpf:= fetch.igpf(i)
    acf := fetch.acf(i)
    crossPageIPFFix := fetch.crossPageIPFFix(i)
    triggered := fetch.triggered(i)
    this
  }

  /**
   * Expand this entry into the [[CtrlFlow]] bundle consumed by decode.
   * Fetch-stage faults are folded into the exception vector; fields owned by
   * later pipeline stages (store set, load wait, ...) are left as DontCare.
   */
  def toCtrlFlow: CtrlFlow = {
    val cf = Wire(new CtrlFlow)
    cf.instr := inst
    cf.pc := pc
    cf.foldpc := foldpc
    // Start from an all-zero exception vector, then set fetch-related faults.
    cf.exceptionVec := 0.U.asTypeOf(ExceptionVec())
    cf.exceptionVec(instrPageFault) := ipf
    cf.exceptionVec(instrGuestPageFault) := igpf
    cf.exceptionVec(instrAccessFault) := acf
    cf.trigger := triggered
    cf.pd := pd
    cf.pred_taken := pred_taken
    cf.crossPageIPFFix := crossPageIPFFix
    // These fields are produced by dispatch/memory-dependence logic, not here.
    cf.storeSetHit := DontCare
    cf.waitForRobIdx := DontCare
    cf.loadWaitBit := DontCare
    cf.loadWaitStrict := DontCare
    cf.ssid := DontCare
    cf.ftqPtr := ftqPtr
    cf.ftqOffset := ftqOffset
    cf.gpaddr := gpaddr
    cf
  }
}
113
/**
 * Instruction buffer: a banked circular queue decoupling fetch (PredictWidth
 * instructions per cycle in) from decode (DecodeWidth instructions per cycle out).
 *
 * Key structure:
 *  - Storage is a flat register vector, viewed as IBufNBank interleaved banks
 *    (entry i lives in bank i % IBufNBank) so dequeue can read at most one
 *    entry per bank per cycle.
 *  - A bypass path forwards newly-fetched instructions straight to the output
 *    registers when the queue is empty and decode is ready, saving one cycle.
 *  - Dequeue results are registered in `outputEntries` before driving io.out.
 */
class IBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper with HasPerfEvents {
  val io = IO(new IBufferIO)

  // Parameter Check
  private val bankSize = IBufSize / IBufNBank
  require(IBufSize % IBufNBank == 0, s"IBufNBank should divide IBufSize, IBufNBank: $IBufNBank, IBufSize: $IBufSize")
  require(IBufNBank >= DecodeWidth,
    s"IBufNBank should be equal or larger than DecodeWidth, IBufNBank: $IBufNBank, DecodeWidth: $DecodeWidth")

  // IBuffer is organized as raw registers
  // This is due to IBuffer is a huge queue, read & write port logic should be precisely controlled
  //                             . + + E E E - .
  //                             . + + E E E - .
  //                             . . + E E E - .
  //                             . . + E E E E -
  // As shown above, + means enqueue, - means dequeue, E is current content
  // When dequeue, read port is organized like a banked FIFO
  // Dequeue reads no more than 1 entry from each bank sequentially, this can be exploit to reduce area
  // Enqueue writes cannot benefit from this characteristic unless use a SRAM
  // For detail see Enqueue and Dequeue below
  private val ibuf: Vec[IBufEntry] = RegInit(VecInit.fill(IBufSize)(0.U.asTypeOf(new IBufEntry)))
  // Read-only interleaved view: bankedIBufView(bank)(offset) == ibuf(bank + offset * IBufNBank)
  private val bankedIBufView: Vec[Vec[IBufEntry]] = VecInit.tabulate(IBufNBank)(
    bankID => VecInit.tabulate(bankSize)(
      inBankOffset => ibuf(bankID + inBankOffset * IBufNBank)
    )
  )


  // Bypass wire
  private val bypassEntries = WireDefault(VecInit.fill(DecodeWidth)(0.U.asTypeOf(Valid(new IBufEntry))))
  // Normal read wire
  private val deqEntries = WireDefault(VecInit.fill(DecodeWidth)(0.U.asTypeOf(Valid(new IBufEntry))))
  // Output register
  private val outputEntries = RegInit(VecInit.fill(DecodeWidth)(0.U.asTypeOf(Valid(new IBufEntry))))

  // Between Bank: one bank pointer per decode slot, initialized to banks 0..DecodeWidth-1
  private val deqBankPtrVec: Vec[IBufBankPtr] = RegInit(VecInit.tabulate(DecodeWidth)(_.U.asTypeOf(new IBufBankPtr)))
  private val deqBankPtr: IBufBankPtr = deqBankPtrVec(0)
  // Inside Bank: per-bank offset of the next entry to read
  private val deqInBankPtr: Vec[IBufInBankPtr] = RegInit(VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr)))

  val deqPtr = RegInit(0.U.asTypeOf(new IBufPtr))

  // One enqueue pointer per fetch lane; enqPtrVec(k) = enqPtr + k
  val enqPtrVec = RegInit(VecInit.tabulate(PredictWidth)(_.U.asTypeOf(new IBufPtr)))
  val enqPtr = enqPtrVec(0)

  val validEntries = distanceBetween(enqPtr, deqPtr)
  val allowEnq = RegInit(true.B)
  val useBypass = enqPtr === deqPtr && io.out.head.ready // empty and last cycle fire

  val numFromFetch = PopCount(io.in.bits.enqEnable)
  val numTryEnq = WireDefault(0.U)
  val numEnq = Mux(io.in.fire, numTryEnq, 0.U)
  val numBypass = PopCount(bypassEntries.map(_.valid))
  val numTryDeq = Mux(validEntries >= DecodeWidth.U, DecodeWidth.U, validEntries)
  val numDeq = Mux(io.out.head.ready, numTryDeq, 0.U)
  val numAfterEnq = validEntries +& numEnq

  // Registered almost-full: leave PredictWidth slots of headroom so a full
  // fetch packet can always enqueue in the cycle after allowEnq is asserted.
  val nextValidEntries = Mux(io.out(0).ready, numAfterEnq - numTryDeq, numAfterEnq)
  allowEnq := (IBufSize - PredictWidth).U >= nextValidEntries // Disable when almost full

  // enqOffset(i): how many valid fetch lanes precede lane i (compacted enqueue index)
  val enqOffset = VecInit.tabulate(PredictWidth)(i => PopCount(io.in.bits.valid.asBools.take(i)))
  val enqData = VecInit.tabulate(PredictWidth)(i => Wire(new IBufEntry).fromFetch(io.in.bits, i))

  // when using bypass, bypassed entries do not enqueue
  when(useBypass) {
    when(numFromFetch >= DecodeWidth.U) {
      // The first DecodeWidth instructions go straight to decode; only the rest enqueue.
      numTryEnq := numFromFetch - DecodeWidth.U
    } .otherwise {
      numTryEnq := 0.U
    }
  } .otherwise {
    numTryEnq := numFromFetch
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Bypass
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Route fetch lane whose compacted index equals `idx` directly to decode slot `idx`.
  bypassEntries.zipWithIndex.foreach {
    case (entry, idx) =>
      // Select
      val validOH = Range(0, PredictWidth).map {
        i =>
          io.in.bits.valid(i) &&
            io.in.bits.enqEnable(i) &&
            enqOffset(i) === idx.asUInt
      } // Should be OneHot
      entry.valid := validOH.reduce(_ || _) && io.in.fire && !io.flush
      entry.bits := Mux1H(validOH, enqData)

      // Debug Assertion
      XSError(PopCount(validOH) > 1.asUInt, "validOH is not OneHot")
  }

  // => Decode Output
  // clean register output
  io.out zip outputEntries foreach {
    case (io, reg) =>
      io.valid := reg.valid
      io.bits := reg.bits.toCtrlFlow
  }
  // Refill the output registers whenever decode accepted the previous group;
  // the later `when(useBypass)` assignment overrides `deq` (last-connect wins).
  outputEntries zip bypassEntries zip deqEntries foreach {
    case ((out, bypass), deq) =>
      when(io.out.head.ready) {
        out := deq
        when(useBypass) {
          out := bypass
        }
      }
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Enqueue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  io.in.ready := allowEnq
  // Data
  ibuf.zipWithIndex.foreach {
    case (entry, idx) => {
      // Select
      val validOH = Range(0, PredictWidth).map {
        i =>
          // When bypassing, the first DecodeWidth compacted lanes skip the queue,
          // so lane i writes the slot addressed by enqPtrVec(enqOffset(i) - DecodeWidth).
          val useBypassMatch = enqOffset(i) >= DecodeWidth.U &&
            enqPtrVec(enqOffset(i) - DecodeWidth.U).value === idx.asUInt
          val normalMatch = enqPtrVec(enqOffset(i)).value === idx.asUInt
          val m = Mux(useBypass, useBypassMatch, normalMatch) // when using bypass, bypassed entries do not enqueue

          io.in.bits.valid(i) && io.in.bits.enqEnable(i) && m
      } // Should be OneHot
      val wen = validOH.reduce(_ || _) && io.in.fire && !io.flush

      // Write port
      // Each IBuffer entry has a PredictWidth -> 1 Mux
      val writeEntry = Mux1H(validOH, enqData)
      entry := Mux(wen, writeEntry, entry)

      // Debug Assertion
      XSError(PopCount(validOH) > 1.asUInt, "validOH is not OneHot")
    }
  }
  // Pointer maintenance
  when (io.in.fire && !io.flush) {
    enqPtrVec := VecInit(enqPtrVec.map(_ + numTryEnq))
  }

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Dequeue
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // validVec(k): decode slot k has a valid entry (mask of min(validEntries, DecodeWidth) ones)
  val validVec = Mux(validEntries >= DecodeWidth.U,
    ((1 << DecodeWidth) - 1).U,
    UIntToMask(validEntries(log2Ceil(DecodeWidth) - 1, 0), DecodeWidth)
  )
  // Data
  // Read port
  // 2-stage, IBufNBank * (bankSize -> 1) + IBufNBank -> 1
  // Should be better than IBufSize -> 1 in area, with no significant latency increase
  private val readStage1: Vec[IBufEntry] = VecInit.tabulate(IBufNBank)(
    bankID => Mux1H(UIntToOH(deqInBankPtr(bankID).value), bankedIBufView(bankID))
  )
  for (i <- 0 until DecodeWidth) {
    deqEntries(i).valid := validVec(i)
    deqEntries(i).bits := Mux1H(UIntToOH(deqBankPtrVec(i).value), readStage1)
  }
  // Pointer maintenance
  deqBankPtrVec := Mux(io.out.head.ready, VecInit(deqBankPtrVec.map(_ + numTryDeq)), deqBankPtrVec)
  deqPtr := Mux(io.out.head.ready, deqPtr + numTryDeq, deqPtr)
  // Advance a bank's in-bank pointer only if that bank actually supplied an entry
  // this cycle (i.e. its position relative to deqBankPtr falls inside validVec).
  deqInBankPtr.zipWithIndex.foreach {
    case (ptr, idx) => {
      // validVec[k] == bankValid[deqBankPtr + k]
      // So bankValid[n] == validVec[n - deqBankPtr]
      val validIdx = Mux(idx.asUInt >= deqBankPtr.value,
        idx.asUInt - deqBankPtr.value,
        ((idx + IBufNBank).asUInt - deqBankPtr.value)(log2Ceil(IBufNBank) - 1, 0)
      )
      val bankAdvance = Mux(validIdx >= DecodeWidth.U,
        false.B,
        validVec(validIdx(log2Ceil(DecodeWidth) - 1, 0))
      ) && io.out.head.ready
      ptr := Mux(bankAdvance , ptr + 1.U, ptr)
    }
  }

  // Flush
  // Placed after all normal pointer updates so these assignments take priority
  // (Chisel last-connect semantics).
  when (io.flush) {
    allowEnq := true.B
    enqPtrVec := enqPtrVec.indices.map(_.U.asTypeOf(new IBufPtr))
    deqBankPtrVec := deqBankPtrVec.indices.map(_.U.asTypeOf(new IBufBankPtr))
    deqInBankPtr := VecInit.fill(IBufNBank)(0.U.asTypeOf(new IBufInBankPtr))
    deqPtr := 0.U.asTypeOf(new IBufPtr())
    outputEntries.foreach(_.valid := false.B)
  }
  io.full := !allowEnq

  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // TopDown
  /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
  // Track the most recent frontend bubble reason; on a flush, overwrite it with
  // the redirect cause (priority: control-flow causes > memory violation > other).
  val topdown_stage = RegInit(0.U.asTypeOf(new FrontendTopDownBundle))
  topdown_stage := io.in.bits.topdown_info
  when(io.flush) {
    when(io.ControlRedirect) {
      when(io.ControlBTBMissBubble) {
        topdown_stage.reasons(TopDownCounters.BTBMissBubble.id) := true.B
      }.elsewhen(io.TAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.TAGEMissBubble.id) := true.B
      }.elsewhen(io.SCMissBubble) {
        topdown_stage.reasons(TopDownCounters.SCMissBubble.id) := true.B
      }.elsewhen(io.ITTAGEMissBubble) {
        topdown_stage.reasons(TopDownCounters.ITTAGEMissBubble.id) := true.B
      }.elsewhen(io.RASMissBubble) {
        topdown_stage.reasons(TopDownCounters.RASMissBubble.id) := true.B
      }
    }.elsewhen(io.MemVioRedirect) {
      topdown_stage.reasons(TopDownCounters.MemVioRedirectBubble.id) := true.B
    }.otherwise {
      topdown_stage.reasons(TopDownCounters.OtherRedirectBubble.id) := true.B
    }
  }


  val dequeueInsufficient = Wire(Bool())
  val matchBubble = Wire(UInt(log2Up(TopDownCounters.NumStallReasons.id).W))
  val deqValidCount = PopCount(validVec.asBools)
  val deqWasteCount = DecodeWidth.U - deqValidCount
  dequeueInsufficient := deqValidCount < DecodeWidth.U
  // Highest-priority (highest-id) pending reason, via PriorityEncoder on the reversed vector.
  matchBubble := (TopDownCounters.NumStallReasons.id - 1).U - PriorityEncoder(topdown_stage.reasons.reverse)

  // Attribute the empty (wasted) decode slots — the tail of io.out — to matchBubble.
  io.stallReason.reason.map(_ := 0.U)
  for (i <- 0 until DecodeWidth) {
    when(i.U < deqWasteCount) {
      io.stallReason.reason(DecodeWidth - i - 1) := matchBubble
    }
  }

  // If some slots are wasted but no upstream reason is pending, blame fetch fragmentation.
  when(!(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR)) {
    // should set reason for FetchFragmentationStall
    // topdown_stage.reasons(TopDownCounters.FetchFragmentationStall.id) := true.B
    for (i <- 0 until DecodeWidth) {
      when(i.U < deqWasteCount) {
        io.stallReason.reason(DecodeWidth - i - 1) := TopDownCounters.FetchFragBubble.id.U
      }
    }
  }

  // A backend-reported stall reason overrides everything above (last-connect).
  when(io.stallReason.backReason.valid) {
    io.stallReason.reason.map(_ := io.stallReason.backReason.bits)
  }

  // Debug info
  // Invariant: the flat dequeue pointer must agree with the (bank, in-bank) pointer pair.
  XSError(
    deqPtr.value =/= deqBankPtr.value + deqInBankPtr(deqBankPtr.value).value * IBufNBank.asUInt,
    "Dequeue PTR mismatch"
  )
  XSDebug(io.flush, "IBuffer Flushed\n")

  when(io.in.fire) {
    XSDebug("Enque:\n")
    XSDebug(p"MASK=${Binary(io.in.bits.valid)}\n")
    for(i <- 0 until PredictWidth){
      XSDebug(p"PC=${Hexadecimal(io.in.bits.pc(i))} ${Hexadecimal(io.in.bits.instrs(i))}\n")
    }
  }

  for (i <- 0 until DecodeWidth) {
    XSDebug(io.out(i).fire,
      p"deq: ${Hexadecimal(io.out(i).bits.instr)} PC=${Hexadecimal(io.out(i).bits.pc)}" +
      p"v=${io.out(i).valid} r=${io.out(i).ready} " +
      p"excpVec=${Binary(io.out(i).bits.exceptionVec.asUInt)} crossPageIPF=${io.out(i).bits.crossPageIPFFix}\n")
  }

  XSDebug(p"ValidEntries: ${validEntries}\n")
  XSDebug(p"EnqNum: ${numEnq}\n")
  XSDebug(p"DeqNum: ${numDeq}\n")

  // "Hungry": after the first enqueue ever, the buffer is empty for a reason
  // other than a just-taken flush (headBubble masks the first empty cycles after flush).
  val afterInit = RegInit(false.B)
  val headBubble = RegInit(false.B)
  when (io.in.fire) { afterInit := true.B }
  when (io.flush) {
    headBubble := true.B
  } .elsewhen(validEntries =/= 0.U) {
    headBubble := false.B
  }
  val instrHungry = afterInit && (validEntries === 0.U) && !headBubble

  QueuePerf(IBufSize, validEntries, !allowEnq)
  XSPerfAccumulate("flush", io.flush)
  XSPerfAccumulate("hungry", instrHungry)

  // Buffer holds something but fewer than DecodeWidth entries: decode bandwidth partly wasted.
  val ibuffer_IDWidth_hvButNotFull = afterInit && (validEntries =/= 0.U) && (validEntries < DecodeWidth.U) && !headBubble
  XSPerfAccumulate("ibuffer_IDWidth_hvButNotFull", ibuffer_IDWidth_hvButNotFull)
  /*
  XSPerfAccumulate("ICacheMissBubble", Mux(matchBubbleVec(TopDownCounters.ICacheMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITLBMissBubble", Mux(matchBubbleVec(TopDownCounters.ITLBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ControlRedirectBubble", Mux(matchBubbleVec(TopDownCounters.ControlRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("MemVioRedirectBubble", Mux(matchBubbleVec(TopDownCounters.MemVioRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OtherRedirectBubble", Mux(matchBubbleVec(TopDownCounters.OtherRedirectBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("BTBMissBubble", Mux(matchBubbleVec(TopDownCounters.BTBMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("OverrideBubble", Mux(matchBubbleVec(TopDownCounters.OverrideBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqUpdateBubble", Mux(matchBubbleVec(TopDownCounters.FtqUpdateBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FtqFullStall", Mux(matchBubbleVec(TopDownCounters.FtqFullStall.id), deqWasteCount, 0.U))
  XSPerfAccumulate("FetchFragmentBubble",
  Mux(deqWasteCount === DecodeWidth.U || topdown_stage.reasons.asUInt.orR, 0.U, deqWasteCount))
  XSPerfAccumulate("TAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.TAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("SCMissBubble", Mux(matchBubbleVec(TopDownCounters.SCMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("ITTAGEMissBubble", Mux(matchBubbleVec(TopDownCounters.ITTAGEMissBubble.id), deqWasteCount, 0.U))
  XSPerfAccumulate("RASMissBubble", Mux(matchBubbleVec(TopDownCounters.RASMissBubble.id), deqWasteCount, 0.U))
  */

  val perfEvents = Seq(
    ("IBuffer_Flushed  ", io.flush                                                                     ),
    ("IBuffer_hungry   ", instrHungry                                                                  ),
    ("IBuffer_1_4_valid", (validEntries >  (0*(IBufSize/4)).U) & (validEntries < (1*(IBufSize/4)).U)   ),
    ("IBuffer_2_4_valid", (validEntries >= (1*(IBufSize/4)).U) & (validEntries < (2*(IBufSize/4)).U)   ),
    ("IBuffer_3_4_valid", (validEntries >= (2*(IBufSize/4)).U) & (validEntries < (3*(IBufSize/4)).U)   ),
    ("IBuffer_4_4_valid", (validEntries >= (3*(IBufSize/4)).U) & (validEntries < (4*(IBufSize/4)).U)   ),
    ("IBuffer_full     ",  validEntries.andR                                                           ),
    ("Front_Bubble     ", PopCount((0 until DecodeWidth).map(i => io.out(i).ready && !io.out(i).valid)))
  )
  generatePerfEvent()
}
432