xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/VirtualLoadQueue.scala (revision c41f725a91c55e75c95c55b4bb0d2649f43e4c83)
/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import org.chipsalliance.cde.config._
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, UopIdx}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.decode.isa.bitfield.{InstVType, XSInstBitFields}
import xiangshan.backend.fu.FuType
import xiangshan.mem.Bundles._
import xiangshan.cache._

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect    = Flipped(Valid(new Redirect))
    val vecCommit   = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    // from dispatch
    val enq         = new LqEnqIO
    // from ldu s3
    val ldin        = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr     = Output(new LqPtr)
    // global
    val lqFull      = Output(Bool())
    val lqEmpty     = Output(Bool())
    // to dispatch
    val lqDeq       = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
    // for topdown
    val noUopsIssued = Input(Bool())
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  //  VirtualLoadQueue field
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated   : the entry has been allocated
  //  MicroOp     : the inst's micro-op info kept here (robIdx, uopIdx)
  //  Flags       : load status flags (isvec, committed)
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val robIdx = Reg(Vec(VirtualLoadQueueSize, new RobPtr))
  val uopIdx = Reg(Vec(VirtualLoadQueueSize, UopIdx()))
  val isvec = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load flow
  val committed = Reg(Vec(VirtualLoadQueueSize, Bool()))

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // inst's paddr

  //  maintain pointers
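  // enqPtrExt(i) always equals enqPtrExt(0) + i: one pre-incremented enqueue pointer per enqueue port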
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

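  // Redirect bookkeeping: needCancel marks allocated entries flushed by the incoming redirect,
  // enqCancel marks enqueue requests squashed in the same cycle as the redirect. Their counts are
  // accumulated into redirectCancelCount and reported to dispatch through io.lqCancelCnt.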
  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem.asTypeOf(UInt(elemIdxBits.W)))
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    robIdx(i).needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = GatedValidRegNext(needCancel)
  val enqCancel = canEnqueue.zip(io.enq.req).map{case (v, x) =>
    v && x.bits.robIdx.needFlush(io.redirect)
  }
  val enqCancelNum = enqCancel.zip(vLoadFlow).map{case (v, flow) =>
    Mux(v, flow, 0.U)
  }
  val lastEnqCancel = GatedRegNext(enqCancelNum.reduce(_ + _))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)

  // update enqueue pointer
  val validVLoadFlow = vLoadFlow.zipWithIndex.map{case (vLoadFlowNumItem, index) => Mux(canEnqueue(index), vLoadFlowNumItem, 0.U)}
  val validVLoadOffset = vLoadFlow.zip(io.enq.needAlloc).map{case (flow, needAllocItem) => Mux(needAllocItem, flow, 0.U)}
  val validVLoadOffsetRShift = 0.U +: validVLoadOffset.take(validVLoadFlow.length - 1)
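  // validVLoadOffsetRShift(i) holds the element count claimed by request i-1 (0 for request 0);
  // its prefix sum gives each request's lqIdx offset from enqPtr (see io.enq.resp below)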

  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

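  // after redirect recovery the enqueue pointer must not fall behind the dequeue pointer;
  // if the recovered value is not after deqPtrNext, clamp the enqueue pointers to deqPtrNext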
  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
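  // deqCountMask marks, for each of the next DeqPtrMoveStride entries after deqPtr, whether it is
  // allocated, committed and not flushed this cycle; commitCount is the length of the leading run
  // of ones, i.e. how many entries can leave the queue head in order this cycle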
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value) && committed(ptr.value) && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = GatedRegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

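  // io.lqDeq tells dispatch how many entries have been freed so it can release queue credits;
  // io.lqCancelCnt reports how many entries a redirect reclaimed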
  io.lqDeq := GatedRegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when at least LSQLdEnqWidth entries are free
   * Each request dynamically allocates numLsElem entries
   */
  io.enq.canAccept := allowEnqueue
  val enqLowBound = io.enq.req.map(_.bits.lqIdx)
  val enqUpBound  = io.enq.req.map(x => x.bits.lqIdx + x.bits.numLsElem)
  val enqCrossLoop = enqLowBound.zip(enqUpBound).map{case (low, up) => low.flag =/= up.flag}

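  // an entry i is claimed by request j if it falls inside [lqIdx, lqIdx + numLsElem); when the range
  // wraps around the end of the circular queue (the flags of the two bounds differ) the test becomes
  // "above the low bound OR below the upper bound"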
  for(i <- 0 until VirtualLoadQueueSize) {
    val entryCanEnqSeq = (0 until io.enq.req.length).map { j =>
      val entryHitBound = Mux(
        enqCrossLoop(j),
        enqLowBound(j).value <= i.U || i.U < enqUpBound(j).value,
        enqLowBound(j).value <= i.U && i.U < enqUpBound(j).value
      )
      canEnqueue(j) && !enqCancel(j) && entryHitBound
    }
    val entryCanEnq = entryCanEnqSeq.reduce(_ || _)
    val selectBits = ParallelPriorityMux(entryCanEnqSeq, io.enq.req.map(_.bits))
    when (entryCanEnq) {
      allocated(i) := true.B
      robIdx(i) := selectBits.robIdx
      uopIdx(i) := selectBits.uopIdx
      isvec(i) := FuType.isVLoad(selectBits.fuType)
      committed(i) := false.B

      debug_mmio(i) := false.B
      debug_paddr(i) := 0.U
    }
  }

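  // return each request's allocated lqIdx to dispatch and check that it matches the locally
  // computed enqueue pointer (enqPtr plus the element count of the preceding requests)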
  for (i <- 0 until io.enq.req.length) {
    val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
    val index = io.enq.req(i).bits.lqIdx
    XSError(canEnqueue(i) && !enqCancel(i) && (!io.enq.canAccept || !io.enq.sqCanAccept), s"must accept $i\n")
    XSError(canEnqueue(i) && !enqCancel(i) && index.value =/= lqIdx.value, s"must be the same entry $i\n")
    io.enq.resp(i) := lqIdx
  }

  /**
    * Load commits
    *
    * When a load commits, mark it as !allocated and move deqPtr forward.
    */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr+i.U).value) := false.B
    }
    XSError(commitCount > i.U && !allocated((deqPtr+i.U).value), s"why commit invalid entry $i?\n")
  })

  // vector commit or replay
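  // a vector element entry is marked committed when feedback from any vector load pipeline
  // matches both its robIdx and uopIdx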
  val vecLdCommittmp = Wire(Vec(VirtualLoadQueueSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCommit = Wire(Vec(VirtualLoadQueueSize, Bool()))
  for (i <- 0 until VirtualLoadQueueSize) {
    val cmt = io.vecCommit
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCommittmp(i)(j) := allocated(i) && cmt(j).valid && robIdx(i) === cmt(j).bits.robidx && uopIdx(i) === cmt(j).bits.uopidx
    }
    vecLdCommit(i) := vecLdCommittmp(i).reduce(_ || _)

    when (vecLdCommit(i) && isvec(i)) {
      committed(i) := true.B
    }
  }

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time.
    * However,
    *   (1) a ready load instruction (one that needs no replay) writes back to the ROB immediately.
    */
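  // scalar loads mark their entry committed once load stage s3 reports that no replay is needed and
  // the address is valid; vector loads are excluded here and commit through io.vecCommit instead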
  for(i <- 0 until LoadPipelineWidth) {
    //   most lq status needs to be updated immediately after the load writes back to the lq
    //   flag bits in the lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    when (io.ldin(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep
      val need_valid = io.ldin(i).bits.updateAddrValid

      when (!need_rep && need_valid && !io.ldin(i).bits.isvec) {
        committed(loadWbIndex) := true.B

        //  Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr
      }

      XSInfo(!need_rep && need_valid,
        "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x isvec %x\n",
        io.ldin(i).bits.uop.lqIdx.asUInt,
        io.ldin(i).bits.uop.pc,
        io.ldin(i).bits.vaddr,
        io.ldin(i).bits.paddr,
        io.ldin(i).bits.mask,
        io.ldin(i).bits.forwardData.asUInt,
        io.ldin(i).bits.forwardMask.asUInt,
        io.ldin(i).bits.mmio,
        io.ldin(i).bits.isvec
      )
    }
  }

  //  perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i))))
  QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue)
  io.lqFull := !allowEnqueue

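  // topdown memory-stall indicator: no uop was issued this cycle while at least NLoadNotCompleted
  // loads were still outstanding in the queue in the previous cycle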
  def NLoadNotCompleted = 1
  val validCountReg = RegNext(validCount)
  val noUopsIssued = io.noUopsIssued
  val stallLoad = io.noUopsIssued && (validCountReg >= NLoadNotCompleted.U)
  val memStallAnyLoad = RegNext(stallLoad)

  XSPerfAccumulate("mem_stall_anyload", memStallAnyLoad)

  val perfEvents: Seq[(String, UInt)] = Seq(
    ("MEMSTALL_ANY_LOAD", memStallAnyLoad),
  )
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    XSDebug(false, flag, name) // when(flag)
    XSDebug(false, !flag, " ") // otherwise
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && committed(i), "c")
    PrintFlag(allocated(i) && isvec(i), "v")
    XSDebug(false, true.B, "\n")
  }
  // end
}