xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/VirtualLoadQueue.scala (revision 0a84afd5dd089307b667fd1d24e3b3b50a5ad80b)
/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, UopIdx}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.decode.isa.bitfield.{InstVType, XSInstBitFields}
import xiangshan.backend.fu.FuType

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect    = Flipped(Valid(new Redirect))
    val vecCommit   = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    // from dispatch
    val enq         = new LqEnqIO
    // from ldu s3
    val ldin        = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr     = Output(new LqPtr)
    // global
    val lqFull      = Output(Bool())
    val lqEmpty     = Output(Bool())
    // to dispatch
    val lqDeq       = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
  })
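
  // VirtualLoadQueue keeps one entry per in-flight load flow (a scalar load, or one
  // element flow of a vector load) from allocation at dispatch until it commits and is
  // dequeued; the dequeue pointer is exported as ldWbPtr to LoadQueueReplay and LoadQueueRAR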

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  //  VirtualLoadQueue field
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  Flags       : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val robIdx = Reg(Vec(VirtualLoadQueueSize, new RobPtr))
  val uopIdx = Reg(Vec(VirtualLoadQueueSize, UopIdx()))
  val isvec = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load flow
  val committed = Reg(Vec(VirtualLoadQueueSize, Bool()))

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  //  maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
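  // allow new enqueues only while at least LSQLdEnqWidth entries remain free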
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
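  // redirect handling: needCancel marks allocated entries squashed by the current redirect;
  // the squashed-entry count (plus cancelled enqueues) is registered one cycle after the
  // redirect, and the enqueue pointer is rolled back in the following cycle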
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    robIdx(i).needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = GatedValidRegNext(needCancel)
  val enqCancel = canEnqueue.zip(io.enq.req).map{case (v, x) =>
    v && x.bits.robIdx.needFlush(io.redirect)
  }
  val enqCancelNum = enqCancel.zip(io.enq.req).map{case (v, req) =>
    Mux(v, req.bits.numLsElem, 0.U)
  }
  val lastEnqCancel = GatedRegNext(enqCancelNum.reduce(_ + _))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)

  // update enqueue pointer
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem.asTypeOf(UInt(elemIdxBits.W)))
  val validVLoadFlow = vLoadFlow.zipWithIndex.map{case (vLoadFlowNumItem, index) => Mux(canEnqueue(index), vLoadFlowNumItem, 0.U)}
  val validVLoadOffset = vLoadFlow.zip(io.enq.needAlloc).map{case (flow, needAllocItem) => Mux(needAllocItem, flow, 0.U)}
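  // exclusive prefix of per-port element counts: port i's first entry follows the entries
  // claimed by all lower-indexed ports in the same enqueue group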
  val validVLoadOffsetRShift = 0.U +: validVLoadOffset.take(validVLoadFlow.length - 1)

  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // recover the enqueue pointers two cycles after the redirect, once redirectCancelCount is available
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

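  // keep the enqueue pointer from falling behind the dequeue pointer after rollback:
  // if it no longer leads deqPtrNext, realign it to deqPtrNext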
  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value) && committed(ptr.value) && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
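  // count the run of consecutive committed entries starting at deqPtr
  // (the number of trailing ones in deqCountMask)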
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = GatedRegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

  io.lqDeq := GatedRegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries >= LSQLdEnqWidth.
   * Entries are allocated dynamically according to each request's numLsElem.
   */
  io.enq.canAccept := allowEnqueue
  val enqLowBound = io.enq.req.map(_.bits.lqIdx)
  val enqUpBound  = io.enq.req.map(x => x.bits.lqIdx + x.bits.numLsElem)
  val enqCrossLoop = enqLowBound.zip(enqUpBound).map{case (low, up) => low.flag =/= up.flag}

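  // an entry i is claimed by enqueue request j if it falls in [lqIdx, lqIdx + numLsElem);
  // when that range wraps around the circular queue (flag bits differ), the bound check
  // splits into the two halves of the wrapped interval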
  for(i <- 0 until VirtualLoadQueueSize) {
    val entryCanEnqSeq = (0 until io.enq.req.length).map { j =>
      val entryHitBound = Mux(
        enqCrossLoop(j),
        enqLowBound(j).value <= i.U || i.U < enqUpBound(j).value,
        enqLowBound(j).value <= i.U && i.U < enqUpBound(j).value
      )
      canEnqueue(j) && !enqCancel(j) && entryHitBound
    }
    val entryCanEnq = entryCanEnqSeq.reduce(_ || _)
    val selectBits = ParallelPriorityMux(entryCanEnqSeq, io.enq.req.map(_.bits))
    when (entryCanEnq) {
      allocated(i) := true.B
      robIdx(i) := selectBits.robIdx
      uopIdx(i) := selectBits.uopIdx
      isvec(i) := FuType.isVLoad(selectBits.fuType)
      committed(i) := false.B

      debug_mmio(i) := false.B
      debug_paddr(i) := 0.U
    }
  }

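  // respond to dispatch with the allocated lqIdx (enqPtr plus the element offset of all
  // lower-indexed ports) and check that it matches the lqIdx carried by the request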
  for (i <- 0 until io.enq.req.length) {
    val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
    val index = io.enq.req(i).bits.lqIdx
    XSError(canEnqueue(i) && !enqCancel(i) && (!io.enq.canAccept || !io.enq.sqCanAccept), s"must accept $i\n")
    XSError(canEnqueue(i) && !enqCancel(i) && index.value =/= lqIdx.value, s"must be the same entry $i\n")
    io.enq.resp(i) := lqIdx
  }

  /**
    * Load commits
    *
    * When a load commits, mark its entry as !allocated and move deqPtr forward.
    */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr+i.U).value) := false.B
    }
    XSError(commitCount > i.U && !allocated((deqPtr+i.U).value), s"why commit invalid entry $i?\n")
  })

  // vector commit or replay
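  // a vector load entry is marked committed once any vector feedback port reports a
  // matching robIdx/uopIdx for it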
  val vecLdCommittmp = Wire(Vec(VirtualLoadQueueSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCommit = Wire(Vec(VirtualLoadQueueSize, Bool()))
  for (i <- 0 until VirtualLoadQueueSize) {
    val cmt = io.vecCommit
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCommittmp(i)(j) := allocated(i) && cmt(j).valid && robIdx(i) === cmt(j).bits.robidx && uopIdx(i) === cmt(j).bits.uopidx
    }
    vecLdCommit(i) := vecLdCommittmp(i).reduce(_ || _)

    when (vecLdCommit(i) && isvec(i)) {
      committed(i) := true.B
    }
  }

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time.
    * However,
    *   (1) a load instruction that is ready (needs no replay) writes back to the ROB immediately.
    */
  for(i <- 0 until LoadPipelineWidth) {
    //   most lq status needs to be updated immediately after the load writes back to the lq
    //   flag bits in the lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    when (io.ldin(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep
      val need_valid = io.ldin(i).bits.updateAddrValid

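      // a scalar load that needs no replay is committed here at writeback;
      // vector loads are committed later through io.vecCommit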
      when (!need_rep && need_valid && !io.ldin(i).bits.isvec) {
        committed(loadWbIndex) := true.B

        //  Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr
      }

      XSInfo(!need_rep && need_valid,
        "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x isvec %x\n",
        io.ldin(i).bits.uop.lqIdx.asUInt,
        io.ldin(i).bits.uop.pc,
        io.ldin(i).bits.vaddr,
        io.ldin(i).bits.paddr,
        io.ldin(i).bits.mask,
        io.ldin(i).bits.forwardData.asUInt,
        io.ldin(i).bits.forwardMask.asUInt,
        io.ldin(i).bits.mmio,
        io.ldin(i).bits.isvec
      )
    }
  }

  //  perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i))))
  QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    XSDebug(false, flag, name) // when(flag)
    XSDebug(false, !flag, " ") // otherwise
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && committed(i), "c")
    PrintFlag(allocated(i) && isvec(i), "v")
    XSDebug(false, true.B, "\n")
  }
  // end
}