/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.decode.isa.bitfield.{InstVType, XSInstBitFields}
import xiangshan.backend.fu.FuType

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect    = Flipped(Valid(new Redirect))
    val vecCommit   = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    // from dispatch
    val enq         = new LqEnqIO
    // from ldu s3
    val ldin        = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr     = Output(new LqPtr)
    // global
    val lqFull      = Output(Bool())
    val lqEmpty     = Output(Bool())
    // to dispatch
    val lqDeq       = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
  })
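
  // NOTE: VirtualLoadQueue keeps every in-flight load in program order. Entries
  // are allocated at dispatch, updated when the load units write back in s3,
  // and released in order once a load has finished (scalar: address and data
  // valid; vector: the uop has been reported committed through io.vecCommit).
  // deqPtr is exported as ldWbPtr for LoadQueueReplay and LoadQueueRAR.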

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  //  VirtualLoadQueue field
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  Flags       : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // control signals need an explicit initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new DynInst))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid
  // vector load: inst -> uop (pdest register) -> flow (a single load operation in the load unit)
  val isvec = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load flow
  val veccommitted = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load uop has committed
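
  // An entry is considered finished (and eligible for dequeue) once
  // addrvalid && datavalid for a scalar load, or veccommitted for a vector
  // load; see deqLookup below.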

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  //  maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

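  // Enqueue is allowed only while at least LSQLdEnqWidth entries are free,
  // presumably so that an incoming dispatch group can always be accepted once
  // canAccept has been asserted.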
  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
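  // Redirect bookkeeping: needCancel flags allocated entries younger than the
  // redirect, enqCancel covers requests enqueued in the same cycle; both are
  // registered and summed into redirectCancelCount, which is used to roll back
  // enqPtrExt and is reported to dispatch as lqCancelCnt.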
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = GatedValidRegNext(needCancel)
  val enqCancel = canEnqueue.zip(io.enq.req).map{case (v, x) =>
    v && x.bits.robIdx.needFlush(io.redirect)
  }
  val enqCancelNum = enqCancel.zip(io.enq.req).map{case (v, req) =>
    Mux(v, req.bits.numLsElem, 0.U)
  }
  val lastEnqCancel = GatedRegNext(enqCancelNum.reduce(_ + _))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)

  // update enqueue pointer
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem)
  val validVLoadFlow = vLoadFlow.zipWithIndex.map{case (vLoadFlowNumItem, index) => Mux(canEnqueue(index), vLoadFlowNumItem, 0.U)}
  val validVLoadOffset = vLoadFlow.zip(io.enq.needAlloc).map{case (flow, needAllocItem) => Mux(needAllocItem, flow, 0.U)}
  val validVLoadOffsetRShift = 0.U +: validVLoadOffset.take(validVLoadFlow.length - 1)
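  // validVLoadOffsetRShift is validVLoadOffset shifted right by one port, so the
  // running sum over ports 0..i-1 gives the entry offset at which port i starts
  // allocating (see the lqIdx computation in the enqueue loop below).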

  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // recover the pointers two cycles after the redirect, once redirectCancelCount is available
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

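  // Clamp: if the next enqueue pointer would not be after deqPtrNext (e.g. after
  // a redirect rollback), restart the per-port enqueue pointers at deqPtrNext.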
  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value)
    && ((datavalid(ptr.value) && addrvalid(ptr.value) && !isvec(ptr.value)) || (isvec(ptr.value) && veccommitted(ptr.value)))
    && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
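  // commitCount is the number of consecutive ready entries starting at deqPtr:
  // PriorityEncoderOH(~deqCountMask) marks the first not-ready slot, and
  // subtracting 1 yields a mask of every ready slot below it, e.g. (4-bit view)
  // deqCountMask = b0011 -> ~mask = b1100 -> one-hot = b0100 -> -1 = b0011 -> PopCount = 2.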
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = GatedRegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

  io.lqDeq := GatedRegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries >= LSQLdEnqWidth
   */
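  // Each dispatch port may carry several flows (numLsElem) for a vector load;
  // port i reserves numLsElem consecutive entries starting at enqPtrExt(0) plus
  // the flows of the ports before it. For a scalar load, numLsElem is expected
  // to be 1, so it takes a single entry.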
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until io.enq.req.length) {
    val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
    val index = io.enq.req(i).bits.lqIdx
    val enqInstr = io.enq.req(i).bits.instr.asTypeOf(new XSInstBitFields)
    when (canEnqueue(i) && !enqCancel(i)) {
      // The maximum number of elements ('numLsElem') each dispatch port can emit is:
      //    16, 2, 2, 2, 2, 2.
      // Therefore, VecMemLSQEnqIteratorNumberSeq = Seq(16, 2, 2, 2, 2, 2)
      for (j <- 0 until VecMemLSQEnqIteratorNumberSeq(i)) {
        when (j.U < validVLoadOffset(i)) {
          allocated((index + j.U).value) := true.B
          uop((index + j.U).value) := io.enq.req(i).bits
          uop((index + j.U).value).lqIdx := lqIdx + j.U

          // init
          addrvalid((index + j.U).value) := false.B
          datavalid((index + j.U).value) := false.B
          isvec((index + j.U).value) := FuType.isVLoad(io.enq.req(i).bits.fuType)
          veccommitted((index + j.U).value) := false.B

          debug_mmio((index + j.U).value) := false.B
          debug_paddr((index + j.U).value) := 0.U

          XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
          XSError(index.value =/= lqIdx.value, s"must be the same entry $i\n")
        }
      }
    }
    io.enq.resp(i) := lqIdx
  }

  /**
    * Load commits
    *
    * When a load commits, mark it as !allocated and move deqPtr forward.
    */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr+i.U).value) := false.B
      XSError(!allocated((deqPtr+i.U).value), s"why commit invalid entry $i?\n")
    }
  })

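  // A vector entry is marked committed once any vecCommit port reports a
  // matching robIdx/uopIdx, which makes it eligible for dequeue above.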
  // vector commit or replay
  val vecLdCommittmp = Wire(Vec(VirtualLoadQueueSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCommit = Wire(Vec(VirtualLoadQueueSize, Bool()))
  for (i <- 0 until VirtualLoadQueueSize) {
    val cmt = io.vecCommit
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCommittmp(i)(j) := allocated(i) && cmt(j).valid && uop(i).robIdx === cmt(j).bits.robidx && uop(i).uopIdx === cmt(j).bits.uopidx
    }
    vecLdCommit(i) := vecLdCommittmp(i).reduce(_ || _)

    when (vecLdCommit(i)) {
      veccommitted(i) := true.B
    }
  }

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time.
    * However,
    *   (1) a ready load instruction (one that needs no replay) writes back to the ROB immediately.
    */
  for(i <- 0 until LoadPipelineWidth) {
    //   most lq status needs to be updated immediately after the load writes back to the lq;
    //   the flag bits in the lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    when (io.ldin(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep

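      // Writeback without replay: addrvalid is set unless the access still has a
      // TLB miss; datavalid is set unless the load missed in the dcache (with
      // EnableFastForward, data also stays invalid if the inst will be re-sent
      // from the reservation station). Exceptions and software prefetches count
      // as valid for both, and mmio counts as data-valid.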
      when (!need_rep) {
        // update control flags
        addrvalid(loadWbIndex) := hasExceptions || !io.ldin(i).bits.tlbMiss || io.ldin(i).bits.isSWPrefetch
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
              hasExceptions ||
              io.ldin(i).bits.mmio ||
             !io.ldin(i).bits.miss && // did not miss in dcache
             !io.ldin(i).bits.dcacheRequireReplay || // do not write back if the inst will be re-sent from rs
              io.ldin(i).bits.isSWPrefetch
           } else {
              hasExceptions ||
              io.ldin(i).bits.mmio ||
             !io.ldin(i).bits.miss ||
              io.ldin(i).bits.isSWPrefetch
           })

        // update the stored uop (data_wen_dup provides duplicated write enables)
        when (io.ldin(i).bits.data_wen_dup(1)) {
          uop(loadWbIndex) := io.ldin(i).bits.uop
        }
        when (io.ldin(i).bits.data_wen_dup(4)) {
          uop(loadWbIndex).debugInfo := io.ldin(i).bits.uop.debugInfo
        }
        uop(loadWbIndex).debugInfo := io.ldin(i).bits.rep_info.debug

        //  Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr

        XSInfo(io.ldin(i).valid,
          "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x isvec %x\n",
          io.ldin(i).bits.uop.lqIdx.asUInt,
          io.ldin(i).bits.uop.pc,
          io.ldin(i).bits.vaddr,
          io.ldin(i).bits.paddr,
          io.ldin(i).bits.mask,
          io.ldin(i).bits.forwardData.asUInt,
          io.ldin(i).bits.forwardMask.asUInt,
          io.ldin(i).bits.mmio,
          io.ldin(i).bits.isvec
        )
      }
    }
  }

  //  perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i))))
  QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(s"$i pc %x pa %x ", uop(i).pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    PrintFlag(allocated(i) && isvec(i), "c")
    XSDebug(false, true.B, "\n")
  }
  // end
}