xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/VirtualLoadQueue.scala (revision 92b88f30156d46e844042eea94f7121557fd09a1)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.DynInst
import xiangshan.backend.fu.FuConfig.LduCfg

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val loadIn = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    val ldWbPtr = Output(new LqPtr)
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  //  VirtualLoadQueue field
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  Flags       : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new DynInst))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  //  maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
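  // allowEnqueue: at least LoadPipelineWidth entries must remain free before new requests are accepted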
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LoadPipelineWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = RegNext(needCancel)
  val enqCancel = io.enq.req.map(_.bits.robIdx.needFlush(io.redirect))
  val lastEnqCancel = PopCount(RegNext(VecInit(canEnqueue.zip(enqCancel).map(x => x._1 && x._2))))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
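  // lastCycleCancelCount: allocated entries flushed by last cycle's redirect
  // lastEnqCancel: enqueue requests flushed in the same cycle they arrived
  // (their enqueue pointer increment still has to be undone during recovery)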

  // update enqueue pointer
  val enqCount = Mux(io.enq.canAccept && io.enq.sqCanAccept, PopCount(io.enq.req.map(_.valid)), 0.U)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - (lastCycleCancelCount + lastEnqCancel)))
  }.otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqCount))
  }

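  // the recovered enqueue pointer must not fall behind the dequeue pointer; if it would
  // (i.e. the queue becomes empty after the flush), realign it to deqPtrNext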
  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
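  // each cycle, look up to DeqPtrMoveStride entries starting at deqPtr; an entry can leave
  // the queue once it is allocated, its address and data are valid, and it is not the entry
  // currently pointed to by enqPtr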
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value) && datavalid(ptr.value) && addrvalid(ptr.value) && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & ~deqInSameRedirectCycle.asUInt
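  // PriorityEncoderOH(~deqCountMask) - 1.U produces a mask of the trailing ones of
  // deqCountMask, so commitCount is the length of the run of consecutive ready,
  // non-cancelled entries starting at deqPtr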
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = RegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(next = deqPtrNext, init = 0.U.asTypeOf(new LqPtr), enable = deqPtrUpdateEna)

  io.lqDeq := RegNext(lastCommitCount)
  io.lqCancelCnt := RegNext(lastCycleCancelCount + lastEnqCancel)
  io.ldWbPtr := deqPtr

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when at least LoadPipelineWidth entries are free
   */
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until io.enq.req.length) {
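    // the i-th request uses the offset-th pre-computed enqueue pointer, where offset is the
    // number of earlier requests in this group that also need an entry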
    val offset = PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = io.enq.req(i).bits.lqIdx.value
    when (canEnqueue(i) && !enqCancel(i)) {
      allocated(index) := true.B
      uop(index) := io.enq.req(i).bits
      uop(index).lqIdx := lqIdx

      // init
      addrvalid(index) := false.B
      datavalid(index) := false.B

      debug_mmio(index) := false.B
      debug_paddr(index) := 0.U

      XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
      XSError(index =/= lqIdx.value, s"must be the same entry $i\n")
    }
    io.enq.resp(i) := lqIdx
  }

  /**
    * Load commits
    *
    * When a load commits, mark its entry as !allocated and move deqPtr forward.
    */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr+i.U).value) := false.B
      XSError(!allocated((deqPtr+i.U).value), s"why commit invalid entry $i?\n")
    }
  })

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time.
    * However,
    *   (1) a ready load instruction (one that needs no replay) writes back to the ROB immediately.
    */
  for(i <- 0 until LoadPipelineWidth) {
    //   most lq status bits need to be updated immediately after the load writes back to the lq
    //   flag bits in the lq need to be updated accurately
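    // entries were already allocated at dispatch, so writeback from the load pipeline is always accepted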
    io.loadIn(i).ready := true.B
    val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value

    when (io.loadIn(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.loadIn(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val needReplay = io.loadIn(i).bits.replayInfo.needReplay()

      when (!needReplay) {
        // update control flag
        addrvalid(loadWbIndex) := hasExceptions || !io.loadIn(i).bits.tlbMiss
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
              hasExceptions ||
              io.loadIn(i).bits.mmio ||
             !io.loadIn(i).bits.miss && // dcache miss
             !io.loadIn(i).bits.dcacheRequireReplay // do not writeback if that inst will be resend from rs
           } else {
              hasExceptions ||
              io.loadIn(i).bits.mmio ||
             !io.loadIn(i).bits.miss
           })

        when (io.loadIn(i).bits.lqDataWenDup(1)) {
          uop(loadWbIndex) := io.loadIn(i).bits.uop
        }
        when (io.loadIn(i).bits.lqDataWenDup(4)) {
          uop(loadWbIndex).debugInfo := io.loadIn(i).bits.uop.debugInfo
        }
        uop(loadWbIndex).debugInfo := io.loadIn(i).bits.replayInfo.debug

        //  Debug info
        debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
        debug_paddr(loadWbIndex) := io.loadIn(i).bits.paddr

        XSInfo(io.loadIn(i).valid, "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio
        )
      }
    }
  }

  if (env.EnableTopDown) {
    val stall_loads_bound = WireDefault(0.B)
    ExcitingUtils.addSink(stall_loads_bound, "stall_loads_bound", ExcitingUtils.Perf)
    val have_miss_entry = (allocated zip datavalid).map(x => x._1 && !x._2).reduce(_ || _)
    val l1d_loads_bound = stall_loads_bound && !have_miss_entry
    ExcitingUtils.addSource(l1d_loads_bound, "l1d_loads_bound", ExcitingUtils.Perf)
    XSPerfAccumulate("l1d_loads_bound", l1d_loads_bound)
    val stall_l1d_load_miss = stall_loads_bound && have_miss_entry
    ExcitingUtils.addSource(stall_l1d_load_miss, "stall_l1d_load_miss", ExcitingUtils.Perf)
    ExcitingUtils.addSink(WireInit(0.U), "stall_l1d_load_miss", ExcitingUtils.Perf)
  }

  //  perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(i + " pc %x pa %x ", uop(i).pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    XSDebug(false, true.B, "\n")
  }
  // end
}