/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.fu.FuConfig.LduCfg

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect    = Flipped(Valid(new Redirect))
    // from dispatch
    val enq         = new LqEnqIO
    // from ldu s3
    val ldin        = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr     = Output(new LqPtr)
    // global
    val lqFull      = Output(Bool())
    val lqEmpty     = Output(Bool())
    // to dispatch
    val lqDeq       = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
    // vector load writeback
    val vecWriteback = Flipped(ValidIO(new MemExuOutput(isVector = true)))
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  //  VirtualLoadQueue field
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  Flags       : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new DynInst))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid
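  // An entry becomes eligible for dequeue only after both addrvalid and datavalid have
  // been set, either by the ldu s3 writeback or by the vector writeback port; see
  // deqLookup below.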

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  //  maintain pointers
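  // enqPtrExt holds one enqueue pointer per dispatch port, all kept in step. Within this
  // module only enqPtrExt(0) is actually consumed; per-port allocation indices are derived
  // from it plus the vector-element offsets computed below.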
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = RegNext(needCancel)
  val enqCancel = io.enq.req.map(_.bits.robIdx.needFlush(io.redirect))
  val lastEnqCancel = PopCount(RegNext(VecInit(canEnqueue.zip(enqCancel).map(x => x._1 && x._2))))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)
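  // Redirect recovery bookkeeping: lastCycleCancelCount counts allocated entries squashed
  // by last cycle's redirect, and lastEnqCancel counts requests that were being enqueued in
  // that same cycle. Their sum is latched into redirectCancelCount when lastCycleRedirect
  // fires, subtracted from the enqueue pointers one cycle later (when lastLastCycleRedirect
  // is valid), and reported to dispatch through io.lqCancelCnt.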

  // update enqueue pointer
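  // Each enqueue port may carry a vector load that occupies numLsElem entries.
  // validVLoadFlow is the element count actually accepted on each port this cycle, while
  // validVLoadOffset(i) is the number of elements requested by ports 0..i-1 (gated by
  // needAlloc), so the enqueue loop below can place port i at enqPtrExt(0) plus that
  // prefix sum.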
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem)
  val validVLoadFlow = vLoadFlow.zipWithIndex.map{case (vLoadFlowNum_Item, index) => Mux(io.enq.canAccept && io.enq.sqCanAccept && canEnqueue(index), vLoadFlowNum_Item, 0.U)}
  val validVLoadOffset = 0.U +: vLoadFlow.zip(io.enq.needAlloc)
                                .map{case (flow, needAlloc_Item) => Mux(needAlloc_Item, flow, 0.U)}
                                .slice(0, validVLoadFlow.length - 1)
  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

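  // If the redirect walk-back would leave the enqueue pointer at or behind the dequeue
  // pointer, realign the per-port enqueue pointers to start right at deqPtrNext instead.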
  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value) && datavalid(ptr.value) && addrvalid(ptr.value) && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
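  // deqCountMask has a 1 for each of the CommitWidth entries after deqPtr that have
  // finished and are not being cancelled this cycle. PriorityEncoderOH(~deqCountMask) is a
  // one-hot of the first unfinished slot, so subtracting 1 yields a mask of the leading
  // finished slots, and its PopCount is the number of consecutive entries to release.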
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = RegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

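  // io.lqDeq is delayed one more cycle so that the deallocation count reported to dispatch
  // lines up with the cycle in which deqPtr itself is updated.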
  io.lqDeq := RegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries >= LSQLdEnqWidth
   */
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until io.enq.req.length) {
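    // lqIdx recomputes the entry this port should occupy (base pointer plus the element
    // counts of earlier ports); dispatch pre-allocates the same index in
    // io.enq.req(i).bits.lqIdx, and the XSError below checks that the two agree.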
    val offset = PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(0) + validVLoadOffset.take(i + 1).reduce(_ + _)
//    val lqIdx = 0.U.asTypeOf(new LqPtr)
    val index = io.enq.req(i).bits.lqIdx.value
    when (canEnqueue(i) && !enqCancel(i)) {
      allocated(index) := true.B
      uop(index) := io.enq.req(i).bits
      uop(index).lqIdx := lqIdx

      // init
      addrvalid(index) := false.B
      datavalid(index) := false.B

      debug_mmio(index) := false.B
      debug_paddr(index) := 0.U

      XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
      XSError(index =/= lqIdx.value, s"must be the same entry $i\n")
    }
    io.enq.resp(i) := lqIdx
  }

  /**
    * Load commits
    *
    * When a load is committed, mark it as !allocated and move deqPtr forward.
    */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr+i.U).value) := false.B
      XSError(!allocated((deqPtr+i.U).value), s"why commit invalid entry $i?\n")
    }
  })

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time.
    * However,
    *   (1) A ready load instruction (one that needs no replay) writes back to the ROB immediately.
    */
  for (i <- 0 until LoadPipelineWidth) {
    //   most lq status needs to be updated immediately after the load writes back to the lq
    //   flag bits in the lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value
    val isvec = io.ldin(i).bits.isvec // vector loads are written back from the uop queue instead of the load units

    when (io.ldin(i).valid && !isvec) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep

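      // A load that needs no replay updates its flags here: addrvalid is set unless the
      // access TLB-missed, and datavalid is set unless the access missed in the DCache
      // (exceptions and MMIO accesses count as ready). With EnableFastForward, a missing
      // load that will be reissued from the reservation station (dcacheRequireReplay)
      // also keeps datavalid clear.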
      when (!need_rep) {
        // update control flag
        addrvalid(loadWbIndex) := hasExceptions || !io.ldin(i).bits.tlbMiss
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
              hasExceptions ||
              io.ldin(i).bits.mmio ||
             !io.ldin(i).bits.miss && // dcache miss
             !io.ldin(i).bits.dcacheRequireReplay // do not writeback if that inst will be resend from rs
           } else {
              hasExceptions ||
              io.ldin(i).bits.mmio ||
             !io.ldin(i).bits.miss
           })

        //
        when (io.ldin(i).bits.data_wen_dup(1)) {
          uop(loadWbIndex) := io.ldin(i).bits.uop
        }
        when (io.ldin(i).bits.data_wen_dup(4)) {
          uop(loadWbIndex).debugInfo := io.ldin(i).bits.uop.debugInfo
        }
        uop(loadWbIndex).debugInfo := io.ldin(i).bits.rep_info.debug

        //  Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr

        XSInfo(io.ldin(i).valid, "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.ldin(i).bits.uop.lqIdx.asUInt,
          io.ldin(i).bits.uop.pc,
          io.ldin(i).bits.vaddr,
          io.ldin(i).bits.paddr,
          io.ldin(i).bits.mask,
          io.ldin(i).bits.forwardData.asUInt,
          io.ldin(i).bits.forwardMask.asUInt,
          io.ldin(i).bits.mmio
        )
      }
    }
  }

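  // Vector loads bypass the scalar ldin path above: when the vector writeback arrives,
  // the corresponding entry is simply marked address- and data-valid so it can commit.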
  XSError(io.vecWriteback.valid && !allocated(io.vecWriteback.bits.uop.lqIdx.value),
    "wb lqIdx should be allocated at dispatch stage")
  when (io.vecWriteback.valid) {
    val vecWbIndex = io.vecWriteback.bits.uop.lqIdx.value
    addrvalid(vecWbIndex) := true.B
    datavalid(vecWbIndex) := true.B
  }

  //  perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(i + " pc %x pa %x ", uop(i).pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    XSDebug(false, true.B, "\n")
  }
  // end
}