xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/VirtualLoadQueue.scala (revision bb2f3f51dd67f6e16e0cc1ffe43368c9fc7e4aef)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.decode.isa.bitfield.{InstVType, XSInstBitFields}

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect    = Flipped(Valid(new Redirect))
    val vecCommit   = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    // from dispatch
    val enq         = new LqEnqIO
    // from ldu s3
    val ldin        = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr     = Output(new LqPtr)
    // global
    val lqFull      = Output(Bool())
    val lqEmpty     = Output(Bool())
    // to dispatch
    val lqDeq       = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  //  VirtualLoadQueue field
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  Flags       : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new DynInst))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid
  // vector load: inst -> uop (pdest register) -> flow (one load operation in the load unit)
  val isvec = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load flow
  val veccommitted = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load uop has committed

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  //  maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)
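  // Redirect handling is pipelined: needCancel/enqCancel are computed with the incoming redirect,
  // the flush count is accumulated one cycle later (redirectCancelCount), and the enqueue pointers
  // are rolled back one cycle after that, which is why the redirect is registered twice here.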

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
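  // Enqueue is allowed only while at least LSQLdEnqWidth entries remain free. validCount is measured
  // from the not-yet-recovered enqPtr, so it over-counts (and is therefore conservative) in the
  // cycles right after a redirect.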
  val canEnqueue = io.enq.req.map(_.valid)
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = RegNext(needCancel)
  val enqCancel = canEnqueue.zip(io.enq.req).map{case (v, x) =>
    v && x.bits.robIdx.needFlush(io.redirect)
  }
  val enqCancelNum = enqCancel.zip(io.enq.req).map{case (v, req) =>
    Mux(v, req.bits.numLsElem, 0.U)
  }
  val lastEnqCancel = RegNext(enqCancelNum.reduce(_ + _))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)
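  // redirectCancelCount captures, one cycle after a redirect, the total number of flushed load
  // entries: those already allocated in the queue (lastCycleCancelCount) plus the elements that were
  // being enqueued in the redirect cycle (lastEnqCancel). It drives both the enqueue-pointer
  // rollback below and io.lqCancelCnt.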

  // update enqueue pointer
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem)
  val validVLoadFlow = vLoadFlow.zipWithIndex.map{case (vLoadFlowNumItem, index) => Mux(io.enq.canAccept && io.enq.sqCanAccept && canEnqueue(index), vLoadFlowNumItem, 0.U)}
  val validVLoadOffset = vLoadFlow.zip(io.enq.needAlloc).map{case (flow, needAllocItem) => Mux(needAllocItem, flow, 0.U)}
  val validVLoadOffsetRShift = 0.U +: validVLoadOffset.take(validVLoadFlow.length - 1)
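  // validVLoadOffsetRShift is validVLoadOffset shifted right by one port (with a leading 0), so the
  // running sum over its entries 0..i gives the number of elements claimed by dispatch ports lower
  // than i; it is used below to compute each port's lqIdx.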

  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

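  // If the next enqueue pointer (after rollback or advance) would not be strictly ahead of the next
  // dequeue pointer, clamp it to deqPtrNext so that enqPtr never falls behind deqPtr.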
  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value)
    && ((datavalid(ptr.value) && addrvalid(ptr.value) && !isvec(ptr.value)) || (isvec(ptr.value) && veccommitted(ptr.value)))
    && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
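  // commitCount is the length of the leading run of 1s in deqCountMask: PriorityEncoderOH(~deqCountMask)
  // marks the first entry (starting from deqPtr) that cannot be released yet, subtracting 1 turns that
  // one-hot into a mask of all lower bits, and its PopCount is the number of consecutive releasable entries.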
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = RegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

  io.lqDeq := RegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)
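  // io.lqDeq reports the released-entry count with the same one-cycle latency as the deqPtr update
  // above, so the count seen by dispatch lines up with the cycle in which the pointer actually moves.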

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries >= LSQLdEnqWidth
   */
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until io.enq.req.length) {
    val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
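    // lqIdx for port i is the current enqueue pointer plus the total number of elements allocated by
    // lower-numbered dispatch ports in the same cycle (prefix sum of validVLoadOffsetRShift).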
    val index = io.enq.req(i).bits.lqIdx
    val enqInstr = io.enq.req(i).bits.instr.asTypeOf(new XSInstBitFields)
    when (canEnqueue(i) && !enqCancel(i)) {
      // The maximum 'numLsElem' that can be emitted per dispatch port is:
      //    16, 2, 2, 2, 2, 2.
      // Therefore, VecMemLSQEnqIteratorNumberSeq = Seq(16, 2, 2, 2, 2, 2)
      for (j <- 0 until VecMemLSQEnqIteratorNumberSeq(i)) {
        when (j.U < validVLoadOffset(i)) {
          allocated((index + j.U).value) := true.B
          uop((index + j.U).value) := io.enq.req(i).bits
          uop((index + j.U).value).lqIdx := lqIdx + j.U

          // init
          addrvalid((index + j.U).value) := false.B
          datavalid((index + j.U).value) := false.B
          isvec((index + j.U).value) := enqInstr.isVecLoad
          veccommitted((index + j.U).value) := false.B

          debug_mmio((index + j.U).value) := false.B
          debug_paddr((index + j.U).value) := 0.U

          XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
          XSError(index.value =/= lqIdx.value, s"must be the same entry $i\n")
        }
      }
    }
    io.enq.resp(i) := lqIdx
  }

  /**
    * Load commits
    *
    * When a load is committed, mark it as !allocated and move deqPtr forward.
    */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr+i.U).value) := false.B
      XSError(!allocated((deqPtr+i.U).value), s"why commit invalid entry $i?\n")
    }
  })

  // vector commit or replay
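  // An entry is marked veccommitted once any vector feedback port reports a commit whose robIdx and
  // uopIdx match the entry's uop; only then can a vector entry be released at the dequeue side.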
  val vecLdCommittmp = Wire(Vec(VirtualLoadQueueSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCommit = Wire(Vec(VirtualLoadQueueSize, Bool()))
  for (i <- 0 until VirtualLoadQueueSize) {
    val cmt = io.vecCommit
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCommittmp(i)(j) := allocated(i) && cmt(j).valid && cmt(j).bits.isCommit && uop(i).robIdx === cmt(j).bits.robidx && uop(i).uopIdx === cmt(j).bits.uopidx
    }
    vecLdCommit(i) := vecLdCommittmp(i).reduce(_ || _)

    when (vecLdCommit(i)) {
      veccommitted(i) := true.B
    }
  }

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time.
    * However,
    *   (1) a load that is ready (needs no replay) writes back to the ROB immediately.
    */
  for(i <- 0 until LoadPipelineWidth) {
    //   most LQ status needs to be updated immediately after a load writes back to the LQ
    //   flag bits in the LQ need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    when (io.ldin(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep

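      // Only a writeback that does not need replay updates the entry's flags; a replayed load keeps
      // its flags cleared and is expected to come back through ldin again later.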
      when (!need_rep) {
        // update control flag
        addrvalid(loadWbIndex) := hasExceptions || !io.ldin(i).bits.tlbMiss
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
              hasExceptions ||
              io.ldin(i).bits.mmio ||
             !io.ldin(i).bits.miss && // dcache miss
             !io.ldin(i).bits.dcacheRequireReplay // do not writeback if that inst will be resend from rs
           } else {
              hasExceptions ||
              io.ldin(i).bits.mmio ||
             !io.ldin(i).bits.miss
           })

        //
        when (io.ldin(i).bits.data_wen_dup(1)) {
          uop(loadWbIndex) := io.ldin(i).bits.uop
        }
        when (io.ldin(i).bits.data_wen_dup(4)) {
          uop(loadWbIndex).debugInfo := io.ldin(i).bits.uop.debugInfo
        }
        uop(loadWbIndex).debugInfo := io.ldin(i).bits.rep_info.debug

        //  Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr

        XSInfo(io.ldin(i).valid,
          "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x isvec %x\n",
          io.ldin(i).bits.uop.lqIdx.asUInt,
          io.ldin(i).bits.uop.pc,
          io.ldin(i).bits.vaddr,
          io.ldin(i).bits.paddr,
          io.ldin(i).bits.mask,
          io.ldin(i).bits.forwardData.asUInt,
          io.ldin(i).bits.forwardMask.asUInt,
          io.ldin(i).bits.mmio,
          io.ldin(i).bits.isvec
        )
      }
    }
  }

  //  perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i))))
  QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(i + " pc %x pa %x ", uop(i).pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    PrintFlag(allocated(i) && isvec(i), "c")
    XSDebug(false, true.B, "\n")
  }
  // end
}