/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend._
import xiangshan.backend.fu.fpu._
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.mem.mdp._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
import xiangshan.backend.rob.RobPtr

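// A circular-queue pointer into the virtual load queue: `value` selects the entry,
// and `flag` flips on each wrap-around so that relative age can still be compared.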
class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
)

object LqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

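// Helpers that format load data for writeback: scalar results are sign- or
// zero-extended to XLEN (NaN-boxed for FP loads), vector elements to VLEN.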
trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    val fpWen = uop.fpWen
    LookupTree(uop.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw   -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN)
    ))
  }
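  // Example: for `flw` (fuOpType = LSUOpType.lw with fpWen set), a raw dcache word
  // holding 0x3f80_0000 (1.0f) writes back as 0xffff_ffff_3f80_0000: FPU.box fills
  // the upper FLEN - 32 bits with all 1s, as the spec excerpt above requires.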

  // alignedType encodes the vector element width: 00 = byte, 01 = halfword,
  // 10 = word, 11 = doubleword; the element is zero-extended to VLEN.
  def rdataVecHelper(alignedType: UInt, rdata: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> ZeroExt(rdata(7, 0), VLEN),
      "b01".U -> ZeroExt(rdata(15, 0), VLEN),
      "b10".U -> ZeroExt(rdata(31, 0), VLEN),
      "b11".U -> ZeroExt(rdata(63, 0), VLEN)
    ))
  }
}

class LqEnqIO(implicit p: Parameters) extends MemBlockBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(LSQEnqWidth, Input(Bool()))
  val req = Vec(LSQEnqWidth, Flipped(ValidIO(new DynInst)))
  val resp = Vec(LSQEnqWidth, Output(new LqPtr))
}

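// Debug-mode trigger interface: per-trigger address-match hit vectors exchanged
// between the load pipeline and the load queue.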
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(TriggerNum, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(TriggerNum, Bool()))
}

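// Top-down performance-analysis hooks: reports why the load at the ROB head is
// stalled (TLB replay or miss, load violation, outstanding MSHR, other replays).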
class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}

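/**
 * LoadQueue composes the load-queue family: VirtualLoadQueue holds control
 * state, LoadQueueRAR / LoadQueueRAW check ld-ld and st-ld violations,
 * LoadQueueReplay holds loads waiting to replay, and dedicated buffers
 * handle exceptions and uncached (MMIO) loads.
 */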
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // st-ld query: s1 req, s2 resp, s3 revoke
      val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // ld-ld query: s1 req, s2 resp, s3 revoke
      val ldin            = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
      val vecStoreAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput))) // from store_s0: store data issued from RS, also sent to the store queue
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr       = Input(new SqPtr)
      val sqEmpty          = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val lqEmpty = Output(Bool())

    val vecWriteback = Flipped(ValidIO(new MemExuOutput(isVector = true)))
    val lqDeqPtr = Output(new LqPtr)

    val debugTopDown = new LoadQueueTopDownIO
  })

  val loadQueueRAR = Module(new LoadQueueRAR)          // ld-ld (read-after-read) violation check
  val loadQueueRAW = Module(new LoadQueueRAW)          // st-ld (read-after-write) violation check
  val loadQueueReplay = Module(new LoadQueueReplay)    // holds loads waiting to be replayed
  val virtualLoadQueue = Module(new VirtualLoadQueue)  // control state of all in-flight loads
  val exceptionBuffer = Module(new LqExceptionBuffer)  // tracks the address of excepting loads
  val uncacheBuffer = Module(new UncacheBuffer)        // uncached (MMIO) loads
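
  // Writebacks from load_s3 (io.ldu.ldin) fan out below: VirtualLoadQueue updates
  // per-entry state, the exception and uncache buffers snoop the same ports, and
  // LoadQueueReplay enqueues any load flagged for replay.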

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release  <> io.release
  loadQueueRAR.io.ldWbPtr  <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.vecStoreIn       <> io.sta.vecStoreAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect      <> io.redirect
  virtualLoadQueue.io.enq           <> io.enq
  virtualLoadQueue.io.ldin          <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull        <> io.lqFull
  virtualLoadQueue.io.lqDeq         <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt   <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty       <> io.lqEmpty
  virtualLoadQueue.io.vecWriteback  <> io.vecWriteback
  virtualLoadQueue.io.ldWbPtr       <> io.lqDeqPtr

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect    <> io.redirect
  uncacheBuffer.io.ldout       <> io.ldout
  uncacheBuffer.io.ld_raw_data <> io.ld_raw_data
  uncacheBuffer.io.rob         <> io.rob
  uncacheBuffer.io.uncache     <> io.uncache
  uncacheBuffer.io.trigger     <> io.trigger
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits // from load_s3
  }

  // Rollback: when LoadQueueRAW and the uncache buffer both request a pipeline
  // rollback in the same cycle, forward only the oldest one (by robIdx).
  def selectOldest[T <: Redirect](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length <= 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      // Pick the request with the smaller (older) robIdx; if only one is valid, pick that one.
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).robIdx, bits(1).robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }
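  // E.g. with four candidates the recursion reduces pairs (0,1) and (2,3) to two
  // winners, then compares those, leaving exactly one oldest valid request.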

  val (rollbackSelV, rollbackSelBits) = selectOldest(
    Seq(loadQueueRAW.io.rollback.valid, uncacheBuffer.io.rollback.valid),
    Seq(loadQueueRAW.io.rollback.bits, uncacheBuffer.io.rollback.bits)
  )
  io.rollback.valid := rollbackSelV.head
  io.rollback.bits := rollbackSelBits.head

  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect         <> io.redirect
  loadQueueReplay.io.enq              <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn      <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn      <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay           <> io.replay
  loadQueueReplay.io.refill           <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

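  // full_mask bit order is {RAR, RAW, Replay} (MSB to LSB), so e.g. "full_mask_100"
  // counts cycles where only the RAR queue is full.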
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("rollback", io.rollback.valid)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
    Seq(
      ("full_mask_000", full_mask === 0.U),
      ("full_mask_001", full_mask === 1.U),
      ("full_mask_010", full_mask === 2.U),
      ("full_mask_011", full_mask === 3.U),
      ("full_mask_100", full_mask === 4.U),
      ("full_mask_101", full_mask === 5.U),
      ("full_mask_110", full_mask === 6.U),
      ("full_mask_111", full_mask === 7.U),
      ("rollback", io.rollback.valid)
    )
  generatePerfEvent()
  // end
}