// xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala (revision 60ebee385ce85a25a994f6da0c84ecce9bb91bca)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
16
package xiangshan.mem

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
32
/** Pointer into the circular virtual load queue.
  *
  * Inherits the `flag`/`value` fields and wrap-aware comparison helpers from
  * [[CircularQueuePtr]]; the queue depth is taken from the core parameters
  * (`VirtualLoadQueueSize`).
  */
class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}
37
object LqPtr {
  /** Construct an [[LqPtr]] wire from an explicit wrap flag and index value.
    *
    * @param f wrap (generation) flag of the circular pointer
    * @param v index value within the queue
    */
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}
46
trait HasLoadHelper { this: XSModule =>
  /** Extend raw load data to XLEN according to the load's funct type.
    *
    * Signed loads (lb/lh/lw/ld) are sign-extended, unsigned loads
    * (lbu/lhu/lwu) are zero-extended. Loads that write a floating-point
    * register (fpWen) are NaN-boxed instead of sign-extended.
    *
    * @param uop   the load micro-op (fuOpType selects the width/signedness)
    * @param rdata raw data as read from cache/memory, LSB-aligned
    * @return XLEN-wide register write-back value
    */
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw   -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }
}
66
/** Enqueue interface of the load queue (dispatch side).
  *
  * canAccept/sqCanAccept implement the combined LSQ handshake: loads are
  * accepted only when both queues have room. `resp` returns the allocated
  * queue pointer for each request slot.
  */
class LqEnqIO(implicit p: Parameters) extends XSBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  // needAlloc is asserted for slots that require a load-queue entry
  val needAlloc = Vec(exuParameters.LsExuCnt, Input(Bool()))
  val req = Vec(exuParameters.LsExuCnt, Flipped(ValidIO(new MicroOp)))
  val resp = Vec(exuParameters.LsExuCnt, Output(new LqPtr))
}
74
/** Debug-trigger hit vectors exchanged between the load pipeline and the
  * load queue (3 address triggers per load).
  * NOTE(review): the width 3 appears hard-coded here — presumably it tracks
  * the number of load address triggers; confirm against the trigger module
  * before parameterizing.
  */
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(3, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool()))
}
79
/** Top-down performance-debug interface: classifies why the load at the ROB
  * head is stalled (TLB replay/miss, memory-violation replay, MSHR wait, …).
  */
class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}
89
/** Top-level load queue wrapper.
  *
  * This module contains no datapath state of its own; it instantiates the
  * split load-queue sub-structures and wires them together:
  *   - VirtualLoadQueue:  allocation / writeback / commit control state
  *   - LoadQueueRAR:      load-load (read-after-read) violation checks
  *   - LoadQueueRAW:      store-load (read-after-write) violation checks
  *   - LoadQueueReplay:   holds loads that must be replayed
  *   - LqExceptionBuffer: records the vaddr of the oldest faulting load
  *   - UncacheBuffer:     MMIO / uncached load handling
  */
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
        val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
        val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
        val ldin         = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new ExuOutput))) // from store_s0, store data, send to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr       = Input(new SqPtr)
      val sqEmpty          = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel  = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val lqEmpty = Output(Bool())
    val debugTopDown = new LoadQueueTopDownIO
  })

  val loadQueueRAR = Module(new LoadQueueRAR)  //  read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW)  //  read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay)  //  enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  //  control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer) // uncache buffer

  /**
   * LoadQueueRAR: load-load nuke queries from each load pipeline stage.
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release  <> io.release
  loadQueueRAR.io.ldWbPtr  <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW: store-load nuke queries plus store address/issue tracking.
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue: enqueue from dispatch, writeback from load_s3.
   */
  virtualLoadQueue.io.redirect    <> io.redirect
  virtualLoadQueue.io.enq         <> io.enq
  virtualLoadQueue.io.ldin        <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull      <> io.lqFull
  virtualLoadQueue.io.lqDeq       <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty <> io.lqEmpty

  /**
   * Load queue exception buffer: snoops load_s3 writeback to capture the
   * virtual address of faulting loads for mtval/stval reporting.
   */
  exceptionBuffer.io.redirect <> io.redirect
  for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer: handles MMIO loads and their ROB/uncache handshake.
   */
  uncacheBuffer.io.redirect   <> io.redirect
  uncacheBuffer.io.ldout      <> io.ldout
  uncacheBuffer.io.ld_raw_data  <> io.ld_raw_data
  uncacheBuffer.io.rob        <> io.rob
  uncacheBuffer.io.uncache    <> io.uncache
  uncacheBuffer.io.trigger    <> io.trigger
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits // from load_s3
  }

  // rollback: pick the oldest (by robIdx) valid redirect among the
  // candidates using a recursive tournament reduction.
  def selectOldest[T <: Redirect](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      // Both valid: keep the older robIdx; otherwise keep whichever is valid.
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).robIdx, bits(1).robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  // Merge rollback requests from RAW violation detection and the uncache buffer.
  val (rollbackSelV, rollbackSelBits) = selectOldest(
                                          Seq(loadQueueRAW.io.rollback.valid, uncacheBuffer.io.rollback.valid),
                                          Seq(loadQueueRAW.io.rollback.bits, uncacheBuffer.io.rollback.bits)
                                        )
  io.rollback.valid := rollbackSelV.head
  io.rollback.bits := rollbackSelBits.head

  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay: must be connected after the modules above
   * (see the warning comment — connection order is significant).
   */
  loadQueueReplay.io.redirect         <> io.redirect
  loadQueueReplay.io.enq              <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn      <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn      <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay           <> io.replay
  loadQueueReplay.io.refill           <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

  // Perf: 3-bit mask {RAR, RAW, Replay} full flags, one counter per combination.
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("rollback", io.rollback.valid)

  // perf cnt: aggregate sub-module perf events plus the local counters above.
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
  Seq(
    ("full_mask_000", full_mask === 0.U),
    ("full_mask_001", full_mask === 1.U),
    ("full_mask_010", full_mask === 2.U),
    ("full_mask_011", full_mask === 3.U),
    ("full_mask_100", full_mask === 4.U),
    ("full_mask_101", full_mask === 5.U),
    ("full_mask_110", full_mask === 6.U),
    ("full_mask_111", full_mask === 7.U),
    ("rollback", io.rollback.valid)
  )
  generatePerfEvent()
  // end
}
285}