xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala (revision 627be78b11e6272c7c42f2b6b878598058ff15a9)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend._
26import xiangshan.backend.fu.fpu._
27import xiangshan.backend.rob.RobLsqIO
28import xiangshan.cache._
29import xiangshan.cache.mmu._
30import xiangshan.frontend.FtqPtr
31import xiangshan.ExceptionNO._
32import xiangshan.mem.mdp._
33import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
34import xiangshan.backend.rob.RobPtr
35
/**
  * Circular pointer used to index the load queue.
  *
  * The number of entries it wraps over is read from `VirtualLoadQueueSize`
  * in the core parameters.
  */
class LqPtr(implicit p: Parameters)
  extends CircularQueuePtr[LqPtr](params => params(XSCoreParamsKey).VirtualLoadQueueSize)
40
/** Factory for [[LqPtr]] wires. */
object LqPtr {
  /**
    * Creates a fresh `LqPtr` wire driven by the given fields.
    *
    * @param f value connected to the pointer's `flag` field
    * @param v value connected to the pointer's `value` field
    * @return the newly created wire
    */
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    // The two fields are independent, so the connection order is irrelevant.
    ptr.value := v
    ptr.flag := f
    ptr
  }
}
49
/** Helpers that turn raw load data into its write-back form; must be mixed into an `XSModule`. */
trait HasLoadHelper { this: XSModule =>
  /**
    * Selects the write-back value of a scalar load according to `uop.fuOpType`.
    *
    * @param uop   the load micro-op; `fuOpType` picks the format and `fpWen`
    *              chooses between the boxed and the sign-extended form for lw/ld
    * @param rdata raw load data; only its low bits are used by the integer forms
    * @return the selected XLEN-wide (or boxed) value
    */
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    // Low `bits` bits of the raw data, sign-/zero-extended to XLEN.
    def sext(bits: Int): UInt = SignExt(rdata(bits - 1, 0), XLEN)
    def zext(bits: Int): UInt = ZeroExt(rdata(bits - 1, 0), XLEN)

    LookupTree(uop.fuOpType, List(
      LSUOpType.lb   -> sext(8),
      LSUOpType.lh   -> sext(16),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw   -> Mux(uop.fpWen, FPU.box(rdata, FPU.S), sext(32)),
      LSUOpType.ld   -> Mux(uop.fpWen, FPU.box(rdata, FPU.D), sext(64)),
      LSUOpType.lbu  -> zext(8),
      LSUOpType.lhu  -> zext(16),
      LSUOpType.lwu  -> zext(32),
    ))
  }

  /**
    * Zero-extends one vector element to VLEN.
    *
    * @param alignedType 2-bit element-size code: b00 -> 8, b01 -> 16, b10 -> 32, b11 -> 64 bits
    * @param rdata       raw load data; the element is taken from its low bits
    * @return the element zero-extended to VLEN
    */
  def rdataVecHelper(alignedType: UInt, rdata: UInt): UInt = {
    LookupTree(
      alignedType,
      List("b00" -> 8, "b01" -> 16, "b10" -> 32, "b11" -> 64).map { case (code, bits) =>
        code.U -> ZeroExt(rdata(bits - 1, 0), VLEN)
      }
    )
  }
}
78
/**
  * Enqueue interface of the load queue, written from the queue's point of view.
  *
  * All vectors have one element per LSQ enqueue port (`LSQEnqWidth`).
  */
class LqEnqIO(implicit p: Parameters) extends MemBlockBundle {
  // Driven by the load queue.
  val canAccept = Output(Bool())
  // Driven from outside. NOTE(review): presumably the store queue's own canAccept — confirm.
  val sqCanAccept = Input(Bool())
  // Per-port allocation request flags.
  val needAlloc = Input(Vec(LSQEnqWidth, Bool()))
  // Per-port incoming instruction with a valid bit.
  val req = Flipped(Vec(LSQEnqWidth, Valid(new DynInst)))
  // Per-port load-queue pointer returned to the requester.
  val resp = Output(Vec(LSQEnqWidth, new LqPtr))
}
86
/**
  * Per-pipeline trigger interface: one hit flag per trigger (`TriggerNum`)
  * in each direction.
  */
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  // Hit flags coming in.
  val hitLoadAddrTriggerHitVec = Vec(TriggerNum, Input(Bool()))
  // Hit flags going out.
  val lqLoadAddrTriggerHitVec = Vec(TriggerNum, Output(Bool()))
}
91
/**
  * Debug/top-down analysis interface of the load queue.
  *
  * Inputs describe the instruction at the ROB head; outputs are status flags
  * about it. NOTE(review): the exact meaning of each flag is defined by the
  * module that drives it, not by this bundle — confirm there.
  */
class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  // Inputs
  val robHeadVaddr = Flipped(ValidIO(UInt(VAddrBits.W))) // virtual address with a valid bit
  // Outputs
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  // Input
  val robHeadMissInDTlb = Input(Bool())
  // Output
  val robHeadOtherReplay = Output(Bool())
}
101
/**
  * Top-level load queue.
  *
  * This module contains no queue logic of its own: it instantiates the
  * sub-modules below and wires them to each other and to the module IO.
  *
  *  - `LoadQueueRAR`      read-after-read violation check
  *  - `LoadQueueRAW`      read-after-write violation check (source of `nuke_rollback`)
  *  - `LoadQueueReplay`   holds loads that need to be replayed
  *  - `VirtualLoadQueue`  control state (enqueue, dequeue pointer, full/empty)
  *  - `LqExceptionBuffer` exception address bookkeeping
  *  - `UncacheBuffer`     uncached loads (source of `nack_rollback`)
  */
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val vecFeedback = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
        val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
        val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
        val ldin         = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput(isVector = true)))) // from store_s0, store data, send to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr       = Input(new SqPtr)
      val sqEmpty          = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel  = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val nuke_rollback = Output(Valid(new Redirect))
    val nack_rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val tlb_hint = Flipped(new TlbHintIO)
    val lqEmpty = Output(Bool())

    val lqDeqPtr = Output(new LqPtr)

    val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)

    val debugTopDown = new LoadQueueTopDownIO
  })

  val loadQueueRAR = Module(new LoadQueueRAR)  //  read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW)  //  read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay)  //  enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  //  control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer) // uncache buffer

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect  <> io.redirect
  loadQueueRAR.io.vecFeedback <> io.vecFeedback
  loadQueueRAR.io.release   <> io.release
  loadQueueRAR.io.ldWbPtr   <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.vecFeedback      <> io.vecFeedback
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect      <> io.redirect
  virtualLoadQueue.io.vecCommit     <> io.vecFeedback
  virtualLoadQueue.io.enq           <> io.enq
  virtualLoadQueue.io.ldin          <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull        <> io.lqFull
  virtualLoadQueue.io.lqDeq         <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt   <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty       <> io.lqEmpty
  virtualLoadQueue.io.ldWbPtr       <> io.lqDeqPtr

  /**
   * Load queue exception buffer
   *
   * Request ports [0, LoadPipelineWidth) are fed by the scalar load pipelines,
   * ports [LoadPipelineWidth, LoadPipelineWidth + VecLoadPipelineWidth) by the
   * vector feedback channels.
   */
  exceptionBuffer.io.redirect <> io.redirect
  for (i <- 0 until LoadPipelineWidth) {
    // Vector loads are excluded here; they report through io.vecFeedback below.
    exceptionBuffer.io.req(i).valid := io.ldu.ldin(i).valid && !io.ldu.ldin(i).bits.isvec // from load_s3
    exceptionBuffer.io.req(i).bits := io.ldu.ldin(i).bits
  }
  // vlsu exception!
  for (i <- 0 until VecLoadPipelineWidth) {
    exceptionBuffer.io.req(LoadPipelineWidth + i).valid               := io.vecFeedback(i).valid && io.vecFeedback(i).bits.feedback(VecFeedbacks.FLUSH) // have exception
    // Only the fields assigned below carry information; everything else is left as DontCare.
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits                := DontCare
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.vaddr          := io.vecFeedback(i).bits.vaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.uopIdx     := io.vecFeedback(i).bits.uopidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.robIdx     := io.vecFeedback(i).bits.robidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vstart := io.vecFeedback(i).bits.vstart
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vl     := io.vecFeedback(i).bits.vl
  }

  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect   <> io.redirect
  uncacheBuffer.io.ldout      <> io.ldout
  uncacheBuffer.io.ld_raw_data  <> io.ld_raw_data
  uncacheBuffer.io.rob        <> io.rob
  uncacheBuffer.io.uncache    <> io.uncache
  uncacheBuffer.io.trigger    <> io.trigger
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits // from load_s3
  }


  // Rollback requests: store-load violations come from the RAW queue,
  // the nack rollback comes from the uncache buffer.
  io.nuke_rollback := loadQueueRAW.io.rollback
  io.nack_rollback := uncacheBuffer.io.rollback

  /* <------- DANGEROUS: Don't change sequence here ! -------> */
  // NOTE(review): io.ldu.ldin is bulk-connected to virtualLoadQueue.io.ldin above
  // and to loadQueueReplay.io.enq below. With Chisel's last-connect semantics the
  // later `<>` presumably decides which sub-module drives the shared `ready`
  // signals -- confirm before moving any of these connections.

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect         <> io.redirect
  loadQueueReplay.io.enq              <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn      <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn      <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay           <> io.replay
  loadQueueReplay.io.refill           <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlb_hint         <> io.tlb_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl
  // TODO: implement it!
  loadQueueReplay.io.vecFeedback := io.vecFeedback

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

  // 3-bit occupancy mask: bit 2 = RAR queue full, bit 1 = RAW queue full,
  // bit 0 = replay queue full. One counter per possible combination.
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("nuke_rollback", io.nuke_rollback.valid)
  // Counter name kept identical to the "nack_rollback" entry in perfEvents below.
  XSPerfAccumulate("nack_rollback", io.nack_rollback.valid)

  // perf cnt
  // Events of the sub-modules come first, followed by this module's own events.
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
  Seq(
    ("full_mask_000", full_mask === 0.U),
    ("full_mask_001", full_mask === 1.U),
    ("full_mask_010", full_mask === 2.U),
    ("full_mask_011", full_mask === 3.U),
    ("full_mask_100", full_mask === 4.U),
    ("full_mask_101", full_mask === 5.U),
    ("full_mask_110", full_mask === 6.U),
    ("full_mask_111", full_mask === 7.U),
    ("nuke_rollback", io.nuke_rollback.valid),
    ("nack_rollback", io.nack_rollback.valid)
  )
  generatePerfEvent()
  // end
}