/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.cache.mmu._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.mem.mdp._
import xiangshan.backend.rob.RobPtr

class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}
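
// A hedged usage sketch (not from this file): LqPtr is a circular-queue pointer,
// so two pointers with equal `value` but different `flag` are exactly one wrap
// apart. The conventional full/empty tests then look like:
//
//   val deqPtr = LqPtr(f = false.B, v = 0.U)
//   val enqPtr = LqPtr(f = true.B,  v = 0.U)  // enqueue side has wrapped once
//   val full   = enqPtr.value === deqPtr.value && enqPtr.flag =/= deqPtr.flag
//   val empty  = enqPtr === deqPtr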

trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw   -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }
}
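
// Worked example of the NaN-boxing rule cited above (an illustration, not part of
// the original source): an `flw` that loads 1.0f (0x3F800000) must write the
// 64-bit f register as 0xFFFFFFFF_3F800000, i.e. FPU.box(rdata, FPU.S) is expected
// to fill bits 63..32 with all 1s so the result reads back as a legally boxed
// single-precision value rather than an arbitrary double.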

class LqEnqIO(implicit p: Parameters) extends XSBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(exuParameters.LsExuCnt, Input(Bool()))
  val req = Vec(exuParameters.LsExuCnt, Flipped(ValidIO(new MicroOp)))
  val resp = Vec(exuParameters.LsExuCnt, Output(new LqPtr))
}
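
// A hedged sketch of how a dispatch stage is assumed to drive LqEnqIO (the real
// consumer lives outside this file): an entry is allocated only when both the
// load queue and the store queue can accept, and `resp` carries the allocated
// LqPtr back to the dispatching uop.
//
//   for (i <- 0 until exuParameters.LsExuCnt) {
//     val doEnq = enq.canAccept && enq.sqCanAccept && enq.req(i).valid
//     when (doEnq) { uops(i).lqIdx := enq.resp(i) }
//   }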

class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(3, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool()))
}

class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}

class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldin            = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new ExuOutput))) // from store_s0; store data, sent to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr       = Input(new SqPtr)
      val sqEmpty          = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    // val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val nuke_rollback = Output(Valid(new Redirect))
    val nack_rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val tlb_hint = Flipped(new TlbHintIO)
    val lqEmpty = Output(Bool())
    val debugTopDown = new LoadQueueTopDownIO
  })

  val loadQueueRAR = Module(new LoadQueueRAR)          // read-after-read violation check
  val loadQueueRAW = Module(new LoadQueueRAW)          // read-after-write violation check
  val loadQueueReplay = Module(new LoadQueueReplay)    // enqueues loads that need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  // control state
  val exceptionBuffer = Module(new LqExceptionBuffer)  // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer)        // uncache buffer

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release  <> io.release
  loadQueueRAR.io.ldWbPtr  <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req   // from load_s1
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp  // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }
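
  // Assumed query timing, inferred from the stage comments above: a load issues
  // its nuke query in load_s1, receives the match result in load_s2, and may
  // revoke the query from load_s3 (e.g. when the load itself is squashed or must
  // replay), so a stale entry does not keep nuking younger accesses.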

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req   // from load_s1
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp  // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect    <> io.redirect
  virtualLoadQueue.io.enq         <> io.enq
  virtualLoadQueue.io.ldin        <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull      <> io.lqFull
  virtualLoadQueue.io.lqDeq       <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty     <> io.lqEmpty

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits  := io.ldu.ldin(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect    <> io.redirect
  uncacheBuffer.io.ldout       <> io.ldout
  uncacheBuffer.io.ld_raw_data <> io.ld_raw_data
  uncacheBuffer.io.rob         <> io.rob
  uncacheBuffer.io.uncache     <> io.uncache
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits  := io.ldu.ldin(w).bits  // from load_s3
  }

  io.nuke_rollback := loadQueueRAW.io.rollback
  io.nack_rollback := uncacheBuffer.io.rollback
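
  // Two independent rollback sources, per the wiring above: `nuke_rollback` comes
  // from LoadQueueRAW when a store-to-load ordering violation is detected after
  // the load has already obtained data, while `nack_rollback` appears to come from
  // the uncache buffer when an uncache access is rejected and the load must be
  // re-executed from the front end.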

  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect         <> io.redirect
  loadQueueReplay.io.enq              <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn      <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn      <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay           <> io.replay
  // loadQueueReplay.io.refill        <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlb_hint         <> io.tlb_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

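  // full_mask encoding, from MSB to LSB: { RAR full, RAW full, Replay full }.
  // For example, "full_mask_100" counts cycles in which only LoadQueueRAR is full,
  // and "full_mask_011" cycles in which LoadQueueRAW and LoadQueueReplay are both full.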
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("nuke_rollback", io.nuke_rollback.valid)
  XSPerfAccumulate("nack_rollback", io.nack_rollback.valid)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
    Seq(
      ("full_mask_000", full_mask === 0.U),
      ("full_mask_001", full_mask === 1.U),
      ("full_mask_010", full_mask === 2.U),
      ("full_mask_011", full_mask === 3.U),
      ("full_mask_100", full_mask === 4.U),
      ("full_mask_101", full_mask === 5.U),
      ("full_mask_110", full_mask === 6.U),
      ("full_mask_111", full_mask === 7.U),
      ("nuke_rollback", io.nuke_rollback.valid),
      ("nack_rollback", io.nack_rollback.valid)
    )
  generatePerfEvent()
  // end
}