/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend._
import xiangshan.backend.fu.fpu._
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.cache.mmu._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.mem.mdp._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
import xiangshan.backend.rob.RobPtr

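// LqPtr: circular pointer into the virtual load queue. CircularQueuePtr
// carries a wrap flag alongside the index, so full and empty states can be
// told apart when value wraps around VirtualLoadQueueSize.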
class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

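// Helpers for formatting raw load data on writeback: sign/zero extension by
// access size, and NaN boxing for narrower FP loads.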
trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    val fpWen = uop.fpWen
    LookupTree(uop.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw   -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN),

      // hypervisor
      LSUOpType.hlvb -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.hlvh -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.hlvw -> SignExt(rdata(31, 0), XLEN),
      LSUOpType.hlvd -> SignExt(rdata(63, 0), XLEN),
      LSUOpType.hlvbu -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.hlvhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvwu -> ZeroExt(rdata(31, 0), XLEN),
      LSUOpType.hlvxhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvxwu -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }

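  // Produces a one-hot selector consumed by newRdataHelper. Cat lists the MSB
  // first, so the last term here is bit 0; bit i of the result selects
  // selData(i) in newRdataHelper below, and the two orderings must stay in sync.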
  def genRdataOH(uop: DynInst): UInt = {
    val fuOpType = uop.fuOpType
    val fpWen    = uop.fpWen
    val result = Cat(
      (fuOpType === LSUOpType.lw && fpWen),
      (fuOpType === LSUOpType.lh && fpWen),
      (fuOpType === LSUOpType.lw && !fpWen) || (fuOpType === LSUOpType.hlvw),
      (fuOpType === LSUOpType.lh && !fpWen) || (fuOpType === LSUOpType.hlvh),
      (fuOpType === LSUOpType.lb)           || (fuOpType === LSUOpType.hlvb),
      (fuOpType === LSUOpType.ld)           || (fuOpType === LSUOpType.hlvd),
      (fuOpType === LSUOpType.lwu)          || (fuOpType === LSUOpType.hlvwu) || (fuOpType === LSUOpType.hlvxwu),
      (fuOpType === LSUOpType.lhu)          || (fuOpType === LSUOpType.hlvhu) || (fuOpType === LSUOpType.hlvxhu),
      (fuOpType === LSUOpType.lbu)          || (fuOpType === LSUOpType.hlvbu),
    )
    result
  }

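  // Formats load data using a precomputed one-hot selector (see genRdataOH),
  // so no fuOpType decoding is needed on the writeback data path.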
  def newRdataHelper(select: UInt, rdata: UInt): UInt = {
    XSError(PopCount(select) > 1.U, "data selector must be One-Hot!\n")
    val selData = Seq(
      ZeroExt(rdata(7, 0), XLEN),
      ZeroExt(rdata(15, 0), XLEN),
      ZeroExt(rdata(31, 0), XLEN),
      rdata(63, 0),
      SignExt(rdata(7, 0), XLEN),
      SignExt(rdata(15, 0), XLEN),
      SignExt(rdata(31, 0), XLEN),
      FPU.box(rdata, FPU.H),
      FPU.box(rdata, FPU.S)
    )
    Mux1H(select, selData)
  }

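  // One-hot decode of the byte offset of an access within a 64-bit beat.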
  def genDataSelectByOffset(addrOffset: UInt): Vec[Bool] = {
    require(addrOffset.getWidth == 3)
    VecInit((0 until 8).map { i =>
      addrOffset === i.U
    })
  }

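  // Zero-extends a vector element to VLEN; alignedType encodes the element
  // width (b00/b01/b10/b11 for byte/half/word/double).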
  def rdataVecHelper(alignedType: UInt, rdata: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> ZeroExt(rdata(7, 0), VLEN),
      "b01".U -> ZeroExt(rdata(15, 0), VLEN),
      "b10".U -> ZeroExt(rdata(31, 0), VLEN),
      "b11".U -> ZeroExt(rdata(63, 0), VLEN)
    ))
  }
}

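// Enqueue handshake from dispatch. canAccept is presumably combined with the
// store queue's sqCanAccept at dispatch so a memory uop only allocates when
// both queues have room; resp returns the allocated LqPtr per slot.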
class LqEnqIO(implicit p: Parameters) extends MemBlockBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(LSQEnqWidth, Input(Bool()))
  val req = Vec(LSQEnqWidth, Flipped(ValidIO(new DynInst)))
  val resp = Vec(LSQEnqWidth, Output(new LqPtr))
}

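// Hardware-trigger (debug-mode watchpoint) hit vectors exchanged between the
// load pipeline and the load queue.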
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(TriggerNum, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(TriggerNum, Bool()))
}

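// Top-down performance-analysis hooks, reporting why the instruction at the
// ROB head is stalled (TLB replay/miss, load violation, outstanding MSHR, ...).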
class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}

class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val vecFeedback = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // req: load_s1, resp: load_s2, revoke: load_s3
      val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // req: load_s1, resp: load_s2, revoke: load_s3
      val ldin            = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput(isVector = true)))) // from store_s0: store data, sent to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr       = Input(new SqPtr)
      val sqEmpty          = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val ncOut = Vec(LoadPipelineWidth, DecoupledIO(new LsPipelineBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    // val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel  = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val nuke_rollback = Vec(StorePipelineWidth, Output(Valid(new Redirect)))
    val nack_rollback = Vec(2, Output(Valid(new Redirect))) // mmio, nc
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val exceptionAddr = new ExceptionAddrIO
    val flushFrmMaBuf = Input(Bool())
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val tlb_hint = Flipped(new TlbHintIO)
    val lqEmpty = Output(Bool())

    val lqDeqPtr = Output(new LqPtr)

    val debugTopDown = new LoadQueueTopDownIO
  })

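  // The load queue is decomposed into cooperating submodules: RAR/RAW
  // violation checking, replay scheduling, architectural (virtual) queue
  // state, an exception buffer, and two uncache buffers (MMIO and NC).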
  val loadQueueRAR = Module(new LoadQueueRAR)  //  read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW)  //  read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay)  //  enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  //  control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val ioBuffer = Module(new IOBuffer) // uncache io buffer
  val ncBuffer = Module(new NCBuffer) // uncache nc buffer
  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect  <> io.redirect
  loadQueueRAR.io.vecFeedback <> io.vecFeedback
  loadQueueRAR.io.release   <> io.release
  loadQueueRAR.io.ldWbPtr   <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.vecFeedback      <> io.vecFeedback
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect      <> io.redirect
  virtualLoadQueue.io.vecCommit     <> io.vecFeedback
  virtualLoadQueue.io.enq           <> io.enq
  virtualLoadQueue.io.ldin          <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull        <> io.lqFull
  virtualLoadQueue.io.lqDeq         <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt   <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty       <> io.lqEmpty
  virtualLoadQueue.io.ldWbPtr       <> io.lqDeqPtr

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for (i <- 0 until LoadPipelineWidth) {
    exceptionBuffer.io.req(i).valid := io.ldu.ldin(i).valid && !io.ldu.ldin(i).bits.isvec // from load_s3
    exceptionBuffer.io.req(i).bits := io.ldu.ldin(i).bits
  }
  // vlsu exception!
  for (i <- 0 until VecLoadPipelineWidth) {
    exceptionBuffer.io.req(LoadPipelineWidth + i).valid                 := io.vecFeedback(i).valid && io.vecFeedback(i).bits.feedback(VecFeedbacks.FLUSH) // have exception
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits                  := DontCare
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.vaddr            := io.vecFeedback(i).bits.vaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.fullva           := io.vecFeedback(i).bits.vaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.vaNeedExt        := io.vecFeedback(i).bits.vaNeedExt
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.gpaddr           := io.vecFeedback(i).bits.gpaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.uopIdx       := io.vecFeedback(i).bits.uopidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.robIdx       := io.vecFeedback(i).bits.robidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vstart   := io.vecFeedback(i).bits.vstart
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vl       := io.vecFeedback(i).bits.vl
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.exceptionVec := io.vecFeedback(i).bits.exceptionVec
  }
  // uncache (mmio and nc) non-data error exceptions
  exceptionBuffer.io.req(LoadPipelineWidth + VecLoadPipelineWidth) := ioBuffer.io.exception
  exceptionBuffer.io.req(LoadPipelineWidth + VecLoadPipelineWidth).bits.vaNeedExt := true.B
  exceptionBuffer.io.req.last := ncBuffer.io.exception
  exceptionBuffer.io.req.last.bits.vaNeedExt := true.B
  exceptionBuffer.io.flushFrmMaBuf := io.flushFrmMaBuf

  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  // mmio
  ioBuffer.io.redirect <> io.redirect
  ioBuffer.io.ldout <> io.ldout
  ioBuffer.io.ld_raw_data <> io.ld_raw_data
  ioBuffer.io.rob <> io.rob
  for ((mmio, w) <- ioBuffer.io.req.zipWithIndex) {
    mmio.valid := io.ldu.ldin(w).valid // from load_s3
    mmio.bits := io.ldu.ldin(w).bits // from load_s3
  }
  ioBuffer.io.uncache.resp.valid := io.uncache.resp.valid && !io.uncache.resp.bits.nc
  ioBuffer.io.uncache.resp.bits := io.uncache.resp.bits
  // nc
  ncBuffer.io.redirect <> io.redirect
  ncBuffer.io.ncOut <> io.ncOut
  for ((nc, w) <- ncBuffer.io.req.zipWithIndex) {
    nc.valid := io.ldu.ldin(w).valid // from load_s3
    nc.bits := io.ldu.ldin(w).bits // from load_s3
  }
  ncBuffer.io.uncache.resp.valid := io.uncache.resp.valid && io.uncache.resp.bits.nc
  ncBuffer.io.uncache.resp.bits := io.uncache.resp.bits
  // uncache arbiter
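  // Strict priority: an MMIO (ioBuffer) request always wins; an NC request is
  // only presented to io.uncache when no MMIO request is pending.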
  ioBuffer.io.uncache.req.ready := io.uncache.req.ready
  ncBuffer.io.uncache.req.ready := io.uncache.req.ready && !ioBuffer.io.uncache.req.valid
  when(ioBuffer.io.uncache.req.valid) {
    io.uncache.req.valid := ioBuffer.io.uncache.req.valid
    io.uncache.req.bits := ioBuffer.io.uncache.req.bits
  }.otherwise {
    io.uncache.req.valid := ncBuffer.io.uncache.req.valid
    io.uncache.req.bits := ncBuffer.io.uncache.req.bits
  }
  io.uncache.resp.ready := true.B
  // TODO lyq: uncache resp ready arbiter? always true?
  // when(io.uncache.resp.bits.nc){
  //   io.uncache.resp.ready := ncBuffer.io.uncache.resp.ready
  // }.otherwise{
  //   io.uncache.resp.ready := ioBuffer.io.uncache.resp.ready
  // }

  io.nuke_rollback := loadQueueRAW.io.rollback
  io.nack_rollback(0) := ioBuffer.io.rollback
  io.nack_rollback(1) := ncBuffer.io.rollback

  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect         <> io.redirect
  loadQueueReplay.io.enq              <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn      <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn      <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay           <> io.replay
  // loadQueueReplay.io.refill           <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlb_hint         <> io.tlb_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl
  // TODO: implement it!
  loadQueueReplay.io.vecFeedback := io.vecFeedback

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

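  // Bit order matches the Cat below: full_mask = {RAR full, RAW full, Replay full}.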
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("nuke_rollback", io.nuke_rollback.map(_.valid).reduce(_ || _).asUInt)
  XSPerfAccumulate("nack_rollback", io.nack_rollback.map(_.valid).reduce(_ || _).asUInt)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
    Seq(
      ("full_mask_000", full_mask === 0.U),
      ("full_mask_001", full_mask === 1.U),
      ("full_mask_010", full_mask === 2.U),
      ("full_mask_011", full_mask === 3.U),
      ("full_mask_100", full_mask === 4.U),
      ("full_mask_101", full_mask === 5.U),
      ("full_mask_110", full_mask === 6.U),
      ("full_mask_111", full_mask === 7.U),
      ("nuke_rollback", io.nuke_rollback.map(_.valid).reduce(_ || _).asUInt),
      ("nack_rollback", io.nack_rollback.map(_.valid).reduce(_ || _).asUInt)
    )
  generatePerfEvent()
  // end
}