xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueue.scala (revision 92b88f30156d46e844042eea94f7121557fd09a1)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.dcache.ReplayCarry
import xiangshan.mem.mdp._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
import xiangshan.backend.rob.RobPtr

class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

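// LqPtr is a wrapping pointer into the virtual load queue: `value` indexes an
// entry and `flag` flips on every wrap-around so that age comparisons stay
// correct across the wrap. A minimal usage sketch (illustrative values only):
//   val head = LqPtr(false.B, 0.U)
//   val tail = LqPtr(true.B, 0.U)   // has wrapped around once
//   isAfter(tail, head)             // true: tail is logically younger than head
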
trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    val fpWen = uop.fpWen
    LookupTree(uop.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw   -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN)
    ))
  }
}

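// Worked examples for rdataHelper (illustrative): an lb of byte 0x80
// sign-extends to 0xFFFF_FFFF_FFFF_FF80, while lbu zero-extends the same byte
// to 0x80. For a 32-bit FP load (fpWen set), FPU.box fills bits 63:32 with all
// 1s, yielding the legal NaN-boxed single required by the spec quoted above.
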
class LqEnqIO(implicit p: Parameters) extends XSBundle {
  private val LsExuCnt = backendParams.StaCnt + backendParams.LduCnt
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(LsExuCnt, Input(Bool()))
  val req = Vec(LsExuCnt, Flipped(ValidIO(new DynInst)))
  val resp = Vec(LsExuCnt, Output(new LqPtr))
}

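// A dispatch-side sketch (hypothetical consumer code, not part of this file):
// loads and stores enqueue to LQ and SQ in the same cycle, so a slot is only
// claimed when both queues can accept:
//   val doEnq = enq.canAccept && enq.sqCanAccept && enq.needAlloc(i) && enq.req(i).valid
//   when (doEnq) { lqIdx := enq.resp(i) } // resp carries the allocated LqPtr
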
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(3, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool()))
}

class LqExceptionBuffer(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val req = Vec(LoadPipelineWidth, Flipped(Valid(new LqWriteBundle)))
    val exceptionAddr = new ExceptionAddrIO
  })

  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  // enqueue
  // s1:
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(x => x.valid))

  // s2: delay 1 cycle; drop a request that is flushed by a redirect arriving
  // in either s1 (checked via RegNext(io.redirect)) or s2 (io.redirect)
  val s2_req = RegNext(s1_req)
  val s2_valid = (0 until LoadPipelineWidth).map(i =>
    RegNext(s1_valid(i)) &&
    !s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
    !s2_req(i).uop.robIdx.needFlush(io.redirect)
  )
  val s2_has_exception = s2_req.map(x => ExceptionNO.selectByFu(x.uop.exceptionVec, LduCfg).asUInt.orR)

  val s2_enqueue = Wire(Vec(LoadPipelineWidth, Bool()))
  for (w <- 0 until LoadPipelineWidth) {
    s2_enqueue(w) := s2_valid(w) && s2_has_exception(w)
  }

  when (req.uop.robIdx.needFlush(io.redirect)) {
    req_valid := false.B
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req_valid := true.B
  }

  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

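  // selectOldest reduces its candidates by a divide-and-conquer tournament:
  // with four inputs it picks the older of (0,1) and of (2,3), then the older
  // of the two winners; "older" means earlier in program order (smaller robIdx).
  // Example (illustrative values):
  //   valid = (1, 1, 0, 1), robIdx = (5, 3, 2, 7)  =>  selects index 1 (robIdx 3)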
  val reqSel = selectOldest(s2_enqueue, s2_req)

  // keep only the oldest exception: replace the held request when the newly
  // selected one is older in program order
  when (req_valid) {
    req := Mux(reqSel._1(0) && isAfter(req.uop.robIdx, reqSel._2(0).uop.robIdx), reqSel._2(0), req)
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req := reqSel._2(0)
  }

  io.exceptionAddr.vaddr := req.vaddr
  XSPerfAccumulate("exception", !RegNext(req_valid) && req_valid)

  // end
}

class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val storeLoadViolationQuery = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO)) // from load_s2
      val loadLoadViolationQuery = Vec(LoadPipelineWidth, Flipped(new LoadViolationQueryIO)) // from load_s2
      val loadIn = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput))) // from store_s0, store data, send to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr = Input(new SqPtr)
      val sqEmpty = Input(Bool())
    }
    val loadOut = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ldRawDataOut = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    val refill = Flipped(ValidIO(new Refill))
    val release = Flipped(Valid(new Release))
    val rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lqReplayFull = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
  })

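  // The load queue is decomposed into cooperating sub-queues, instantiated
  // below: VirtualLoadQueue tracks per-load control state; LoadQueueRAR and
  // LoadQueueRAW detect read-after-read and read-after-write violations;
  // LoadQueueReplay holds loads that must be re-issued; and dedicated buffers
  // handle exceptions and uncached accesses.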
  val loadQueueRAR = Module(new LoadQueueRAR)  //  read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW)  //  read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay)  //  enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  //  control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer) // uncache buffer

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release <> io.release
  loadQueueRAR.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req <> io.ldu.loadLoadViolationQuery(w).req // from load_s1
    loadQueueRAR.io.query(w).resp <> io.ldu.loadLoadViolationQuery(w).resp // to load_s2
    loadQueueRAR.io.query(w).preReq := io.ldu.loadLoadViolationQuery(w).preReq // from load_s1
    loadQueueRAR.io.query(w).release := io.ldu.loadLoadViolationQuery(w).release // from load_s3
  }

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect <> io.redirect
  loadQueueRAW.io.storeIn <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req <> io.ldu.storeLoadViolationQuery(w).req // from load_s1
    loadQueueRAW.io.query(w).resp <> io.ldu.storeLoadViolationQuery(w).resp // to load_s2
    loadQueueRAW.io.query(w).preReq := io.ldu.storeLoadViolationQuery(w).preReq // from load_s1
    loadQueueRAW.io.query(w).release := io.ldu.storeLoadViolationQuery(w).release // from load_s3
  }

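  // Both violation queries share the same per-pipeline handshake: preReq and
  // req are issued from load_s1, resp returns in load_s2, and release frees
  // the query entry once the load reaches load_s3.
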
  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect <> io.redirect
  virtualLoadQueue.io.enq <> io.enq
  virtualLoadQueue.io.loadIn <> io.ldu.loadIn // from load_s3
  virtualLoadQueue.io.lqFull <> io.lqFull
  virtualLoadQueue.io.lqDeq <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.loadIn(w).valid // from load_s3
    buff.bits := io.ldu.loadIn(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect <> io.redirect
  uncacheBuffer.io.loadOut <> io.loadOut
  uncacheBuffer.io.loadRawDataOut <> io.ldRawDataOut
  uncacheBuffer.io.rob <> io.rob
  uncacheBuffer.io.uncache <> io.uncache
  uncacheBuffer.io.trigger <> io.trigger
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.loadIn(w).valid // from load_s3
    buff.bits := io.ldu.loadIn(w).bits // from load_s3
  }

  // rollback
  def selectOldest[T <: Redirect](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).robIdx, bits(1).robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val (rollbackSelV, rollbackSelBits) = selectOldest(
                                          Seq(loadQueueRAW.io.rollback.valid, uncacheBuffer.io.rollback.valid),
                                          Seq(loadQueueRAW.io.rollback.bits, uncacheBuffer.io.rollback.bits)
                                        )
  io.rollback.valid := rollbackSelV.head
  io.rollback.bits := rollbackSelBits.head

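  // Two sources may request a rollback in the same cycle (a store-load
  // violation from LoadQueueRAW and a conflict from the uncache buffer); the
  // tournament above forwards only the redirect with the older robIdx, since
  // rolling back to the older point also squashes the younger one.
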
  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect <> io.redirect
  loadQueueReplay.io.enq <> io.ldu.loadIn // from load_s3
  loadQueueReplay.io.storeAddrIn <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay <> io.replay
  loadQueueReplay.io.refill <> io.refill
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull <> io.lqReplayFull
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl
  loadQueueReplay.io.ldWbPtr := virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull := loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull := loadQueueRAW.io.lqFull

  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
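  // full_mask encodes which sub-queues are full, MSB to LSB: bit 2 = RAR queue,
  // bit 1 = RAW queue, bit 0 = replay queue (e.g. full_mask === 4.U means only
  // the RAR queue is full).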
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("rollback", io.rollback.valid)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
    Seq(
      ("full_mask_000", full_mask === 0.U),
      ("full_mask_001", full_mask === 1.U),
      ("full_mask_010", full_mask === 2.U),
      ("full_mask_011", full_mask === 3.U),
      ("full_mask_100", full_mask === 4.U),
      ("full_mask_101", full_mask === 5.U),
      ("full_mask_110", full_mask === 6.U),
      ("full_mask_111", full_mask === 7.U),
      ("rollback", io.rollback.valid)
    )
  generatePerfEvent()
  // end
}