/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
import utils._
import utility._
import xiangshan.backend.Bundles.DynInst

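// LoadQueueRAW tracks loads that were issued while at least one older store
// still had an unresolved address, so that store-to-load (read-after-write)
// ordering violations can be detected once those store addresses are known.
// Entries are allocated from a free list when a load queries the queue and are
// CAM-searched by store address at store writeback; a match against a younger,
// data-valid load raises a rollback (re-fetch from the load) via io.rollback.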
class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(ValidIO(new Redirect))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // from store unit s1
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))

    // global rollback flush
    val rollback = Output(Valid(new Redirect))

    // to LoadQueueReplay
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr       = Input(new SqPtr)
    val lqFull           = Output(Bool())
  })

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-------+--------+-------+-------+-----------+
  //  | Valid |  uop   | PAddr | Mask  | Datavalid |
  //  +-------+--------+-------+-------+-----------+
  //
  //  Field descriptions:
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  PAddr       : physical address.
  //  Mask        : data mask
  //  Datavalid   : data valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt((VLEN/8).W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  // freeList: tracks which entry indices are free for allocation.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = true,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare
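  // As used below: every load port raises allocateReq each cycle, reads
  // canAllocate/allocateSlot (indexed by how many earlier ports also enqueue)
  // and confirms with doAllocate; released entries are handed back through
  // freeList.io.free as a per-entry mask (freeMaskVec).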

  //  LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt((VLEN/8).W)))
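  // A load needs a RAW entry only if (1) its query is valid, (2) at least one
  // store between stAddrReadySqPtr and the load's sqIdx has not resolved its
  // address yet (skipped entirely when all issued stores are address-ready,
  // i.e. allAddrCheck), and (3) the load is not being flushed by a redirect.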

  // Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    //  Allocate ready
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      //  Allocate new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      //  Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      //  Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.data_valid
    }
  }
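  // Each enqueuing port indexes the free list by the number of enqueuing ports
  // before it (offset = PopCount(needEnqueue.take(w))), so concurrent loads get
  // distinct slots. For example (illustrative only), with needEnqueue = (T, F, T)
  // port 0 uses allocateSlot(0) and port 2 uses allocateSlot(1).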

  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.rep_frm_fetch := RegNext(false.B)
  }

  //  LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // When all stores older than the current load have their addresses ready,
  // the current load can be released.
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needCancel)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }
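  // Once stAddrReadySqPtr has passed an entry's sqIdx, every older store has
  // already presented its address for the CAM check, so the entry is no longer
  // needed; flushed loads are likewise dropped immediately.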

  // if a load needs to be replayed, deallocate the entry allocated last cycle
  val lastCanAccept = RegNext(acceptedVec)
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }
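  // The revoke signal refers to the query issued one cycle earlier, which is
  // why the accepted flag and allocation index are registered (lastCanAccept /
  // lastAllocIndex) before being used here.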
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty

  /**
    * Store-Load Memory violation detection
    * Scheme 1 (current scheme): flush the pipeline, then re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * that access the same physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    * Cycle 1: Select oldest load from select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load from LoadPipelineWidth oldest loads.
    *   Prepare redirect request according to the detected violation.
    *   Fire redirect request (if valid)
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:           lq                    lq                     lq
  //                     \  |  /  .......  \  |  /   ........  \  |  /
  // stage 3:               lq                lq                  lq
  //                                          ...
  //                                          ...
  //                                           |
  // stage x:                                  lq
  //                                           |
  //                                       rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1
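  // Illustrative sizing (assumed parameter values): with LoadQueueRAWSize = 64
  // and RollbackGroupSize = 8, lgSelectGroupSize = 3 and
  // TotalSelectCycles = ceil(log2Ceil(64) / 3) + 1 = ceil(6 / 3) + 1 = 3.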

  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }
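  // selectPartialOldest is a purely combinational binary reduction: the
  // two-element base case keeps the older entry (isAfter on robIdx), and larger
  // inputs are split in half, reduced recursively, and the two winners compared
  // again. A rough software analogue, for intuition only (pretending robIdx
  // were a plain monotonically increasing Int):
  //   def oldest(xs: Seq[(Boolean, Int)]): Option[Int] =
  //     xs.collect { case (true, robIdx) => robIdx }.reduceOption(_ min _)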

  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups =
      if (valid.length <= SelectGroupSize) {
        Seq(valid)
      } else {
        (0 until numSelectGroups).map(g => {
          if (valid.length < (g + 1) * SelectGroupSize) {
            valid.takeRight(valid.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
          }
        })
      }
    val selectBitsGroups =
      if (bits.length <= SelectGroupSize) {
        Seq(bits)
      } else {
        (0 until numSelectGroups).map(g => {
          if (bits.length < (g + 1) * SelectGroupSize) {
            bits.takeRight(bits.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
          }
        })
      }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      val selValidNext = RegNext(selValid(0))
      val selBitsNext = RegNext(selBits(0))
      (Seq(selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect))), Seq(selBitsNext))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        val selValidNext = RegNext(selValid(0))
        val selBitsNext = RegNext(selBits(0))
        (selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect)), selBitsNext)
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }
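  // selectOldest adds one register stage per recursion level: each group of up
  // to SelectGroupSize candidates is reduced combinationally with
  // selectPartialOldest, the group winner is registered, and the much smaller
  // set of winners is fed back into selectOldest. Winners are dropped along the
  // way if a redirect (this cycle or last cycle) flushes them.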

  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := io.storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := io.storeIn(i).bits.mask

    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, DCacheVWordOffset) === io.storeIn(i).bits.paddr(PAddrBits-1, DCacheVWordOffset))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & io.storeIn(i).bits.mask).orR)))
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) & Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, io.storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      io.storeIn(i).bits.uop.pc, io.storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }
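  // detectRollback, per store pipeline i: the store's paddr and mask CAM-search
  // the whole queue. Because the paddr/mask arrays are written with a delay
  // (numWDelay = 2), a bypass path re-checks recently enqueued loads using the
  // registered bypassPAddr/bypassMask copies. An entry is a violation candidate
  // when it overlaps the store address/mask, is younger than the store (isAfter
  // on robIdx), has valid data, and is not being flushed; selectOldest then
  // returns the oldest such load.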

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOpRbExt)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid     := detectedRollback._1 && DelayN(io.storeIn(w).valid && !io.storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits.uop  := detectedRollback._2
    rollbackLqWb(w).bits.flag := w.U
    stFtqIdx(w) := DelayN(io.storeIn(w).bits.uop.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(io.storeIn(w).bits.uop.ftqOffset, TotalSelectCycles)
  }
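  // The store-side qualifiers (valid, !miss, ftqPtr, ftqOffset) are delayed by
  // TotalSelectCycles so they line up with the multi-cycle oldest-load
  // selection performed inside detectRollback.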

  val rollbackLqWbValid = rollbackLqWb.map(x => x.valid && !x.bits.uop.robIdx.needFlush(io.redirect))
  val rollbackLqWbBits = rollbackLqWb.map(x => x.bits)

  // select rollback (part2), generate rollback request, then fire rollback request
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's robIdx equals this cycle's robIdx, it still triggers the redirect.

  // select uop in parallel
  val lqs = selectPartialOldest(rollbackLqWbValid, rollbackLqWbBits)
  val rollbackUopExt = lqs._2(0)
  val rollbackUop = rollbackUopExt.uop
  val rollbackStFtqIdx = stFtqIdx(rollbackUopExt.flag)
  val rollbackStFtqOffset = stFtqOffset(rollbackUopExt.flag)
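  // rollbackUopExt.flag records which store pipeline produced the winning
  // candidate, so the matching delayed store ftqIdx/ftqOffset can be recovered
  // for the redirect below.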

  // check if rollback request is still valid in parallel
  io.rollback.bits             := DontCare
  io.rollback.bits.robIdx      := rollbackUop.robIdx
  io.rollback.bits.ftqIdx      := rollbackUop.ftqPtr
  io.rollback.bits.stFtqIdx    := rollbackStFtqIdx
  io.rollback.bits.ftqOffset   := rollbackUop.ftqOffset
  io.rollback.bits.stFtqOffset := rollbackStFtqOffset
  io.rollback.bits.level       := RedirectLevel.flush
  io.rollback.bits.interrupt   := DontCare
  io.rollback.bits.cfiUpdate   := DontCare
  io.rollback.bits.cfiUpdate.target := rollbackUop.pc
  io.rollback.bits.debug_runahead_checkpoint_id := rollbackUop.debugInfo.runahead_checkpoint_id
  // io.rollback.bits.pc := DontCare

  io.rollback.valid := VecInit(rollbackLqWbValid).asUInt.orR

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}