// File: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueueRAW.scala (revision cd2ff98b2a24aadafed81b4e4b16300bf3fd896d)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
import utils._
import utility._

class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(ValidIO(new Redirect))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // from store unit s1
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))

    // global rollback flush
    val rollback = Output(Valid(new Redirect))

    // to LoadQueueReplay
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr       = Input(new SqPtr)
    val lqFull           = Output(Bool())
  })

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-------+--------+-------+-------+-----------+
  //  | Valid |  uop   |PAddr  | Mask  | Datavalid |
  //  +-------+--------+-------+-------+-----------+
  //
  //  Field descriptions:
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  PAddr       : physical address.
  //  Mask        : data mask
  //  Datavalid   : data valid
  //
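  // Storage layout: allocated / uop / datavalid live in ordinary registers, while the physical
  // address and the data mask are kept in CAM-searchable modules (LqPAddrModule / LqMaskModule)
  // so that each store pipeline can compare its address and mask against every entry in parallel.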
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new MicroOp))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt((VLEN/8).W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  // freelist: stores the indices of free (unallocated) entries, which are handed out on allocation.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = true,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  //  LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
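  // Enqueue condition (needEnqueue above): the query is valid, at least one older store still has
  // an unknown address (stAddrReadySqPtr is before the load's sqIdx, unless every issued store
  // address is already ready), and the load is not being flushed by the current redirect.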
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt((VLEN/8).W)))
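  // bypassPAddr / bypassMask capture the paddr/mask of the load most recently enqueued by each
  // load pipeline. The paddr/mask CAMs are written with a delay (numWDelay = 2), so detectRollback
  // also compares the incoming store against these registers so that just-enqueued loads are not
  // missed by the store-side violation search.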

  // Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    //  Allocate ready
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)
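    // A query that does not need an entry is always accepted; otherwise acceptance depends on the
    // freelist having a slot available at this query's allocation offset.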

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      //  Allocate new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      //  Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      //  Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.data_valid
    }
  }

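  // Query response, one cycle after the request. The RAW queue never asks the load pipeline to
  // replay from fetch at this point; violations are reported later through io.rollback.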
  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.rep_frm_fetch := RegNext(false.B)
  }

  //  LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // When all stores older than a load have their addresses ready,
  // that load no longer needs RAW tracking and can be released.
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needCancel)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // If the load is revoked (needs replay), deallocate the entry that was allocated last cycle.
  val lastCanAccept = RegNext(acceptedVec)
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty

  /**
    * Store-Load memory violation detection
    * Scheme 1 (current scheme): flush the pipeline, then re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions starting from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * whose physical address overlaps the store's. They may have loaded stale data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate the match vector by comparing the store address and mask against all valid entries.
    * Cycle 1: Select the oldest load within each select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load among the per-store-pipeline candidates.
    *   Prepare the redirect request according to the detected violation.
    *   Fire the redirect request (if valid).
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:           lq                    lq                     lq
  //                     \  |  /  .......  \  |  /   ........  \  |  /
  // stage 3:               lq                lq                  lq
  //                                          ...
  //                                          ...
  //                                           |
  // stage x:                                  lq
  //                                           |
  //                                       rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1
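  // Latency sketch (illustrative parameter values, not necessarily the configured ones):
  // with LoadQueueRAWSize = 32 and RollbackGroupSize = 8, log2Ceil(32) = 5 and lgSelectGroupSize = 3,
  // giving TotalSelectCycles = ceil(5 / 3) + 1 = 3. The store-side qualifiers below are delayed by the
  // same number of cycles (DelayN) so that they line up with the output of the select tree.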
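  // selectPartialOldest: purely combinational binary-tournament selection. "Oldest" means earliest in
  // program order, decided by comparing robIdx with isAfter; when only one of a pair is valid, that one
  // wins. The result is a single (valid, bits) pair.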
  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

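  // selectOldest: splits the candidates into groups of SelectGroupSize, picks the oldest of each group
  // combinationally with selectPartialOldest, registers each group winner (one pipeline stage per tree
  // level) while dropping winners flushed by the current or the previous cycle's redirect, and recurses
  // on the group winners until a single candidate remains.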
  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups =
      if (valid.length <= SelectGroupSize) {
        Seq(valid)
      } else {
        (0 until numSelectGroups).map(g => {
          if (valid.length < (g + 1) * SelectGroupSize) {
            valid.takeRight(valid.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
          }
        })
      }
    val selectBitsGroups =
      if (bits.length <= SelectGroupSize) {
        Seq(bits)
      } else {
        (0 until numSelectGroups).map(g => {
          if (bits.length < (g + 1) * SelectGroupSize) {
            bits.takeRight(bits.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
          }
        })
      }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      val selValidNext = RegNext(selValid(0))
      val selBitsNext = RegNext(selBits(0))
      (Seq(selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect))), Seq(selBitsNext))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        val selValidNext = RegNext(selValid(0))
        val selBitsNext = RegNext(selBits(0))
        (selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect)), selBitsNext)
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }

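  // detectRollback(i): violation search for store pipeline i. Roughly:
  //   cycle 0: the store's paddr/mask are driven into the CAMs (violationMdata) and compared against the
  //            bypass registers of recently enqueued loads (whose CAM writes may still be in flight);
  //   cycle 1: the registered match masks are combined with the age / datavalid / flush filters to form
  //            the candidate vector;
  //   then   : selectOldest picks the oldest violating load over the remaining select cycles.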
  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := io.storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := io.storeIn(i).bits.mask

    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, DCacheVWordOffset) === io.storeIn(i).bits.paddr(PAddrBits-1, DCacheVWordOffset))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & io.storeIn(i).bits.mask).orR)))
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) & Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, io.storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      io.storeIn(i).bits.uop.cf.pc, io.storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new MicroOp)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(io.storeIn(w).valid && !io.storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits  := detectedRollback._2
    stFtqIdx(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(io.storeIn(w).bits.uop.cf.ftqOffset, TotalSelectCycles)
  }

  // select rollback (part2), generate the rollback request, then fire it
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, even if last cycle's robIdx equals this cycle's robIdx, the redirect is still triggered.

  // select uop in parallel
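  // selectOldestRedirect builds a one-hot vector marking the oldest valid redirect (ties go to the
  // lower index). For two candidates x0 and x1 this reduces to:
  //   result(0) = x0.valid && (!x1.valid || !isAfter(x0.robIdx, x1.robIdx))
  //   result(1) = x1.valid && (!x0.valid ||  isAfter(x0.robIdx, x1.robIdx))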
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
      else if (j == i) xs(i).valid
      else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  val allRedirect = (0 until StorePipelineWidth).map(i => {
    val redirect = Wire(Valid(new Redirect))
    redirect.valid := rollbackLqWb(i).valid
    redirect.bits             := DontCare
    redirect.bits.isRVC       := rollbackLqWb(i).bits.cf.pd.isRVC
    redirect.bits.robIdx      := rollbackLqWb(i).bits.robIdx
    redirect.bits.ftqIdx      := rollbackLqWb(i).bits.cf.ftqPtr
    redirect.bits.ftqOffset   := rollbackLqWb(i).bits.cf.ftqOffset
    redirect.bits.stFtqIdx    := stFtqIdx(i)
    redirect.bits.stFtqOffset := stFtqOffset(i)
    redirect.bits.level       := RedirectLevel.flush
    redirect.bits.cfiUpdate.target := rollbackLqWb(i).bits.cf.pc
    redirect.bits.debug_runahead_checkpoint_id := rollbackLqWb(i).bits.debugInfo.runahead_checkpoint_id
    redirect
  })
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = Mux1H(oldestOneHot, allRedirect)
  io.rollback := oldestRedirect

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}