xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueueRAW.scala (revision 3c808de005aba6d7539d33be9962c44375b97e6d)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config._
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.frontend.FtqPtr
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.DynInst
import xiangshan.mem.mdp._
import xiangshan.mem.Bundles._
import xiangshan.cache._

class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(ValidIO(new Redirect))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // from store unit s1
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))

    // global rollback flush
    val rollback = Vec(StorePipelineWidth, Output(Valid(new Redirect)))

    // to LoadQueueReplay
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr       = Input(new SqPtr)
    val lqFull           = Output(Bool())
  })

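  // NOTE: only a PartialPAddrWidth-bit slice of the physical address above the
  // VWord offset is stored and CAM-compared (see genPartialPAddr below), presumably
  // to keep the paddr CAM narrow. Aliasing in the dropped upper bits can only cause
  // spurious matches, i.e. extra (conservative) rollbacks, never a missed violation.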
  private def PartialPAddrWidth: Int = 24
  private def paddrOffset: Int = DCacheVWordOffset
  private def genPartialPAddr(paddr: UInt) = {
    paddr(DCacheVWordOffset + PartialPAddrWidth - 1, paddrOffset)
  }

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-------+--------+-------+-------+-----------+
  //  | Valid |  uop   |PAddr  | Mask  | Datavalid |
  //  +-------+--------+-------+-------+-----------+
  //
  //  Field descriptions:
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  PAddr       : physical address
  //  Mask        : data mask
  //  Datavalid   : data valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // control signals need an explicit initial (reset) value
  val uop = Reg(Vec(LoadQueueRAWSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PartialPAddrWidth.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth,
    enableCacheLineCheck = true,
    paddrOffset = paddrOffset
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt((VLEN/8).W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  // freeList: stores the indices of free (unallocated) entries.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = true,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  //  LoadQueueRAW enqueue
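  //  A load is recorded here only while at least one older store still has an
  //  unresolved address (stAddrReadySqPtr is still before the load's sqIdx). When
  //  allAddrCheck holds, every issued store address is already known and the load
  //  does not need an entry.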
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }

  // Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt(log2Up(LoadQueueRAWSize).W)))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    //  Allocate ready
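    //  `offset` counts how many lower-numbered lanes also enqueue this cycle, so
    //  lane w claims the offset-th allocatable slot from the free list.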
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      freeList.io.doAllocate(w) := true.B

      //  Allocate new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := genPartialPAddr(enq.bits.paddr)

      //  Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask

      //  Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.data_valid
    }
    val debug_robIdx = enq.bits.uop.robIdx.asUInt
    XSError(needEnqueue(w) && enq.ready && allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")
  }

  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.rep_frm_fetch := RegNext(false.B)
  }
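  // The query response returns one cycle after the request. rep_frm_fetch is tied
  // low here: RAW violations are reported through io.rollback rather than by asking
  // the queried load itself to replay from fetch.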

  //  LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // When all stores older than the current load have their addresses ready,
  // the current load can be released.
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)

    when (allocated(i) && (deqNotBlock || needCancel)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // If the load needs to be replayed, its revoke deallocates the entry.
  val lastCanAccept = GatedValidRegNext(acceptedVec)
  val lastAllocIndex = GatedRegNext(enqIndexVec)
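  // lastCanAccept / lastAllocIndex remember last cycle's per-lane allocations so a
  // revoke can free exactly the entry that lane just claimed.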

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty

  /**
    * Store-Load Memory violation detection
    * Scheme 1 (current scheme): flush the pipeline, then re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions starting from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * with the same physical address. They loaded wrong data and need to be re-executed.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    * Cycle 1: Select oldest load from select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load from LoadPipelineWidth oldest loads.
    *   Prepare redirect request according to the detected violation.
    *   Fire redirect request (if valid)
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:           lq                    lq                     lq
  //                     \  |  /  .......  \  |  /   ........  \  |  /
  // stage 3:               lq                lq                  lq
  //                                          ...
  //                                          ...
  //                                           |
  // stage x:                                  lq
  //                                           |
  //                                       rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1
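  // Each level of the selection tree compares up to SelectGroupSize candidates and
  // registers the winner, so the total latency is
  // ceil(log2(LoadQueueRAWSize) / log2(SelectGroupSize)) + 1 cycles. For example
  // (hypothetical sizing), 32 entries with a group size of 8 give ceil(5/3) + 1 = 3 cycles.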

  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

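  // selectOldest: split the candidates into groups of SelectGroupSize, pick the
  // oldest of each group with selectPartialOldest (a robIdx tournament), register
  // the per-group winners, and recurse until a single candidate remains. Each
  // recursion level adds one cycle; in-flight winners are dropped on redirect.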
  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups = valid.grouped(SelectGroupSize).toList
    val selectBitsGroups = bits.grouped(SelectGroupSize).toList
    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      val selValidNext = GatedValidRegNext(selValid(0))
      val selBitsNext = RegEnable(selBits(0), selValid(0))
      (Seq(selValidNext && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect))), Seq(selBitsNext))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        val selValidNext = RegNext(selValid(0))
        val selBitsNext = RegEnable(selBits(0), selValid(0))
        (selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect)), selBitsNext)
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }

  val storeIn = io.storeIn

  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := genPartialPAddr(RegEnable(storeIn(i).bits.paddr, storeIn(i).valid))
    paddrModule.io.violationCheckLine.get(i) := storeIn(i).bits.wlineflag
    maskModule.io.violationMdata(i) := RegEnable(storeIn(i).bits.mask, storeIn(i).valid)
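    // Drive the CAM compare ports with the store's (registered) partial paddr and
    // byte mask; violationMmask below is the resulting per-entry match vector.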

    val addrMaskMatch = paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt
    val entryNeedCheck = GatedValidRegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && storeIn(i).valid && isAfter(uop(j).robIdx, storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))
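    // An entry is a violation candidate iff its paddr/mask overlap the store
    // (addrMaskMatch) and, when the store arrived, it was an allocated, data-valid
    // load younger than the store and not being flushed (entryNeedCheck).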

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect: (Seq[Bool], Seq[XSBundleWithMicroOp]) = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      storeIn(i).bits.uop.pc, storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new DynInst)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(storeIn(w).valid && !storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits  := detectedRollback._2
    stFtqIdx(w) := DelayNWithValid(storeIn(w).bits.uop.ftqPtr, storeIn(w).valid, TotalSelectCycles)._2
    stFtqOffset(w) := DelayNWithValid(storeIn(w).bits.uop.ftqOffset, storeIn(w).valid, TotalSelectCycles)._2
  }
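  // The store's valid/miss and FTQ info are delayed by TotalSelectCycles so that
  // they line up with the winner emerging from the selection tree.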

  // select rollback (part2), generate rollback request, then fire rollback request
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, even if last cycle's robIdx equals this cycle's robIdx, it still triggers the redirect.

  // select uop in parallel

  val allRedirect = (0 until StorePipelineWidth).map(i => {
    val redirect = Wire(Valid(new Redirect))
    redirect.valid := rollbackLqWb(i).valid
    redirect.bits             := DontCare
    redirect.bits.isRVC       := rollbackLqWb(i).bits.preDecodeInfo.isRVC
    redirect.bits.robIdx      := rollbackLqWb(i).bits.robIdx
    redirect.bits.ftqIdx      := rollbackLqWb(i).bits.ftqPtr
    redirect.bits.ftqOffset   := rollbackLqWb(i).bits.ftqOffset
    redirect.bits.stFtqIdx    := stFtqIdx(i)
    redirect.bits.stFtqOffset := stFtqOffset(i)
    redirect.bits.level       := RedirectLevel.flush
    redirect.bits.cfiUpdate.target := rollbackLqWb(i).bits.pc
    redirect.bits.debug_runahead_checkpoint_id := rollbackLqWb(i).bits.debugInfo.runahead_checkpoint_id
    redirect
  })
  io.rollback := allRedirect
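  // One rollback request is generated per store pipeline; choosing the oldest among
  // them (and against other redirect sources) is presumably left to the downstream
  // redirect arbitration logic.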

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U
  val rollbackValid = io.rollback.map(_.valid).reduce(_ || _).asUInt

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", rollbackValid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", rollbackValid),
  )
  generatePerfEvent()
  // end
}