xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueueRAW.scala (revision 627be78b11e6272c7c42f2b6b878598058ff15a9)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.mem.mdp._
import utils._
import utility._
import xiangshan.backend.Bundles.DynInst

class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(ValidIO(new Redirect))
    val vecFeedback = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))

    // violation query
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // from store unit s1
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))

    // global rollback flush
    val rollback = Output(Valid(new Redirect))

    // to LoadQueueReplay
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr       = Input(new SqPtr)
    val lqFull           = Output(Bool())
  })
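
  // Overview:
  //  - A load allocates an entry here when at least one older store address is still
  //    unknown at query time (see needEnqueue below).
  //  - Store addresses arriving from the store units are CAMed against the recorded
  //    paddr/mask of all allocated entries.
  //  - If a younger load with an overlapping address is found, a rollback redirect is
  //    generated to re-fetch from that load (see the violation detection section below).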

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-----------+-------+-------+------+-----------+
  //  | Allocated |  Uop  | PAddr | Mask | Datavalid |
  //  +-----------+-------+-------+------+-----------+
  //
  //  Field descriptions:
  //  Allocated   : the entry has been allocated
  //  Uop         : the instruction's micro-op
  //  PAddr       : physical address
  //  Mask        : data mask
  //  Datavalid   : load data is valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt((VLEN/8).W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  // freelist: manages allocation and freeing of queue entry indices.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = true,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  //  LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
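  // A load only needs an entry here if at least one older store still has an unknown
  // address; once stAddrReadySqPtr catches up with stIssuePtr, every issued store
  // address is known and no query needs to be recorded.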
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt((VLEN/8).W)))
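  // Writes to the paddr/mask modules are delayed (numWDelay = 2), so the most recent
  // enqueues are not yet visible to the store CAM. bypassPAddr/bypassMask hold each load
  // pipeline's latest write and are matched separately in detectRollback.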

  // Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    //  Allocate ready
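    // The w-th enqueuing query uses the offset-th allocation port, where offset counts
    // how many earlier queries in the same cycle also need an entry; a query that does
    // not need an entry is always ready.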
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write a valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      //  Allocate new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      //  Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      //  Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.data_valid
    }
  }

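  // Nuke-query response: the response never sets rep_frm_fetch, i.e. the RAW queue does
  // not request a re-fetch at query time; violations detected later are reported through
  // io.rollback instead.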
  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.rep_frm_fetch := RegNext(false.B)
  }

  //  LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // When every store older than a load has its address ready, the load no longer needs
  // to be tracked here and its entry is released (also on redirect or vector-load flush).
  val vecLdCanceltmp = Wire(Vec(LoadQueueRAWSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCancel = Wire(Vec(LoadQueueRAWSize, Bool()))
  for (i <- 0 until LoadQueueRAWSize) {
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)
    val fbk = io.vecFeedback
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCanceltmp(i)(j) := fbk(j).valid && fbk(j).bits.isFlush && uop(i).robIdx === fbk(j).bits.robidx && uop(i).uopIdx === fbk(j).bits.uopidx
    }
    vecLdCancel(i) := vecLdCanceltmp(i).reduce(_ || _)

    when (allocated(i) && (deqNotBlock || needCancel || vecLdCancel(i))) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // If the load needs to be replayed (revoked), deallocate the entry it just allocated.
  val lastCanAccept = RegNext(acceptedVec)
  val lastAllocIndex = RegNext(enqIndexVec)
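  // A revoke arrives one cycle after the corresponding query, so the allocation decision
  // and the allocated index are registered to locate the entry that must be freed.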

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty

  /**
    * Store-Load Memory violation detection
    * Scheme 1 (current scheme): flush the pipeline, then re-fetch from the load instruction (like the old load queue).
    * Scheme 2                 : re-fetch instructions from the first instruction after the store instruction.
    *
    * When a store writes back, it searches the LoadQueue for younger load instructions
    * with an overlapping physical address. Those loads read stale data and must be re-executed.
    *
    * Cycle 0: Store Writeback
    *   Generate the match vector by CAMing the store address and mask against all allocated entries.
    * Cycle 1: Select the oldest load within each select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load among the candidates from each store pipeline.
    *   Prepare the redirect request according to the detected violation.
    *   Fire the redirect request (if valid).
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:           lq                    lq                     lq
  //                     \  |  /  .......  \  |  /   ........  \  |  /
  // stage 3:               lq                lq                  lq
  //                                          ...
  //                                          ...
  //                                           |
  // stage x:                                  lq
  //                                           |
  //                                       rollback req

  // select logic
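  // The oldest violating load is found with a pipelined selection tree: each stage picks
  // the oldest entry within groups of SelectGroupSize and registers the result, so the
  // full reduction over LoadQueueRAWSize entries takes
  // ceil(log2(LoadQueueRAWSize) / log2(SelectGroupSize)) cycles, plus one more cycle for
  // the registered CAM/match stage in detectRollback (hence the "+ 1" below).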
  val SelectGroupSize = RollbackGroupSize
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)
  val TotalSelectCycles = scala.math.ceil(log2Ceil(LoadQueueRAWSize).toFloat / lgSelectGroupSize).toInt + 1

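  // selectPartialOldest: purely combinational divide-and-conquer reduction that returns
  // the valid entry with the oldest robIdx among its inputs (a valid entry always wins
  // over an invalid one).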
  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectPartialOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectPartialOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

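  // selectOldest: splits the candidates into groups of SelectGroupSize, reduces each
  // group with selectPartialOldest, registers the per-group winners (one pipeline stage
  // per tree level) and recurses; winners that get flushed by a redirect while the
  // selection is in flight are dropped.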
  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    val numSelectGroups = scala.math.ceil(valid.length.toFloat / SelectGroupSize).toInt

    // group info
    val selectValidGroups =
      if (valid.length <= SelectGroupSize) {
        Seq(valid)
      } else {
        (0 until numSelectGroups).map(g => {
          if (valid.length < (g + 1) * SelectGroupSize) {
            valid.takeRight(valid.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => valid(g * SelectGroupSize + j))
          }
        })
      }
    val selectBitsGroups =
      if (bits.length <= SelectGroupSize) {
        Seq(bits)
      } else {
        (0 until numSelectGroups).map(g => {
          if (bits.length < (g + 1) * SelectGroupSize) {
            bits.takeRight(bits.length - g * SelectGroupSize)
          } else {
            (0 until SelectGroupSize).map(j => bits(g * SelectGroupSize + j))
          }
        })
      }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (selValid, selBits) = selectPartialOldest(valid, bits)
      val selValidNext = RegNext(selValid(0))
      val selBitsNext = RegNext(selBits(0))
      (Seq(selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect))), Seq(selBitsNext))
    } else {
      val select = (0 until numSelectGroups).map(g => {
        val (selValid, selBits) = selectPartialOldest(selectValidGroups(g), selectBitsGroups(g))
        val selValidNext = RegNext(selValid(0))
        val selBitsNext = RegNext(selBits(0))
        (selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect)), selBitsNext)
      })
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }

  val storeIn = io.storeIn

  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := storeIn(i).bits.mask

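    // Entries enqueued in the last two cycles may not be visible in the paddr/mask
    // modules yet (writes are delayed); match the store against the per-pipeline bypass
    // registers as well and fold the hits into the CAM result below.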
    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, DCacheVWordOffset) === storeIn(i).bits.paddr(PAddrBits-1, DCacheVWordOffset))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & storeIn(i).bits.mask).orR)))
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) & Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

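    // A queue entry is a violation candidate when its address/mask overlaps the store
    // and it is allocated, younger than the store (robIdx after), already has its data,
    // and is not being flushed.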
    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      storeIn(i).bits.uop.pc, storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new DynInst)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(storeIn(w).valid && !storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits  := detectedRollback._2
    stFtqIdx(w) := DelayN(storeIn(w).bits.uop.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(storeIn(w).bits.uop.ftqOffset, TotalSelectCycles)
  }

  // select rollback (part2), generate the rollback request, then fire the rollback request
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, even if last cycle's robIdx equals this cycle's robIdx, the redirect is still triggered.

  // select uop in parallel
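  // selectOldestRedirect compares every pair of redirect requests by robIdx and returns
  // an (at most) one-hot vector marking the valid request with the oldest robIdx.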
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
      else if (j == i) xs(i).valid
      else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  val allRedirect = (0 until StorePipelineWidth).map(i => {
    val redirect = Wire(Valid(new Redirect))
    redirect.valid := rollbackLqWb(i).valid
    redirect.bits             := DontCare
    redirect.bits.isRVC       := rollbackLqWb(i).bits.preDecodeInfo.isRVC
    redirect.bits.robIdx      := rollbackLqWb(i).bits.robIdx
    redirect.bits.ftqIdx      := rollbackLqWb(i).bits.ftqPtr
    redirect.bits.ftqOffset   := rollbackLqWb(i).bits.ftqOffset
    redirect.bits.stFtqIdx    := stFtqIdx(i)
    redirect.bits.stFtqOffset := stFtqOffset(i)
    redirect.bits.level       := RedirectLevel.flush
    redirect.bits.cfiUpdate.target := rollbackLqWb(i).bits.pc
    redirect.bits.debug_runahead_checkpoint_id := rollbackLqWb(i).bits.debugInfo.runahead_checkpoint_id
    redirect
  })
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = Mux1H(oldestOneHot, allRedirect)
  io.rollback := oldestRedirect

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}