xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadQueueRAW.scala (revision 627be78b11e6272c7c42f2b6b878598058ff15a9)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
16e4f69d78Ssfencevma
17e4f69d78Ssfencevmapackage xiangshan.mem
18e4f69d78Ssfencevma
19e4f69d78Ssfencevmaimport chisel3._
20e4f69d78Ssfencevmaimport chisel3.util._
218891a219SYinan Xuimport org.chipsalliance.cde.config._
22e4f69d78Ssfencevmaimport xiangshan._
23e4f69d78Ssfencevmaimport xiangshan.backend.rob.RobPtr
24e4f69d78Ssfencevmaimport xiangshan.cache._
25e4f69d78Ssfencevmaimport xiangshan.frontend.FtqPtr
26e4f69d78Ssfencevmaimport xiangshan.mem.mdp._
27e4f69d78Ssfencevmaimport utils._
28e4f69d78Ssfencevmaimport utility._
29dfb4c5dcSXuan Huimport xiangshan.backend.Bundles.DynInst
30e4f69d78Ssfencevma
/**
  * Load queue used for store-to-load (read-after-write) violation detection.
  *
  * A load that queries this module is recorded (uop, physical address, byte mask, data-valid
  * flag) when `io.stAddrReadySqPtr` is still before the load's `sqIdx` and not every issued
  * store address has been checked yet. Each store arriving on `io.storeIn` is compared against
  * the recorded loads; the oldest matching load that is younger than the store is reported on
  * `io.rollback` as a flush-level redirect.
  *
  * NOTE: hardware statements in this class rely on Chisel last-connect semantics; the relative
  * order of the enqueue, deallocate and revoke sections must be preserved.
  */
class LoadQueueRAW(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(ValidIO(new Redirect))
    val vecFeedback = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))

    // violation query (one port per load pipeline)
    val query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO))

    // from store unit s1
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))

    // global rollback flush
    val rollback = Output(Valid(new Redirect))

    // store-queue pointers used for enqueue/release decisions, and the queue-full flag
    val stAddrReadySqPtr = Input(new SqPtr)
    val stIssuePtr       = Input(new SqPtr)
    val lqFull           = Output(Bool())
  })

  println("LoadQueueRAW: size " + LoadQueueRAWSize)
  //  LoadQueueRAW field
  //  +-----------+---------+-------+-------+-----------+
  //  | Allocated | MicroOp | PAddr | Mask  | Datavalid |
  //  +-----------+---------+-------+-------+-----------+
  //
  //  Field descriptions:
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  PAddr       : physical address.
  //  Mask        : data mask
  //  Datavalid   : data valid
  //
  val allocated = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(LoadQueueRAWSize, new DynInst))
  val paddrModule = Module(new LqPAddrModule(
    gen = UInt(PAddrBits.W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  paddrModule.io := DontCare
  val maskModule = Module(new LqMaskModule(
    gen = UInt((VLEN/8).W),
    numEntries = LoadQueueRAWSize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = StorePipelineWidth
  ))
  maskModule.io := DontCare
  val datavalid = RegInit(VecInit(List.fill(LoadQueueRAWSize)(false.B)))

  // freelist: stores the indices of free entries.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueRAWSize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    enablePreAlloc = true,
    moduleName = "LoadQueueRAW freelist"
  ))
  freeList.io := DontCare

  //  LoadQueueRAW enqueue
  val canEnqueue = io.query.map(_.req.valid)
  val cancelEnqueue = io.query.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  // All issued stores have had their address checked: nothing can be pending for any load.
  val allAddrCheck = io.stIssuePtr === io.stAddrReadySqPtr
  // Per query port: the address-ready pointer is still before the load's sqIdx.
  val hasAddrInvalidStore = io.query.map(_.req.bits.uop.sqIdx).map(sqIdx => {
    Mux(!allAddrCheck, isBefore(io.stAddrReadySqPtr, sqIdx), false.B)
  })
  // A request needs an entry only if it is valid, has a pending older store, and is not flushed.
  val needEnqueue = canEnqueue.zip(hasAddrInvalidStore).zip(cancelEnqueue).map { case ((v, r), c) => v && r && !c }
  // Copies of the most recently written paddr/mask per port, used by the bypass compare
  // in detectRollback.
  val bypassPAddr = Reg(Vec(LoadPipelineWidth, UInt(PAddrBits.W)))
  val bypassMask = Reg(Vec(LoadPipelineWidth, UInt((VLEN/8).W)))

  // Allocate logic
  val acceptedVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))

  // Enqueue
  for ((enq, w) <- io.query.map(_.req).zipWithIndex) {
    // Defaults; overridden below when the request is actually accepted.
    acceptedVec(w) := false.B
    paddrModule.io.wen(w) := false.B
    maskModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    freeList.io.allocateReq(w) := true.B

    //  Allocate ready
    //  The slot offset is the number of lower-numbered ports that also need an entry this cycle.
    val offset = PopCount(needEnqueue.take(w))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex = freeList.io.allocateSlot(offset)
    // Requests that do not need an entry are always ready.
    enq.ready := Mux(needEnqueue(w), canAccept, true.B)

    enqIndexVec(w) := enqIndex
    when (needEnqueue(w) && enq.ready) {
      acceptedVec(w) := true.B

      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex), p"LoadQueueRAW: You can not write an valid entry! check: ldu $w, robIdx $debug_robIdx")

      freeList.io.doAllocate(w) := true.B

      //  Allocate new entry
      allocated(enqIndex) := true.B

      //  Write paddr
      paddrModule.io.wen(w) := true.B
      paddrModule.io.waddr(w) := enqIndex
      paddrModule.io.wdata(w) := enq.bits.paddr
      bypassPAddr(w) := enq.bits.paddr

      //  Write mask
      maskModule.io.wen(w) := true.B
      maskModule.io.waddr(w) := enqIndex
      maskModule.io.wdata(w) := enq.bits.mask
      bypassMask(w) := enq.bits.mask

      //  Fill info
      uop(enqIndex) := enq.bits.uop
      datavalid(enqIndex) := enq.bits.data_valid
    }
  }

  // Query response: valid one cycle after the request; this queue never asks for a re-fetch.
  for ((query, w) <- io.query.map(_.resp).zipWithIndex) {
    query.valid := RegNext(io.query(w).req.valid)
    query.bits.rep_frm_fetch := RegNext(false.B)
  }

  //  LoadQueueRAW deallocate
  val freeMaskVec = Wire(Vec(LoadQueueRAWSize, Bool()))

  // init
  freeMaskVec.foreach(_ := false.B)

  // when the stores that "older than" current load address were ready.
  // current load will be released.
  val vecLdCanceltmp = Wire(Vec(LoadQueueRAWSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCancel = Wire(Vec(LoadQueueRAWSize, Bool()))
  for (i <- 0 until LoadQueueRAWSize) {
    // The entry may leave once the address-ready pointer is no longer before its sqIdx.
    val deqNotBlock = Mux(!allAddrCheck, !isBefore(io.stAddrReadySqPtr, uop(i).sqIdx), true.B)
    val needCancel = uop(i).robIdx.needFlush(io.redirect)
    val fbk = io.vecFeedback
    // Vector feedback flush: cancel the entry whose robIdx and uopIdx both match.
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCanceltmp(i)(j) := fbk(j).valid && fbk(j).bits.isFlush && uop(i).robIdx === fbk(j).bits.robidx && uop(i).uopIdx === fbk(j).bits.uopidx
    }
    vecLdCancel(i) := vecLdCanceltmp(i).reduce(_ || _)

    when (allocated(i) && (deqNotBlock || needCancel || vecLdCancel(i))) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  // if need replay deallocate entry
  // (the entry allocated in the previous cycle is released again when the port revokes it)
  val lastCanAccept = RegNext(acceptedVec)
  val lastAllocIndex = RegNext(enqIndexVec)

  for ((revoke, w) <- io.query.map(_.revoke).zipWithIndex) {
    val revokeValid = revoke && lastCanAccept(w)
    val revokeIndex = lastAllocIndex(w)

    when (allocated(revokeIndex) && revokeValid) {
      allocated(revokeIndex) := false.B
      freeMaskVec(revokeIndex) := true.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := freeList.io.empty

  /**
    * Store-Load Memory violation detection
    * Scheme 1(Current scheme): flush the pipeline then re-fetch from the load instruction (like old load queue).
    * Scheme 2                : re-fetch instructions from the first instruction after the store instruction.
    *
    * When store writes back, it searches LoadQueue for younger load instructions
    * with the same load physical address. They loaded wrong data and need re-execution.
    *
    * Cycle 0: Store Writeback
    *   Generate match vector for store address with rangeMask(stPtr, enqPtr).
    * Cycle 1: Select oldest load from select group.
    * Cycle x: Redirect Fire
    *   Choose the oldest load from LoadPipelineWidth oldest loads.
    *   Prepare redirect request according to the detected violation.
    *   Fire redirect request (if valid)
    */
  //              SelectGroup 0         SelectGroup 1          SelectGroup y
  // stage 0:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                |   |   |             |   |   |              |   |   |
  // stage 1:       lq  lq  lq  ......    lq  lq  lq  .......    lq  lq  lq
  //                 \  |  /    ......     \  |  /    .......     \  |  /
  // stage 2:           lq                    lq                     lq
  //                     \  |  /  .......  \  |  /   ........  \  |  /
  // stage 3:               lq                lq                  lq
  //                                          ...
  //                                          ...
  //                                           |
  // stage x:                                  lq
  //                                           |
  //                                       rollback req

  // select logic
  val SelectGroupSize = RollbackGroupSize
  // A group of one entry never shrinks, so selectOldest would not terminate.
  require(SelectGroupSize >= 2, s"LoadQueueRAW: RollbackGroupSize must be at least 2, got $SelectGroupSize")
  val lgSelectGroupSize = log2Ceil(SelectGroupSize)

  /** Number of registered stages selectOldest creates for `numEntries` candidates.
    * Mirrors the recursion of selectOldest so the two cannot drift apart, including for
    * sizes that are not powers of two.
    */
  private def numSelectStages(numEntries: Int): Int = {
    if (numEntries <= SelectGroupSize) 1
    else 1 + numSelectStages((numEntries + SelectGroupSize - 1) / SelectGroupSize)
  }
  // +1 for the RegNext applied to the match vectors in detectRollback before selection starts.
  val TotalSelectCycles = numSelectStages(LoadQueueRAWSize) + 1

  /**
    * Combinationally reduce the candidates to the single oldest valid one (by robIdx).
    *
    * @param valid per-candidate valid flags
    * @param bits  per-candidate payloads, same length as `valid`
    * @return one-element sequences (valid, bits); inputs of length 0 or 1 are returned unchanged
    */
  def selectPartialOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    require(valid.length == bits.length, "selectPartialOldest: valid and bits must have the same length")
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      // Both valid: take the one that is not "after" the other; otherwise take whichever is valid
      // (falling back to candidate 1, whose valid is then the result's valid).
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      // Divide and conquer: reduce each half, then reduce the two winners.
      val half = valid.length / 2
      val left = selectPartialOldest(valid.take(half), bits.take(half))
      val right = selectPartialOldest(valid.drop(half), bits.drop(half))
      selectPartialOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  /**
    * Pipelined oldest-candidate selection. Candidates are split into groups of at most
    * SelectGroupSize; each group is reduced combinationally and registered, and the group
    * winners are reduced recursively until a single candidate remains.
    *
    * After every register stage a winner is invalidated if it is flushed by the current
    * redirect or by the redirect of the previous cycle.
    *
    * @param valid per-candidate valid flags
    * @param bits  per-candidate payloads, same length as `valid`
    * @return one-element sequences holding the final valid flag and payload
    */
  def selectOldest[T <: XSBundleWithMicroOp](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    require(valid.length == bits.length, "selectOldest: valid and bits must have the same length")

    // Reduce one group, register the winner, and drop it if it has been flushed meanwhile.
    def registeredOldest(groupValid: Seq[Bool], groupBits: Seq[T]): (Bool, T) = {
      val (selValid, selBits) = selectPartialOldest(groupValid, groupBits)
      val selValidNext = RegNext(selValid(0))
      val selBitsNext = RegNext(selBits(0))
      (selValidNext && !selBitsNext.uop.robIdx.needFlush(io.redirect) && !selBitsNext.uop.robIdx.needFlush(RegNext(io.redirect)), selBitsNext)
    }

    // select logic
    if (valid.length <= SelectGroupSize) {
      val (lastValid, lastBits) = registeredOldest(valid, bits)
      (Seq(lastValid), Seq(lastBits))
    } else {
      // group info: consecutive slices of SelectGroupSize entries, the last one possibly shorter
      val selectValidGroups = valid.grouped(SelectGroupSize).toSeq
      val selectBitsGroups = bits.grouped(SelectGroupSize).toSeq
      val select = selectValidGroups.zip(selectBitsGroups).map { case (groupValid, groupBits) =>
        registeredOldest(groupValid, groupBits)
      }
      selectOldest(select.map(_._1), select.map(_._2))
    }
  }

  val storeIn = io.storeIn

  /**
    * Build the violation check for store port `i`.
    *
    * Drives the CAM ports of the paddr/mask modules with the store's address and mask, merges
    * the CAM result with a compare against the per-port bypass registers, filters by
    * age/validity, and selects the oldest violating load.
    *
    * @param i store pipeline index
    * @return (violation detected, uop of the oldest violating load); the result is delayed by
    *         TotalSelectCycles relative to `storeIn(i)`
    */
  def detectRollback(i: Int) = {
    paddrModule.io.violationMdata(i) := storeIn(i).bits.paddr
    maskModule.io.violationMdata(i) := storeIn(i).bits.mask

    // Compare the store against the bypass registers of each load port.
    // NOTE(review): presumably this covers entries whose write into the paddr/mask modules is
    // still in flight (numWDelay = 2) and hence not yet visible to the CAM - confirm.
    val bypassPaddrMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => bypassPAddr(j)(PAddrBits-1, DCacheVWordOffset) === storeIn(i).bits.paddr(PAddrBits-1, DCacheVWordOffset))))
    val bypassMMask = RegNext(VecInit((0 until LoadPipelineWidth).map(j => (bypassMask(j) & storeIn(i).bits.mask).orR)))
    val bypassMaskUInt = (0 until LoadPipelineWidth).map(j =>
      Fill(LoadQueueRAWSize, RegNext(RegNext(io.query(j).req.fire))) & Mux(bypassPaddrMask(j) && bypassMMask(j), UIntToOH(RegNext(RegNext(enqIndexVec(j)))), 0.U(LoadQueueRAWSize.W))
    ).reduce(_|_)

    // Entries whose address and byte mask both overlap the store (CAM result or bypass hit).
    val addrMaskMatch = RegNext(paddrModule.io.violationMmask(i).asUInt & maskModule.io.violationMmask(i).asUInt) | bypassMaskUInt
    // Only allocated, data-valid, non-flushed loads that are younger than the store matter.
    val entryNeedCheck = RegNext(VecInit((0 until LoadQueueRAWSize).map(j => {
      allocated(j) && isAfter(uop(j).robIdx, storeIn(i).bits.uop.robIdx) && datavalid(j) && !uop(j).robIdx.needFlush(io.redirect)
    })))
    val lqViolationSelVec = VecInit((0 until LoadQueueRAWSize).map(j => {
      addrMaskMatch(j) && entryNeedCheck(j)
    }))

    // Wrap each uop so it satisfies the XSBundleWithMicroOp bound of selectOldest.
    val lqViolationSelUopExts = uop.map(uop => {
      val wrapper = Wire(new XSBundleWithMicroOp)
      wrapper.uop := uop
      wrapper
    })

    // select logic
    val lqSelect = selectOldest(lqViolationSelVec, lqViolationSelUopExts)

    // select one inst
    val lqViolation = lqSelect._1(0)
    val lqViolationUop = lqSelect._2(0).uop

    XSDebug(
      lqViolation,
      "need rollback (ld wb before store) pc %x robidx %d target %x\n",
      storeIn(i).bits.uop.pc, storeIn(i).bits.uop.robIdx.asUInt, lqViolationUop.robIdx.asUInt
    )

    (lqViolation, lqViolationUop)
  }

  // select rollback (part1) and generate rollback request
  // rollback check
  // Lq rollback seq check is done in s3 (next stage), as getting rollbackLq MicroOp is slow
  val rollbackLqWb = Wire(Vec(StorePipelineWidth, Valid(new DynInst)))
  val stFtqIdx = Wire(Vec(StorePipelineWidth, new FtqPtr))
  val stFtqOffset = Wire(Vec(StorePipelineWidth, UInt(log2Up(PredictWidth).W)))
  for (w <- 0 until StorePipelineWidth) {
    val detectedRollback = detectRollback(w)
    // Store-side qualifiers are delayed by the depth of the detection pipeline to stay aligned.
    rollbackLqWb(w).valid := detectedRollback._1 && DelayN(storeIn(w).valid && !storeIn(w).bits.miss, TotalSelectCycles)
    rollbackLqWb(w).bits  := detectedRollback._2
    stFtqIdx(w) := DelayN(storeIn(w).bits.uop.ftqPtr, TotalSelectCycles)
    stFtqOffset(w) := DelayN(storeIn(w).bits.uop.ftqOffset, TotalSelectCycles)
  }

  // select rollback (part2), generate rollback request, then fire rollback request
  // Note that we use robIdx - 1.U to flush the load instruction itself.
  // Thus, here if last cycle's robIdx equals to this cycle's robIdx, it still triggers the redirect.

  /**
    * Select the oldest valid redirect in parallel.
    *
    * @param xs candidate redirects
    * @return one flag per candidate; flag i is set when candidate i is valid, every valid
    *         lower-indexed candidate is after it, and it is not after any valid
    *         higher-indexed candidate
    */
  def selectOldestRedirect(xs: Seq[Valid[Redirect]]): Vec[Bool] = {
    // compareVec(i)(j), for j < i: candidate j is after (younger than) candidate i.
    val compareVec = (0 until xs.length).map(i => (0 until i).map(j => isAfter(xs(j).bits.robIdx, xs(i).bits.robIdx)))
    val resultOnehot = VecInit((0 until xs.length).map(i => Cat((0 until xs.length).map(j =>
      (if (j < i) !xs(j).valid || compareVec(i)(j)
      else if (j == i) xs(i).valid
      else !xs(j).valid || !compareVec(j)(i))
    )).andR))
    resultOnehot
  }
  // One flush-level redirect candidate per store pipeline, targeting the violating load.
  val allRedirect = (0 until StorePipelineWidth).map(i => {
    val redirect = Wire(Valid(new Redirect))
    redirect.valid := rollbackLqWb(i).valid
    redirect.bits             := DontCare
    redirect.bits.isRVC       := rollbackLqWb(i).bits.preDecodeInfo.isRVC
    redirect.bits.robIdx      := rollbackLqWb(i).bits.robIdx
    redirect.bits.ftqIdx      := rollbackLqWb(i).bits.ftqPtr
    redirect.bits.ftqOffset   := rollbackLqWb(i).bits.ftqOffset
    redirect.bits.stFtqIdx    := stFtqIdx(i)
    redirect.bits.stFtqOffset := stFtqOffset(i)
    redirect.bits.level       := RedirectLevel.flush
    redirect.bits.cfiUpdate.target := rollbackLqWb(i).bits.pc
    redirect.bits.debug_runahead_checkpoint_id := rollbackLqWb(i).bits.debugInfo.runahead_checkpoint_id
    redirect
  })
  val oldestOneHot = selectOldestRedirect(allRedirect)
  val oldestRedirect = Mux1H(oldestOneHot, allRedirect)
  io.rollback := oldestRedirect

  // perf cnt
  val canEnqCount = PopCount(io.query.map(_.req.fire))
  val validCount = freeList.io.validCount
  val allowEnqueue = validCount <= (LoadQueueRAWSize - LoadPipelineWidth).U

  QueuePerf(LoadQueueRAWSize, validCount, !allowEnqueue)
  XSPerfAccumulate("enqs", canEnqCount)
  XSPerfAccumulate("stld_rollback", io.rollback.valid)
  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq ", canEnqCount),
    ("stld_rollback", io.rollback.valid),
  )
  generatePerfEvent()
  // end
}