xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala (revision c7658a75968a011d92bf164a1a55872e64f06d44)
1package xiangshan.mem
2
3import chisel3._
4import chisel3.util._
5import utils._
6import xiangshan._
7import xiangshan.cache._
8import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
9import xiangshan.backend.LSUOpType
10import xiangshan.backend.roq.RoqPtr
11
12
/** Circular-queue pointer (wrap flag + index) sized for the store queue. */
class SqPtr extends CircularQueuePtr(SqPtr.StoreQueueSize)
14
object SqPtr extends HasXSParameter {
  /**
    * Builds a store-queue pointer wire from its two fields.
    *
    * @param f value driven onto the pointer's `flag` field
    * @param v value driven onto the pointer's `value` (index) field
    * @return a fresh `SqPtr` wire with both fields connected
    */
  def apply(f: Bool, v: UInt): SqPtr = {
    val sqPtr = Wire(new SqPtr)
    sqPtr.value := v
    sqPtr.flag := f
    sqPtr
  }
}
23
24// Store Queue
/**
  * Store queue.
  *
  * Holds stores from dispatch until they have been handed to the sbuffer (MMIO stores are
  * issued on the uncache channel instead). Entries live in a circular buffer:
  *  - `ringBufferHeadExtended` is the enqueue pointer, advanced when dispatch requests fire;
  *  - `ringBufferTailExtended` is the dequeue pointer, advanced past entries that are no
  *    longer allocated.
  */
class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    val dp1Req = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp))) // enqueue requests from dispatch
    val sqIdxs = Output(Vec(RenameWidth, new SqPtr)) // sq pointer assigned to each dp1Req slot
    val brqRedirect = Input(Valid(new Redirect)) // redirect used to cancel / replay entries
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // results from the store pipelines
    val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq)) // committed stores sent to sbuffer
    val stout = Vec(2, DecoupledIO(new ExuOutput)) // writeback store
    val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) // store-to-load forward queries
    val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit)))
    val uncache = new DCacheWordIO // channel used for mmio stores
    val roqDeqPtr = Input(new RoqPtr)
    // val refill = Flipped(Valid(new DCacheLineReq ))
    val oldestStore = Output(Valid(new RoqPtr))
    val exceptionAddr = new ExceptionAddrIO
  })

  // Per-entry state
  val uop = Reg(Vec(StoreQueueSize, new MicroOp))
  val data = Reg(Vec(StoreQueueSize, new LsRoqEntry)) // FIXME: use StoreQueueEntry instead
  val allocated = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // sq entry has been allocated
  val valid = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // data is valid
  val writebacked = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // inst has been writebacked to CDB
  val commited = Reg(Vec(StoreQueueSize, Bool())) // inst has been committed (set from io.commits)
  val miss = Reg(Vec(StoreQueueSize, Bool())) // miss flag copied from storeIn.bits.miss
  val listening = Reg(Vec(StoreQueueSize, Bool())) // waiting for refill result (only ever cleared in this module)
  val pending = Reg(Vec(StoreQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq

  // Circular buffer pointers: head = enqueue pointer, tail = dequeue pointer
  val ringBufferHeadExtended = RegInit(0.U.asTypeOf(new SqPtr))
  val ringBufferTailExtended = RegInit(0.U.asTypeOf(new SqPtr))
  val ringBufferHead = ringBufferHeadExtended.value
  val ringBufferTail = ringBufferTailExtended.value
  val ringBufferSameFlag = ringBufferHeadExtended.flag === ringBufferTailExtended.flag
  val ringBufferEmpty = ringBufferHead === ringBufferTail && ringBufferSameFlag
  val ringBufferFull = ringBufferHead === ringBufferTail && !ringBufferSameFlag
  val ringBufferAllowin = !ringBufferFull

  // A commit slot retires a store when it is valid, not a walk, and of STORE commit type
  val storeCommit = (0 until CommitWidth).map(i => io.commits(i).valid && !io.commits(i).bits.isWalk && io.commits(i).bits.uop.ctrl.commitType === CommitType.STORE)
  val mcommitIdx = (0 until CommitWidth).map(i => io.commits(i).bits.uop.sqIdx.value)

  // tailMask / headMask have all bits strictly below the tail / head index set.
  // enqDeqMask selects the index range between tail and head (inverted when the flags differ).
  val tailMask = (((1.U((StoreQueueSize + 1).W)) << ringBufferTail).asUInt - 1.U)(StoreQueueSize - 1, 0)
  val headMask = (((1.U((StoreQueueSize + 1).W)) << ringBufferHead).asUInt - 1.U)(StoreQueueSize - 1, 0)
  val enqDeqMask1 = tailMask ^ headMask
  val enqDeqMask = Mux(ringBufferSameFlag, enqDeqMask1, ~enqDeqMask1)

  // TODO: misc arbitor

  // Enqueue at dispatch
  val emptyEntries = StoreQueueSize.U - distanceBetween(ringBufferHeadExtended, ringBufferTailExtended)
  XSDebug("(ready, valid): ")
  for (i <- 0 until RenameWidth) {
    // slot i is placed after all valid requests in lower-numbered slots
    val offset = if (i == 0) 0.U else PopCount((0 until i).map(io.dp1Req(_).valid))
    val sqIdx = ringBufferHeadExtended + offset
    val index = sqIdx.value
    when(io.dp1Req(i).fire()) {
      uop(index) := io.dp1Req(i).bits
      allocated(index) := true.B
      valid(index) := false.B
      writebacked(index) := false.B
      commited(index) := false.B
      miss(index) := false.B
      listening(index) := false.B
      pending(index) := false.B
      // data(index).bwdMask := 0.U(8.W).asBools
    }
    // ready only if this request and all valid requests before it fit in the free entries
    val numTryEnqueue = offset +& io.dp1Req(i).valid
    io.dp1Req(i).ready := numTryEnqueue <= emptyEntries
    io.sqIdxs(i) := sqIdx
    XSDebug(false, true.B, "(%d, %d) ", io.dp1Req(i).ready, io.dp1Req(i).valid)
  }
  XSDebug(false, true.B, "\n")

  // Iterate over the dp1Req ports themselves (there are RenameWidth of them) rather than over
  // CommitWidth, which is unrelated to the number of dispatch ports.
  val firedDispatch = VecInit(io.dp1Req.map(_.fire())).asUInt
  when(firedDispatch.orR) {
    ringBufferHeadExtended := ringBufferHeadExtended + PopCount(firedDispatch)
    XSInfo("dispatched %d insts to sq\n", PopCount(firedDispatch))
  }

  // writeback store: record address / data / mask produced by the store pipelines
  (0 until StorePipelineWidth).foreach(i => {
    when(io.storeIn(i).fire()) {
      val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value
      // mmio stores stay invalid here; they are marked pending instead
      valid(stWbIndex) := !io.storeIn(i).bits.mmio
      data(stWbIndex).paddr := io.storeIn(i).bits.paddr
      data(stWbIndex).vaddr := io.storeIn(i).bits.vaddr
      data(stWbIndex).mask := io.storeIn(i).bits.mask
      data(stWbIndex).data := io.storeIn(i).bits.data
      data(stWbIndex).mmio := io.storeIn(i).bits.mmio
      data(stWbIndex).exception := io.storeIn(i).bits.uop.cf.exceptionVec.asUInt
      miss(stWbIndex) := io.storeIn(i).bits.miss
      pending(stWbIndex) := io.storeIn(i).bits.mmio
      XSInfo("store write to sq idx %d pc 0x%x vaddr %x paddr %x data %x miss %x mmio %x roll %x exc %x\n",
        io.storeIn(i).bits.uop.sqIdx.value,
        io.storeIn(i).bits.uop.cf.pc,
        io.storeIn(i).bits.vaddr,
        io.storeIn(i).bits.paddr,
        io.storeIn(i).bits.data,
        io.storeIn(i).bits.miss,
        io.storeIn(i).bits.mmio,
        io.storeIn(i).bits.rollback,
        io.storeIn(i).bits.uop.cf.exceptionVec.asUInt
        )
    }
  })

  /**
    * Index of the first set bit of `mask`, searching from a start position and wrapping around.
    *
    * @param mask      candidate bits
    * @param startMask bits set for every index below the start position; those candidates are
    *                  only considered when no candidate exists at or above the start position
    * @return the selected index (result of the priority encoder; meaningless if `mask` is all zero)
    */
  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
  }

  /**
    * Same search as [[getFirstOne]], but returns a full `SqPtr`: the flag is `startFlag`,
    * toggled when the search had to wrap around to the low part of `mask`.
    */
  def getFirstOneWithFlag(mask: Vec[Bool], startMask: UInt, startFlag: Bool) = {
    val length = mask.length
    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
    val highBitsUint = Cat(highBits.reverse)
    val changeDirection = !highBitsUint.orR()
    val index = PriorityEncoder(Mux(!changeDirection, highBitsUint, mask.asUInt))
    SqPtr(startFlag ^ changeDirection, index)
  }

  /**
    * Picks the first two set entries of `valid`, searching from the position described by
    * `startMask` (see [[getFirstOne]]).
    *
    * @return `(selValid, selVec)`: per-slot "a candidate exists" flags and the selected indices
    */
  def selectFirstTwo(valid: Vec[Bool], startMask: UInt) = {
    val selVec = Wire(Vec(2, UInt(log2Up(StoreQueueSize).W)))
    val selValid = Wire(Vec(2, Bool()))
    selVec(0) := getFirstOne(valid, startMask)
    // remove the first pick from the candidates before choosing the second
    val firstSelMask = UIntToOH(selVec(0))
    val secondSelVec = VecInit((0 until valid.length).map(i => valid(i) && !firstSelMask(i)))
    selVec(1) := getFirstOne(secondSelVec, startMask)
    selValid(0) := Cat(valid).orR
    selValid(1) := Cat(secondSelVec).orR
    (selValid, selVec)
  }

  // oldestStore:
  // validStoreVec marks entries that are NOT (allocated && valid); storeNotValid is the first
  // such entry found from the dequeue pointer, and storeValidIndex is the entry just before it.
  val validStoreVec = VecInit((0 until StoreQueueSize).map(i => !(allocated(i) && valid(i))))
  val storeNotValid = SqPtr(false.B, getFirstOne(validStoreVec, tailMask))
  val storeValidIndex = (storeNotValid - 1.U).value
  io.oldestStore.valid := allocated(ringBufferTailExtended.value) && valid(ringBufferTailExtended.value) && !commited(storeValidIndex)
  io.oldestStore.bits := uop(storeValidIndex).roqIdx

  // writeback up to 2 store insts to CDB
  // choose the first two valid store requests from deqPtr
  val storeWbSelVec = VecInit((0 until StoreQueueSize).map(i => allocated(i) && valid(i) && !writebacked(i)))
  val (storeWbValid, storeWbSel) = selectFirstTwo(storeWbSelVec, tailMask)

  // NOTE: io.stout and selectFirstTwo both provide exactly 2 slots, so this loop relies on
  // StorePipelineWidth being 2.
  (0 until StorePipelineWidth).foreach(i => {
    io.stout(i).bits.uop := uop(storeWbSel(i))
    io.stout(i).bits.uop.sqIdx := storeWbSel(i).asTypeOf(new SqPtr)
    io.stout(i).bits.uop.cf.exceptionVec := data(storeWbSel(i)).exception.asBools
    io.stout(i).bits.data := data(storeWbSel(i)).data
    io.stout(i).bits.redirectValid := false.B
    io.stout(i).bits.redirect := DontCare
    io.stout(i).bits.brUpdate := DontCare
    io.stout(i).bits.debug.isMMIO := data(storeWbSel(i)).mmio
    io.stout(i).valid := storeWbSelVec(storeWbSel(i)) && storeWbValid(i)
    when(io.stout(i).fire()) {
      writebacked(storeWbSel(i)) := true.B
    }
    io.stout(i).bits.fflags := DontCare
  })

  // remove retired insts from sq, add retired store to sbuffer

  // move tailPtr
  // allocatedMask: dequeuePtr can go to the next 1-bit
  val allocatedMask = VecInit((0 until StoreQueueSize).map(i => allocated(i) || !enqDeqMask(i)))
  // find the first one from deqPtr (ringBufferTail)
  val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, ringBufferTailExtended.flag)
  // if no bit is set at all, the dequeue pointer catches up with the enqueue pointer
  val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, ringBufferHeadExtended)
  ringBufferTailExtended := nextTail

  // load forward query
  // check over all sq entries and forward data from the youngest matched store
  (0 until LoadPipelineWidth).foreach(i => {
    io.forward(i).forwardMask := 0.U(8.W).asBools
    io.forward(i).forwardData := DontCare

    // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
    // (1) if they have the same flag, we need to check range(tail, sqIdx)
    // (2) if they have different flags, we need to check range(tail, StoreQueueSize) and range(0, sqIdx)
    // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, StoreQueueSize))
    // Forward2: Mux(same_flag, 0.U,                   range(0, sqIdx)    )
    // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
    val forwardMask1 = WireInit(VecInit(Seq.fill(8)(false.B)))
    val forwardData1 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))
    val forwardMask2 = WireInit(VecInit(Seq.fill(8)(false.B)))
    val forwardData2 = WireInit(VecInit(Seq.fill(8)(0.U(8.W))))

    val differentFlag = ringBufferTailExtended.flag =/= io.forward(i).sqIdx.flag
    val forwardMask = ((1.U((StoreQueueSize + 1).W)) << io.forward(i).sqIdx.value).asUInt - 1.U
    val needForward1 = Mux(differentFlag, ~tailMask, tailMask ^ forwardMask)
    val needForward2 = Mux(differentFlag, forwardMask, 0.U(StoreQueueSize.W))

    XSDebug("" + i + " f1 %b f2 %b sqIdx %d pa %x\n", needForward1, needForward2, io.forward(i).sqIdx.asUInt, io.forward(i).paddr)

    // entry with larger index should have higher priority since its data is younger
    // (later connections in this loop override earlier ones)
    for (j <- 0 until StoreQueueSize) {
      val needCheck = valid(j) && allocated(j) && // all valid terms need to be checked
        io.forward(i).paddr(PAddrBits - 1, 3) === data(j).paddr(PAddrBits - 1, 3)
      (0 until XLEN / 8).foreach(k => {
        when (needCheck && data(j).mask(k)) {
          when (needForward1(j)) {
            forwardMask1(k) := true.B
            forwardData1(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
          }
          when (needForward2(j)) {
            forwardMask2(k) := true.B
            forwardData2(k) := data(j).data(8 * (k + 1) - 1, 8 * k)
          }
          XSDebug(needForward1(j) || needForward2(j),
            p"forwarding $k-th byte ${Hexadecimal(data(j).data(8 * (k + 1) - 1, 8 * k))} " +
            p"from ptr $j pc ${Hexadecimal(uop(j).cf.pc)}\n")
        }
      })
    }

    // merge forward lookup results
    // forward2 is younger than forward1 and should have higher priority
    (0 until XLEN / 8).foreach(k => {
      io.forward(i).forwardMask(k) := forwardMask1(k) || forwardMask2(k)
      io.forward(i).forwardData(k) := Mux(forwardMask2(k), forwardData2(k), forwardData1(k))
    })
  })

  // mark stores as commited when the roq commits them
  (0 until CommitWidth).foreach(i => {
    when(storeCommit(i)) {
      commited(mcommitIdx(i)) := true.B
      XSDebug("store commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
    }
  })

  val storeCommitSelVec = VecInit((0 until StoreQueueSize).map(i => {
    allocated(i) && commited(i)
  }))
  val (storeCommitValid, storeCommitSel) = selectFirstTwo(storeCommitSelVec, tailMask)

  // get no more than 2 commited store from storeCommitedQueue
  // send selected store inst to sbuffer
  (0 until 2).foreach(i => {
    val ptr = storeCommitSel(i)
    val mmio = data(ptr).mmio
    // mmio stores are never sent to the sbuffer
    io.sbuffer(i).valid := storeCommitValid(i) && !mmio
    io.sbuffer(i).bits.cmd  := MemoryOpConstants.M_XWR
    io.sbuffer(i).bits.addr := data(ptr).paddr
    io.sbuffer(i).bits.data := data(ptr).data
    io.sbuffer(i).bits.mask := data(ptr).mask
    io.sbuffer(i).bits.meta          := DontCare
    io.sbuffer(i).bits.meta.tlb_miss := false.B
    io.sbuffer(i).bits.meta.uop      := uop(ptr)
    io.sbuffer(i).bits.meta.mmio     := mmio
    io.sbuffer(i).bits.meta.mask     := data(ptr).mask

    XSDebug(io.sbuffer(i).fire(), "[SBUFFER STORE REQ] pa %x data %x\n", data(ptr).paddr, data(ptr).data)

    // update sq meta if store inst is sent to sbuffer
    // (a commited mmio entry is released here without going through the sbuffer)
    when(storeCommitValid(i) && (mmio || io.sbuffer(i).ready)) {
      allocated(ptr) := false.B
    }
  })

  // Memory mapped IO / other uncached operations

  // setup misc mem access req
  // mask / paddr / data can be get from sq.data
  // The request is raised for the entry at the dequeue pointer when it is a pending mmio store
  // whose roqIdx equals io.roqDeqPtr and commit slot 0 is a non-walk STORE.
  val commitType = io.commits(0).bits.uop.ctrl.commitType
  io.uncache.req.valid := pending(ringBufferTail) && allocated(ringBufferTail) &&
    commitType === CommitType.STORE &&
    io.roqDeqPtr === uop(ringBufferTail).roqIdx &&
    !io.commits(0).bits.isWalk

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XWR
  io.uncache.req.bits.addr := data(ringBufferTail).paddr
  io.uncache.req.bits.data := data(ringBufferTail).data
  io.uncache.req.bits.mask := data(ringBufferTail).mask

  io.uncache.req.bits.meta.id       := DontCare // TODO: // FIXME
  io.uncache.req.bits.meta.vaddr    := DontCare
  io.uncache.req.bits.meta.paddr    := data(ringBufferTail).paddr
  io.uncache.req.bits.meta.uop      := uop(ringBufferTail)
  io.uncache.req.bits.meta.mmio     := true.B // data(ringBufferTail).mmio
  io.uncache.req.bits.meta.tlb_miss := false.B
  io.uncache.req.bits.meta.mask     := data(ringBufferTail).mask
  io.uncache.req.bits.meta.replay   := false.B

  io.uncache.resp.ready := true.B

  // once the request is accepted the entry is no longer pending
  when(io.uncache.req.fire()){
    pending(ringBufferTail) := false.B
  }

  // the response marks the entry valid, which makes it eligible for writeback on io.stout
  when(io.uncache.resp.fire()){
    valid(ringBufferTail) := true.B
    data(ringBufferTail).data := io.uncache.resp.bits.data(XLEN-1, 0)
    // TODO: write back exception info
  }

  when(io.uncache.req.fire()){
    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
      uop(ringBufferTail).cf.pc,
      io.uncache.req.bits.addr,
      io.uncache.req.bits.data,
      io.uncache.req.bits.cmd,
      io.uncache.req.bits.mask
    )
  }

  // Read vaddr for mem exception
  io.exceptionAddr.vaddr := data(io.exceptionAddr.lsIdx.sqIdx.value).vaddr

  // misprediction recovery / exception redirect
  // invalidate sq term using robIdx
  val needCancel = Wire(Vec(StoreQueueSize, Bool()))
  for (i <- 0 until StoreQueueSize) {
    // commited entries are never cancelled
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
    when(needCancel(i)) {
      when(io.brqRedirect.bits.isReplay){
        // replay: keep the entry allocated but reset its execution state
        valid(i) := false.B
        writebacked(i) := false.B
        listening(i) := false.B
        miss(i) := false.B
        pending(i) := false.B
      }.otherwise{
        allocated(i) := false.B
      }
    }
  }
  // on a misprediction, roll the enqueue pointer back over the cancelled entries
  when (io.brqRedirect.valid && io.brqRedirect.bits.isMisPred) {
    ringBufferHeadExtended := ringBufferHeadExtended - PopCount(needCancel)
  }

  // debug info
  XSDebug("head %d:%d tail %d:%d\n", ringBufferHeadExtended.flag, ringBufferHead, ringBufferTailExtended.flag, ringBufferTail)

  /** Debug helper: prints `name` when `flag` is set, a single space otherwise. */
  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  // dump every entry, four per line
  for (i <- 0 until StoreQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, data(i).paddr)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && valid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && commited(i), "c")
    PrintFlag(allocated(i) && miss(i), "m")
    PrintFlag(allocated(i) && listening(i), "l")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == StoreQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}
380