xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala (revision 9a74b41d101efb68acf170220d44fa115f633f4b)
1package xiangshan.mem
2
3import chisel3._
4import chisel3.util._
5import utils._
6import xiangshan._
7import xiangshan.cache._
8import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
9import xiangshan.backend.LSUOpType
10import xiangshan.backend.roq.RoqPtr
11
12
/**
  * Circular pointer into the store queue: a wrap `flag` plus an entry index
  * `value`, with the queue depth taken from `StoreQueueSize`.
  */
class SqPtr extends CircularQueuePtr(SqPtr.StoreQueueSize)

object SqPtr extends HasXSParameter {
  /**
    * Builds a combinational [[SqPtr]] wire from its two fields.
    *
    * @param f wrap flag driven onto the pointer
    * @param v entry index driven onto the pointer
    * @return a fresh wire carrying the assembled pointer
    */
  def apply(f: Bool, v: UInt): SqPtr = {
    val assembled = Wire(new SqPtr)
    assembled.value := v
    assembled.flag := f
    assembled
  }
}
23
// Store Queue
//
// Holds stores from dispatch until they have been committed and either handed
// to the sbuffer or, for entries flagged mmio, issued through the uncache port.
class StoreQueue extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle {
    // dispatch: one enqueue request per rename slot
    val dp1Req = Vec(RenameWidth, Flipped(DecoupledIO(new MicroOp)))
    // per-slot ready from the load queue; ANDed with sqReady to form dp1Req.ready
    val lqReady = Input(Vec(RenameWidth, Bool()))
    // per-slot: enough free store queue entries for this request
    val sqReady = Output(Vec(RenameWidth, Bool()))
    // per-slot: queue pointer the request will occupy
    val sqIdxs = Output(Vec(RenameWidth, new SqPtr))
    // redirect used to cancel entries that are not yet commited
    val brqRedirect = Input(Valid(new Redirect))
    // results from the store pipelines, written into the entry named by uop.sqIdx
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    // commited, non-mmio stores leaving for the sbuffer
    val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq))
    val stout = Vec(2, DecoupledIO(new ExuOutput)) // writeback store
    // forwarding queries, one per load pipeline
    val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
    // commit info; committing stores are marked as commited
    val commits = Flipped(Vec(CommitWidth, Valid(new RoqCommit)))
    // request / response channel used for mmio stores
    val uncache = new DCacheWordIO
    // compared with the tail entry's roqIdx before an mmio store is issued
    val roqDeqPtr = Input(new RoqPtr)
    // val refill = Flipped(Valid(new DCacheLineReq ))
    val oldestStore = Output(Valid(new RoqPtr))
    // vaddr lookup by sqIdx for memory exceptions
    val exceptionAddr = new ExceptionAddrIO
  })
43
44  val uop = Reg(Vec(StoreQueueSize, new MicroOp))
45  // val data = Reg(Vec(StoreQueueSize, new LsqEntry))
46  val dataModule = Module(new LSQueueData(StoreQueueSize, StorePipelineWidth))
47  dataModule.io := DontCare
48  val allocated = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // sq entry has been allocated
49  val valid = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // data is valid
50  val writebacked = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // inst has been writebacked to CDB
51  val commited = Reg(Vec(StoreQueueSize, Bool())) // inst has been writebacked to CDB
52  val miss = Reg(Vec(StoreQueueSize, Bool())) // load inst missed, waiting for miss queue to accept miss request
53  val listening = Reg(Vec(StoreQueueSize, Bool())) // waiting for refill result
54  val pending = Reg(Vec(StoreQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reachs the end of roq
55
56  val ringBufferHeadExtended = RegInit(0.U.asTypeOf(new SqPtr))
57  val ringBufferTailExtended = RegInit(0.U.asTypeOf(new SqPtr))
58  val ringBufferHead = ringBufferHeadExtended.value
59  val ringBufferTail = ringBufferTailExtended.value
60  val ringBufferSameFlag = ringBufferHeadExtended.flag === ringBufferTailExtended.flag
61  val ringBufferEmpty = ringBufferHead === ringBufferTail && ringBufferSameFlag
62  val ringBufferFull = ringBufferHead === ringBufferTail && !ringBufferSameFlag
63  val ringBufferAllowin = !ringBufferFull
64
65  val storeCommit = (0 until CommitWidth).map(i => io.commits(i).valid && !io.commits(i).bits.isWalk && io.commits(i).bits.uop.ctrl.commitType === CommitType.STORE)
66  val mcommitIdx = (0 until CommitWidth).map(i => io.commits(i).bits.uop.sqIdx.value)
67
68  val tailMask = (((1.U((StoreQueueSize + 1).W)) << ringBufferTail).asUInt - 1.U)(StoreQueueSize - 1, 0)
69  val headMask = (((1.U((StoreQueueSize + 1).W)) << ringBufferHead).asUInt - 1.U)(StoreQueueSize - 1, 0)
70  val enqDeqMask1 = tailMask ^ headMask
71  val enqDeqMask = Mux(ringBufferSameFlag, enqDeqMask1, ~enqDeqMask1)
72
73  // TODO: misc arbitor
74
75  // Enqueue at dispatch
76  val emptyEntries = StoreQueueSize.U - distanceBetween(ringBufferHeadExtended, ringBufferTailExtended)
77  XSDebug("(ready, valid): ")
78  for (i <- 0 until RenameWidth) {
79    val offset = if (i == 0) 0.U else PopCount((0 until i).map(io.dp1Req(_).valid))
80    val sqIdx = ringBufferHeadExtended + offset
81    val index = sqIdx.value
82    when(io.dp1Req(i).fire()) {
83      uop(index) := io.dp1Req(i).bits
84      allocated(index) := true.B
85      valid(index) := false.B
86      writebacked(index) := false.B
87      commited(index) := false.B
88      miss(index) := false.B
89      listening(index) := false.B
90      pending(index) := false.B
91    }
92    val numTryEnqueue = offset +& io.dp1Req(i).valid
93    io.sqReady(i) := numTryEnqueue <= emptyEntries
94    io.dp1Req(i).ready := io.lqReady(i) && io.sqReady(i)
95    io.sqIdxs(i) := sqIdx
96    XSDebug(false, true.B, "(%d, %d) ", io.dp1Req(i).ready, io.dp1Req(i).valid)
97  }
98  XSDebug(false, true.B, "\n")
99
100  val firedDispatch = VecInit((0 until CommitWidth).map(io.dp1Req(_).fire())).asUInt
101  when(firedDispatch.orR) {
102    ringBufferHeadExtended := ringBufferHeadExtended + PopCount(firedDispatch)
103    XSInfo("dispatched %d insts to sq\n", PopCount(firedDispatch))
104  }
105
106  // writeback store
107  (0 until StorePipelineWidth).map(i => {
108    dataModule.io.wb(i).wen := false.B
109    when(io.storeIn(i).fire()) {
110      val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value
111      valid(stWbIndex) := !io.storeIn(i).bits.mmio
112      miss(stWbIndex) := io.storeIn(i).bits.miss
113      pending(stWbIndex) := io.storeIn(i).bits.mmio
114
115      val storeWbData = Wire(new LsqEntry)
116      storeWbData := DontCare
117      storeWbData.paddr := io.storeIn(i).bits.paddr
118      storeWbData.vaddr := io.storeIn(i).bits.vaddr
119      storeWbData.mask := io.storeIn(i).bits.mask
120      storeWbData.data := io.storeIn(i).bits.data
121      storeWbData.mmio := io.storeIn(i).bits.mmio
122      storeWbData.exception := io.storeIn(i).bits.uop.cf.exceptionVec.asUInt
123
124      dataModule.io.wbWrite(i, stWbIndex, storeWbData)
125      dataModule.io.wb(i).wen := true.B
126
127      XSInfo("store write to sq idx %d pc 0x%x vaddr %x paddr %x data %x miss %x mmio %x roll %x exc %x\n",
128        io.storeIn(i).bits.uop.sqIdx.value,
129        io.storeIn(i).bits.uop.cf.pc,
130        io.storeIn(i).bits.vaddr,
131        io.storeIn(i).bits.paddr,
132        io.storeIn(i).bits.data,
133        io.storeIn(i).bits.miss,
134        io.storeIn(i).bits.mmio,
135        io.storeIn(i).bits.rollback,
136        io.storeIn(i).bits.uop.cf.exceptionVec.asUInt
137        )
138    }
139  })
140
141  def getFirstOne(mask: Vec[Bool], startMask: UInt) = {
142    val length = mask.length
143    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
144    val highBitsUint = Cat(highBits.reverse)
145    PriorityEncoder(Mux(highBitsUint.orR(), highBitsUint, mask.asUInt))
146  }
147
148  def getFirstOneWithFlag(mask: Vec[Bool], startMask: UInt, startFlag: Bool) = {
149    val length = mask.length
150    val highBits = (0 until length).map(i => mask(i) & ~startMask(i))
151    val highBitsUint = Cat(highBits.reverse)
152    val changeDirection = !highBitsUint.orR()
153    val index = PriorityEncoder(Mux(!changeDirection, highBitsUint, mask.asUInt))
154    SqPtr(startFlag ^ changeDirection, index)
155  }
156
157  def selectFirstTwo(valid: Vec[Bool], startMask: UInt) = {
158    val selVec = Wire(Vec(2, UInt(log2Up(StoreQueueSize).W)))
159    val selValid = Wire(Vec(2, Bool()))
160    selVec(0) := getFirstOne(valid, startMask)
161    val firstSelMask = UIntToOH(selVec(0))
162    val secondSelVec = VecInit((0 until valid.length).map(i => valid(i) && !firstSelMask(i)))
163    selVec(1) := getFirstOne(secondSelVec, startMask)
164    selValid(0) := Cat(valid).orR
165    selValid(1) := Cat(secondSelVec).orR
166    (selValid, selVec)
167  }
168
169  def selectFirstTwoRoughly(valid: Vec[Bool]) = {
170    // TODO: do not select according to seq, just select 2 valid bit randomly
171    val firstSelVec = valid
172    val notFirstVec = Wire(Vec(valid.length, Bool()))
173    (0 until valid.length).map(i =>
174      notFirstVec(i) := (if(i != 0) { valid(i) || !notFirstVec(i) } else { false.B })
175    )
176    val secondSelVec = VecInit((0 until valid.length).map(i => valid(i) && !notFirstVec(i)))
177
178    val selVec = Wire(Vec(2, UInt(log2Up(valid.length).W)))
179    val selValid = Wire(Vec(2, Bool()))
180    selVec(0) := PriorityEncoder(firstSelVec)
181    selVec(1) := PriorityEncoder(secondSelVec)
182    selValid(0) := Cat(firstSelVec).orR
183    selValid(1) := Cat(secondSelVec).orR
184    (selValid, selVec)
185  }
186
187  // select the last writebacked instruction
188  val validStoreVec = VecInit((0 until StoreQueueSize).map(i => !(allocated(i) && valid(i))))
189  val storeNotValid = SqPtr(false.B, getFirstOne(validStoreVec, tailMask))
190  val storeValidIndex = (storeNotValid - 1.U).value
191  io.oldestStore.valid := allocated(ringBufferTailExtended.value) && valid(ringBufferTailExtended.value) && !commited(storeValidIndex)
192  io.oldestStore.bits := uop(storeValidIndex).roqIdx
193
194  // writeback up to 2 store insts to CDB
195  // choose the first two valid store requests from deqPtr
196  val storeWbSelVec = VecInit((0 until StoreQueueSize).map(i => allocated(i) && valid(i) && !writebacked(i)))
197  val (storeWbValid, storeWbSel) = selectFirstTwo(storeWbSelVec, tailMask)
198
199  (0 until StorePipelineWidth).map(i => {
200    io.stout(i).bits.uop := uop(storeWbSel(i))
201    io.stout(i).bits.uop.sqIdx := storeWbSel(i).asTypeOf(new SqPtr)
202    io.stout(i).bits.uop.cf.exceptionVec := dataModule.io.rdata(storeWbSel(i)).exception.asBools
203    io.stout(i).bits.data := dataModule.io.rdata(storeWbSel(i)).data
204    io.stout(i).bits.redirectValid := false.B
205    io.stout(i).bits.redirect := DontCare
206    io.stout(i).bits.brUpdate := DontCare
207    io.stout(i).bits.debug.isMMIO := dataModule.io.rdata(storeWbSel(i)).mmio
208    io.stout(i).valid := storeWbSelVec(storeWbSel(i)) && storeWbValid(i)
209    when(io.stout(i).fire()) {
210      writebacked(storeWbSel(i)) := true.B
211    }
212    io.stout(i).bits.fflags := DontCare
213  })
214
215  // remove retired insts from sq, add retired store to sbuffer
216
217  // move tailPtr
218  // allocatedMask: dequeuePtr can go to the next 1-bit
219  val allocatedMask = VecInit((0 until StoreQueueSize).map(i => allocated(i) || !enqDeqMask(i)))
220  // find the first one from deqPtr (ringBufferTail)
221  val nextTail1 = getFirstOneWithFlag(allocatedMask, tailMask, ringBufferTailExtended.flag)
222  val nextTail = Mux(Cat(allocatedMask).orR, nextTail1, ringBufferHeadExtended)
223  ringBufferTailExtended := nextTail
224
225  // load forward query
226  // check over all lq entries and forward data from the first matched store
227  (0 until LoadPipelineWidth).map(i => {
228    io.forward(i).forwardMask := 0.U(8.W).asBools
229    io.forward(i).forwardData := DontCare
230
231    // Compare ringBufferTail (deqPtr) and forward.sqIdx, we have two cases:
232    // (1) if they have the same flag, we need to check range(tail, sqIdx)
233    // (2) if they have different flags, we need to check range(tail, LoadQueueSize) and range(0, sqIdx)
234    // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, LoadQueueSize))
235    // Forward2: Mux(same_flag, 0.U,                   range(0, sqIdx)    )
236    // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
237
238    val differentFlag = ringBufferTailExtended.flag =/= io.forward(i).sqIdx.flag
239    val forwardMask = ((1.U((StoreQueueSize + 1).W)) << io.forward(i).sqIdx.value).asUInt - 1.U
240    val storeWritebackedVec = WireInit(VecInit(Seq.fill(StoreQueueSize)(false.B)))
241    for (j <- 0 until StoreQueueSize) {
242      storeWritebackedVec(j) := valid(j) && allocated(j) // all valid terms need to be checked
243    }
244    val needForward1 = Mux(differentFlag, ~tailMask, tailMask ^ forwardMask) & storeWritebackedVec.asUInt
245    val needForward2 = Mux(differentFlag, forwardMask, 0.U(StoreQueueSize.W)) & storeWritebackedVec.asUInt
246
247    XSDebug("" + i + " f1 %b f2 %b sqIdx %d pa %x\n", needForward1, needForward2, io.forward(i).sqIdx.asUInt, io.forward(i).paddr)
248
249    // do real fwd query
250    dataModule.io.forwardQuery(
251      channel = i,
252      paddr = io.forward(i).paddr,
253      needForward1 = needForward1,
254      needForward2 = needForward2
255    )
256
257    io.forward(i).forwardMask := dataModule.io.forward(i).forwardMask
258    io.forward(i).forwardData := dataModule.io.forward(i).forwardData
259  })
260
261  // CommitedStoreQueue for timing opt
262  // send commited store inst to sbuffer
263  // select up to 2 writebacked store insts
264  val commitedStoreQueue = Module(new MIMOQueue(
265    UInt(log2Up(StoreQueueSize).W),
266    entries = 64, //FIXME
267    inCnt = 6,
268    outCnt = 2,
269    mem = false,
270    perf = true
271  ))
272  commitedStoreQueue.io.flush := false.B
273
274  // When store commited, mark it as commited (will not be influenced by redirect),
275  // then add store's sq ptr into commitedStoreQueue
276  (0 until CommitWidth).map(i => {
277    when(storeCommit(i)) {
278      commited(mcommitIdx(i)) := true.B
279      XSDebug("store commit %d: idx %d %x\n", i.U, mcommitIdx(i), uop(mcommitIdx(i)).cf.pc)
280    }
281    commitedStoreQueue.io.enq(i).valid := storeCommit(i)
282    commitedStoreQueue.io.enq(i).bits := mcommitIdx(i)
283    // We assume commitedStoreQueue.io.enq(i).ready === true.B,
284    // for commitedStoreQueue.size = 64
285  })
286
287  class SbufferCandidateEntry extends XSBundle{
288    val sbuffer = new DCacheWordReq
289    val sqIdx = UInt(log2Up(StoreQueueSize).W)
290  }
291
292  val ensbufferCandidateQueue = Module(new MIMOQueue(
293    new SbufferCandidateEntry,
294    entries = 2,
295    inCnt = 2,
296    outCnt = 2,
297    mem = false,
298    perf = true
299  ))
300  ensbufferCandidateQueue.io.flush := false.B
301
302  val sbufferCandidate = Wire(Vec(2, Decoupled(new SbufferCandidateEntry)))
303  (0 until 2).map(i => {
304    val ptr = commitedStoreQueue.io.deq(i).bits
305    val mmio = dataModule.io.rdata(ptr).mmio
306    sbufferCandidate(i).valid := commitedStoreQueue.io.deq(i).valid && !mmio
307    sbufferCandidate(i).bits.sqIdx := ptr
308    sbufferCandidate(i).bits.sbuffer.cmd  := MemoryOpConstants.M_XWR
309    sbufferCandidate(i).bits.sbuffer.addr := dataModule.io.rdata(ptr).paddr
310    sbufferCandidate(i).bits.sbuffer.data := dataModule.io.rdata(ptr).data
311    sbufferCandidate(i).bits.sbuffer.mask := dataModule.io.rdata(ptr).mask
312    sbufferCandidate(i).bits.sbuffer.meta          := DontCare
313    sbufferCandidate(i).bits.sbuffer.meta.tlb_miss := false.B
314    sbufferCandidate(i).bits.sbuffer.meta.uop      := DontCare
315    sbufferCandidate(i).bits.sbuffer.meta.mmio     := mmio
316    sbufferCandidate(i).bits.sbuffer.meta.mask     := dataModule.io.rdata(ptr).mask
317
318    when(mmio && commitedStoreQueue.io.deq(i).valid) {
319      allocated(ptr) := false.B
320    }
321
322    commitedStoreQueue.io.deq(i).ready := sbufferCandidate(i).fire() || mmio
323    sbufferCandidate(i).ready := ensbufferCandidateQueue.io.enq(i).ready
324    ensbufferCandidateQueue.io.enq(i).valid := sbufferCandidate(i).valid
325    ensbufferCandidateQueue.io.enq(i).bits.sqIdx := sbufferCandidate(i).bits.sqIdx
326    ensbufferCandidateQueue.io.enq(i).bits.sbuffer := sbufferCandidate(i).bits.sbuffer
327
328    ensbufferCandidateQueue.io.deq(i).ready := io.sbuffer(i).fire()
329    io.sbuffer(i).valid := ensbufferCandidateQueue.io.deq(i).valid
330    io.sbuffer(i).bits  := ensbufferCandidateQueue.io.deq(i).bits.sbuffer
331
332    // update sq meta if store inst is send to sbuffer
333    when(ensbufferCandidateQueue.io.deq(i).valid && io.sbuffer(i).ready) {
334      allocated(ensbufferCandidateQueue.io.deq(i).bits.sqIdx) := false.B
335    }
336  })
337
338  // Memory mapped IO / other uncached operations
339
340  // setup misc mem access req
341  // mask / paddr / data can be get from sq.data
342  val commitType = io.commits(0).bits.uop.ctrl.commitType
343  io.uncache.req.valid := pending(ringBufferTail) && allocated(ringBufferTail) &&
344    commitType === CommitType.STORE &&
345    io.roqDeqPtr === uop(ringBufferTail).roqIdx &&
346    !io.commits(0).bits.isWalk
347
348  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XWR
349  io.uncache.req.bits.addr := dataModule.io.rdata(ringBufferTail).paddr
350  io.uncache.req.bits.data := dataModule.io.rdata(ringBufferTail).data
351  io.uncache.req.bits.mask := dataModule.io.rdata(ringBufferTail).mask
352
353  io.uncache.req.bits.meta.id       := DontCare // TODO: // FIXME
354  io.uncache.req.bits.meta.vaddr    := DontCare
355  io.uncache.req.bits.meta.paddr    := dataModule.io.rdata(ringBufferTail).paddr
356  io.uncache.req.bits.meta.uop      := uop(ringBufferTail)
357  io.uncache.req.bits.meta.mmio     := true.B // dataModule.io.rdata(ringBufferTail).mmio
358  io.uncache.req.bits.meta.tlb_miss := false.B
359  io.uncache.req.bits.meta.mask     := dataModule.io.rdata(ringBufferTail).mask
360  io.uncache.req.bits.meta.replay   := false.B
361
362  io.uncache.resp.ready := true.B
363
364  when(io.uncache.req.fire()){
365    pending(ringBufferTail) := false.B
366  }
367
368  when(io.uncache.resp.fire()){
369    valid(ringBufferTail) := true.B
370    // TODO: write back exception info
371  }
372
373  when(io.uncache.req.fire()){
374    XSDebug("uncache req: pc %x addr %x data %x op %x mask %x\n",
375      uop(ringBufferTail).cf.pc,
376      io.uncache.req.bits.addr,
377      io.uncache.req.bits.data,
378      io.uncache.req.bits.cmd,
379      io.uncache.req.bits.mask
380    )
381  }
382
383  // Read vaddr for mem exception
384  io.exceptionAddr.vaddr := dataModule.io.rdata(io.exceptionAddr.lsIdx.sqIdx.value).vaddr
385
386  // misprediction recovery / exception redirect
387  // invalidate sq term using robIdx
388  val needCancel = Wire(Vec(StoreQueueSize, Bool()))
389  for (i <- 0 until StoreQueueSize) {
390    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect) && allocated(i) && !commited(i)
391    when(needCancel(i)) {
392      when(io.brqRedirect.bits.isReplay){
393        valid(i) := false.B
394        writebacked(i) := false.B
395        listening(i) := false.B
396        miss(i) := false.B
397        pending(i) := false.B
398      }.otherwise{
399        allocated(i) := false.B
400      }
401    }
402  }
403  when (io.brqRedirect.valid && io.brqRedirect.bits.isMisPred) {
404    ringBufferHeadExtended := ringBufferHeadExtended - PopCount(needCancel)
405  }
406
407  // debug info
408  XSDebug("head %d:%d tail %d:%d\n", ringBufferHeadExtended.flag, ringBufferHead, ringBufferTailExtended.flag, ringBufferTail)
409
410  def PrintFlag(flag: Bool, name: String): Unit = {
411    when(flag) {
412      XSDebug(false, true.B, name)
413    }.otherwise {
414      XSDebug(false, true.B, " ")
415    }
416  }
417
418  for (i <- 0 until StoreQueueSize) {
419    if (i % 4 == 0) XSDebug("")
420    XSDebug(false, true.B, "%x [%x] ", uop(i).cf.pc, dataModule.io.rdata(i).paddr)
421    PrintFlag(allocated(i), "a")
422    PrintFlag(allocated(i) && valid(i), "v")
423    PrintFlag(allocated(i) && writebacked(i), "w")
424    PrintFlag(allocated(i) && commited(i), "c")
425    PrintFlag(allocated(i) && miss(i), "m")
426    PrintFlag(allocated(i) && listening(i), "l")
427    PrintFlag(allocated(i) && pending(i), "p")
428    XSDebug(false, true.B, " ")
429    if (i % 4 == 3 || i == StoreQueueSize - 1) XSDebug(false, true.B, "\n")
430  }
431
432}
433