// XiangShan/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala (revision 2225d46ebbe2fd16b9b29963c27a7d0385a42709)
package xiangshan.mem

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
import xiangshan.backend.roq.RoqLsqIO
import difftest._

class SqPtr(implicit p: Parameters) extends CircularQueuePtr[SqPtr](
  p => p(XSCoreParamsKey).StoreQueueSize
){
  override def cloneType = (new SqPtr).asInstanceOf[this.type]
}

object SqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): SqPtr = {
    val ptr = Wire(new SqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}
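
// A minimal usage sketch (hypothetical values): SqPtr(false.B, 0.U) builds a
// store-queue pointer whose wrap flag is 0 and whose index is 0.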

class SqEnqIO(implicit p: Parameters) extends XSBundle {
  val canAccept = Output(Bool())
  val lqCanAccept = Input(Bool())
  val needAlloc = Vec(RenameWidth, Input(Bool()))
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
  val resp = Vec(RenameWidth, Output(new SqPtr))
}

// Store Queue
class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    val enq = new SqEnqIO
    val brqRedirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq))
    val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
    val forward = Vec(LoadPipelineWidth, Flipped(new MaskedLoadForwardQueryIO))
    val roq = Flipped(new RoqLsqIO)
    val uncache = new DCacheWordIO
    // val refill = Flipped(Valid(new DCacheLineReq ))
    val exceptionAddr = new ExceptionAddrIO
    val sqempty = Output(Bool())
    val issuePtrExt = Output(new SqPtr)
    val storeIssue = Vec(StorePipelineWidth, Flipped(Valid(new ExuInput)))
    val sqFull = Output(Bool())
  })

  // data modules
  val uop = Reg(Vec(StoreQueueSize, new MicroOp))
  // val data = Reg(Vec(StoreQueueSize, new LsqEntry))
  val dataModule = Module(new StoreQueueData(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth))
  dataModule.io := DontCare
  val paddrModule = Module(new SQPaddrModule(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth))
  paddrModule.io := DontCare
  val vaddrModule = Module(new SyncDataModuleTemplate(UInt(VAddrBits.W), StoreQueueSize, numRead = 1, numWrite = StorePipelineWidth))
  vaddrModule.io := DontCare

  // state & misc
  val allocated = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // sq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // non-mmio data is valid
  val writebacked = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // inst has been written back to CDB
  val issued = Reg(Vec(StoreQueueSize, Bool())) // inst has been issued by rs
  val commited = Reg(Vec(StoreQueueSize, Bool())) // inst has been committed by roq
  val pending = Reg(Vec(StoreQueueSize, Bool())) // mmio pending: inst is an mmio inst and will not be executed until it reaches the end of roq
  val mmio = Reg(Vec(StoreQueueSize, Bool())) // mmio: inst is an mmio inst

  // ptr
  require(StoreQueueSize > RenameWidth)
  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new SqPtr))))
  val deqPtrExt = RegInit(VecInit((0 until StorePipelineWidth).map(_.U.asTypeOf(new SqPtr))))
  val cmtPtrExt = RegInit(VecInit((0 until CommitWidth).map(_.U.asTypeOf(new SqPtr))))
  val issuePtrExt = RegInit(0.U.asTypeOf(new SqPtr))
  val validCounter = RegInit(0.U(log2Ceil(StoreQueueSize + 1).W))
  val allowEnqueue = RegInit(true.B)

  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt(0).value
  val cmtPtr = cmtPtrExt(0).value

  val deqMask = UIntToMask(deqPtr, StoreQueueSize)
  val enqMask = UIntToMask(enqPtr, StoreQueueSize)

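  // scommit from ROB gives the number of store instructions committed in the
  // previous cycle; it is registered once more here before being used below to
  // advance cmtPtrExt.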
  val commitCount = RegNext(io.roq.scommit)

  // Read dataModule
  // The entries at deqPtrExtNext(0) and deqPtrExtNext(1) will be read from dataModule
  // if !sbuffer.fire(), read the same ptr
  // if sbuffer.fire(), read next
  val deqPtrExtNext = WireInit(Mux(io.sbuffer(1).fire(),
    VecInit(deqPtrExt.map(_ + 2.U)),
    Mux(io.sbuffer(0).fire() || io.mmioStout.fire(),
      VecInit(deqPtrExt.map(_ + 1.U)),
      deqPtrExt
    )
  ))
  for (i <- 0 until StorePipelineWidth) {
    dataModule.io.raddr(i) := deqPtrExtNext(i).value
    paddrModule.io.raddr(i) := deqPtrExtNext(i).value
  }
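  // Note: the read address is set to the *next-cycle* deqPtr, so after the
  // one-cycle read latency rdata(i) lines up with the entry at deqPtrExt(i)
  // when it is handed to sbuffer / uncache.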

  // no inst will be committed 1 cycle before tval update
  vaddrModule.io.raddr(0) := (cmtPtrExt(0) + commitCount).value

  /**
    * Enqueue at dispatch
    *
    * Currently, StoreQueue only allows enqueue when #emptyEntries > RenameWidth (EnqWidth)
    */
  io.enq.canAccept := allowEnqueue
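  // For each dispatch slot, the enqueue offset is the number of earlier slots
  // that also need a store-queue entry (needAlloc), so entries are allocated
  // compactly regardless of which slots actually carry stores.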
  for (i <- 0 until RenameWidth) {
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val sqIdx = enqPtrExt(offset)
    val index = sqIdx.value
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.lqCanAccept && !(io.brqRedirect.valid || io.flush)) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      issued(index) := false.B
      commited(index) := false.B
      pending(index) := false.B
    }
    io.enq.resp(i) := sqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Update issuePtr when stores are issued from rs
    */

  // update state bit issued
  for (i <- 0 until StorePipelineWidth) {
    when (io.storeIssue(i).valid) {
      issued(io.storeIssue(i).bits.uop.sqIdx.value) := true.B
    }
  }

  // update issuePtr
  val IssuePtrMoveStride = 4
  require(IssuePtrMoveStride >= 2)

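  // Scan up to IssuePtrMoveStride entries starting at issuePtrExt and advance the
  // pointer past the leading run of allocated & issued entries: the PriorityEncoder
  // returns the distance to the first entry that is not ready (bounded by enqPtrExt,
  // and by IssuePtrMoveStride through the appended true.B).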
  val issueLookupVec = (0 until IssuePtrMoveStride).map(issuePtrExt + _.U)
  val issueLookup = issueLookupVec.map(ptr => allocated(ptr.value) && issued(ptr.value) && ptr =/= enqPtrExt(0))
  val nextIssuePtr = issuePtrExt + PriorityEncoder(VecInit(issueLookup.map(!_) :+ true.B))
  issuePtrExt := nextIssuePtr

  when (io.brqRedirect.valid || io.flush) {
    issuePtrExt := Mux(
      isAfter(cmtPtrExt(0), deqPtrExt(0)),
      cmtPtrExt(0),
      deqPtrExtNext(0) // for mmio insts, deqPtr may be ahead of cmtPtr
    )
  }
  // send issuePtrExt to rs
  // io.issuePtrExt := cmtPtrExt(0)
  io.issuePtrExt := issuePtrExt

  /**
    * Writeback store from store units
    *
    * Most store instructions write back to the regfile in the previous cycle.
    * However,
    *   (1) For an mmio instruction with exceptions, we need to mark it as datavalid
    * (so that it triggers an exception when it reaches ROB's head)
    * instead of pending, to avoid sending it to the lower memory level.
    *   (2) For an mmio instruction without exceptions, we mark it as pending.
    * When the instruction reaches ROB's head, StoreQueue sends it to the uncache channel.
    * Upon receiving the response, StoreQueue writes the instruction back
    * through the arbiter shared with the store units. It then commits as a normal instruction.
    */
  for (i <- 0 until StorePipelineWidth) {
    dataModule.io.wen(i) := false.B
    paddrModule.io.wen(i) := false.B
    val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value
    when (io.storeIn(i).fire()) {
      datavalid(stWbIndex) := !io.storeIn(i).bits.mmio
      writebacked(stWbIndex) := !io.storeIn(i).bits.mmio
      pending(stWbIndex) := io.storeIn(i).bits.mmio

      val storeWbData = Wire(new SQDataEntry)
      storeWbData := DontCare
      storeWbData.mask := io.storeIn(i).bits.mask
      storeWbData.data := io.storeIn(i).bits.data

      dataModule.io.waddr(i) := stWbIndex
      dataModule.io.wdata(i) := storeWbData
      dataModule.io.wen(i) := true.B

      paddrModule.io.waddr(i) := stWbIndex
      paddrModule.io.wdata(i) := io.storeIn(i).bits.paddr
      paddrModule.io.wen(i) := true.B

      mmio(stWbIndex) := io.storeIn(i).bits.mmio

      XSInfo("store write to sq idx %d pc 0x%x vaddr %x paddr %x data %x mmio %x\n",
        io.storeIn(i).bits.uop.sqIdx.value,
        io.storeIn(i).bits.uop.cf.pc,
        io.storeIn(i).bits.vaddr,
        io.storeIn(i).bits.paddr,
        io.storeIn(i).bits.data,
        io.storeIn(i).bits.mmio
      )
    }
    // vaddrModule write is delayed, as vaddrModule will not be read right after write
    vaddrModule.io.waddr(i) := RegNext(stWbIndex)
    vaddrModule.io.wdata(i) := RegNext(io.storeIn(i).bits.vaddr)
    vaddrModule.io.wen(i) := RegNext(io.storeIn(i).fire())
  }

  /**
    * load forward query
    *
    * Check store queue for instructions that are older than the load.
    * The response will be valid at the next cycle after req.
    */
  // check over all sq entries and forward data from the first matched store
  for (i <- 0 until LoadPipelineWidth) {
    io.forward(i).forwardMask := 0.U(8.W).asBools
    io.forward(i).forwardData := DontCare

    // Compare deqPtr and forward.sqIdx, we have two cases:
    // (1) if they have the same flag, we need to check range(tail, sqIdx)
    // (2) if they have different flags, we need to check range(tail, StoreQueueSize) and range(0, sqIdx)
    // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, StoreQueueSize))
    // Forward2: Mux(same_flag, 0.U,                range(0, sqIdx)            )
    // i.e. forward1 is the target entries with the same flag bits and forward2 otherwise
    val differentFlag = deqPtrExt(0).flag =/= io.forward(i).sqIdx.flag
    val forwardMask = io.forward(i).sqIdxMask
    val storeWritebackedVec = WireInit(VecInit(Seq.fill(StoreQueueSize)(false.B)))
    for (j <- 0 until StoreQueueSize) {
      storeWritebackedVec(j) := datavalid(j) && allocated(j) // all datavalid terms need to be checked
    }
    val needForward1 = Mux(differentFlag, ~deqMask, deqMask ^ forwardMask) & storeWritebackedVec.asUInt
    val needForward2 = Mux(differentFlag, forwardMask, 0.U(StoreQueueSize.W)) & storeWritebackedVec.asUInt

    XSDebug(p"$i f1 ${Binary(needForward1)} f2 ${Binary(needForward2)} " +
      p"sqIdx ${io.forward(i).sqIdx} pa ${Hexadecimal(io.forward(i).paddr)}\n"
    )

    // do real fwd query
    dataModule.io.needForward(i)(0) := needForward1 & paddrModule.io.forwardMmask(i).asUInt
    dataModule.io.needForward(i)(1) := needForward2 & paddrModule.io.forwardMmask(i).asUInt

    paddrModule.io.forwardMdata(i) := io.forward(i).paddr

    io.forward(i).forwardMask := dataModule.io.forwardMask(i)
    io.forward(i).forwardData := dataModule.io.forwardData(i)
  }

  /**
    * Memory mapped IO / other uncached operations
    *
    * States:
    * (1) writeback from store units: mark as pending
    * (2) when they reach ROB's head, they can be sent to uncache channel
    * (3) response from uncache channel: mark as datavalid
    * (4) writeback to ROB (and other units): mark as writebacked
    * (5) ROB commits the instruction: same as normal instructions
    */
  // (2) when they reach ROB's head, they can be sent to uncache channel
  val s_idle :: s_req :: s_resp :: s_wb :: s_wait :: Nil = Enum(5)
  val uncacheState = RegInit(s_idle)
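  // s_idle: wait until the mmio store at deqPtr is pending and reaches ROB's head
  // s_req : send the write request on the uncache channel
  // s_resp: wait for the uncache response
  // s_wb  : write the mmio store back through io.mmioStout
  // s_wait: wait for ROB to commit the store, then accept the next mmio store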
  switch(uncacheState) {
    is(s_idle) {
      when(io.roq.pendingst && pending(deqPtr) && allocated(deqPtr)) {
        uncacheState := s_req
      }
    }
    is(s_req) {
      when(io.uncache.req.fire()) {
        uncacheState := s_resp
      }
    }
    is(s_resp) {
      when(io.uncache.resp.fire()) {
        uncacheState := s_wb
      }
    }
    is(s_wb) {
      when (io.mmioStout.fire()) {
        uncacheState := s_wait
      }
    }
    is(s_wait) {
      when(io.roq.commit) {
        uncacheState := s_idle // ready for next mmio
      }
    }
  }
  io.uncache.req.valid := uncacheState === s_req

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XWR
  io.uncache.req.bits.addr := paddrModule.io.rdata(0) // data(deqPtr) -> rdata(0)
  io.uncache.req.bits.data := dataModule.io.rdata(0).data
  io.uncache.req.bits.mask := dataModule.io.rdata(0).mask

  io.uncache.req.bits.id   := DontCare

  when(io.uncache.req.fire()){
    pending(deqPtr) := false.B

    XSDebug(
      p"uncache req: pc ${Hexadecimal(uop(deqPtr).cf.pc)} " +
      p"addr ${Hexadecimal(io.uncache.req.bits.addr)} " +
      p"data ${Hexadecimal(io.uncache.req.bits.data)} " +
      p"op ${Hexadecimal(io.uncache.req.bits.cmd)} " +
      p"mask ${Hexadecimal(io.uncache.req.bits.mask)}\n"
    )
  }

  // (3) response from uncache channel: mark as datavalid
  io.uncache.resp.ready := true.B
  when (io.uncache.resp.fire()) {
    datavalid(deqPtr) := true.B
  }

  // (4) writeback to ROB (and other units): mark as writebacked
  io.mmioStout.valid := uncacheState === s_wb // allocated(deqPtr) && datavalid(deqPtr) && !writebacked(deqPtr)
  io.mmioStout.bits.uop := uop(deqPtr)
  io.mmioStout.bits.uop.sqIdx := deqPtrExt(0)
  io.mmioStout.bits.data := dataModule.io.rdata(0).data // dataModule.io.rdata.read(deqPtr)
  io.mmioStout.bits.redirectValid := false.B
  io.mmioStout.bits.redirect := DontCare
  io.mmioStout.bits.debug.isMMIO := true.B
  io.mmioStout.bits.debug.paddr := DontCare
  io.mmioStout.bits.debug.isPerfCnt := false.B
  io.mmioStout.bits.fflags := DontCare
  when (io.mmioStout.fire()) {
    writebacked(deqPtr) := true.B
    allocated(deqPtr) := false.B
  }

  /**
    * ROB commits store instructions (mark them as committed)
    *
    * (1) When a store commits, mark it as committed.
    * (2) Committed stores will not be cancelled and can be sent to the lower memory level.
    */
  for (i <- 0 until CommitWidth) {
    when (commitCount > i.U) {
      commited(cmtPtrExt(i).value) := true.B
    }
  }
  cmtPtrExt := cmtPtrExt.map(_ + commitCount)

  // Committed stores will not be cancelled and can be sent to the lower memory level.
  // remove retired insts from sq, add retired stores to sbuffer
  for (i <- 0 until StorePipelineWidth) {
    // We use RegNext to prepare data for sbuffer
    val ptr = deqPtrExt(i).value
    // if !sbuffer.fire(), read the same ptr
    // if sbuffer.fire(), read next
    io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio(ptr)
    io.sbuffer(i).bits.cmd  := MemoryOpConstants.M_XWR
    io.sbuffer(i).bits.addr := paddrModule.io.rdata(i)
    io.sbuffer(i).bits.data := dataModule.io.rdata(i).data
    io.sbuffer(i).bits.mask := dataModule.io.rdata(i).mask
    io.sbuffer(i).bits.id   := DontCare

    when (io.sbuffer(i).fire()) {
      allocated(ptr) := false.B
      XSDebug("sbuffer "+i+" fire: ptr %d\n", ptr)
    }
  }
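  // Dequeue is in-order: port 1 writes the entry at deqPtr + 1, so it can only
  // fire together with port 0.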
  when (io.sbuffer(1).fire()) {
    assert(io.sbuffer(0).fire())
  }

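  // In simulation (non-FPGA builds), report each store written to sbuffer to
  // difftest so it can be checked against the reference model.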
  if (!env.FPGAPlatform) {
    for (i <- 0 until StorePipelineWidth) {
      val storeCommit = io.sbuffer(i).fire()
      val waddr = SignExt(io.sbuffer(i).bits.addr, 64)
      val wdata = io.sbuffer(i).bits.data & MaskExpand(io.sbuffer(i).bits.mask)
      val wmask = io.sbuffer(i).bits.mask

      val difftest = Module(new DifftestStoreEvent)
      difftest.io.clock       := clock
      difftest.io.coreid      := 0.U
      difftest.io.index       := i.U
      difftest.io.valid       := storeCommit
      difftest.io.storeAddr   := waddr
      difftest.io.storeData   := wdata
      difftest.io.storeMask   := wmask
    }
  }

  // Read vaddr for mem exception
  io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)

  // misprediction recovery / exception redirect
  // invalidate sq entries using roqIdx
  val needCancel = Wire(Vec(StoreQueueSize, Bool()))
  for (i <- 0 until StoreQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect, io.flush) && allocated(i) && !commited(i)
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  /**
    * update pointers
    */
  val lastCycleRedirect = RegNext(io.brqRedirect.valid)
  val lastCycleFlush = RegNext(io.flush)
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow enqueue even though io.enq.req may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.lqCanAccept && !(io.brqRedirect.valid || io.flush), PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect || lastCycleFlush) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  deqPtrExt := deqPtrExtNext

  val dequeueCount = Mux(io.sbuffer(1).fire(), 2.U, Mux(io.sbuffer(0).fire() || io.mmioStout.fire(), 1.U, 0.U))
  val validCount = distanceBetween(enqPtrExt(0), deqPtrExt(0))

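  // Reserve RenameWidth entries: canAccept is a registered signal, so up to
  // RenameWidth more requests may arrive before it can be deasserted.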
  allowEnqueue := validCount + enqNumber <= (StoreQueueSize - RenameWidth).U

  // io.sqempty will be used by sbuffer
  // We delay it for 1 cycle for better timing
  // When sbuffer needs to check whether sq is empty, the pipeline is blocked, so
  // delaying io.sqempty by 1 cycle still guarantees that sq is empty in that cycle
  io.sqempty := RegNext(enqPtrExt(0).value === deqPtrExt(0).value && enqPtrExt(0).flag === deqPtrExt(0).flag)

  // perf counter
  QueuePerf(StoreQueueSize, validCount, !allowEnqueue)
  io.sqFull := !allowEnqueue
  XSPerfAccumulate("mmioCycle", uncacheState =/= s_idle) // sq is busy dealing with uncache req
  XSPerfAccumulate("mmioCnt", io.uncache.req.fire())
  XSPerfAccumulate("mmio_wb_success", io.mmioStout.fire())
  XSPerfAccumulate("mmio_wb_blocked", io.mmioStout.valid && !io.mmioStout.ready)
  XSPerfAccumulate("validEntryCnt", distanceBetween(enqPtrExt(0), deqPtrExt(0)))
  XSPerfAccumulate("cmtEntryCnt", distanceBetween(cmtPtrExt(0), deqPtrExt(0)))
  XSPerfAccumulate("nCmtEntryCnt", distanceBetween(enqPtrExt(0), cmtPtrExt(0)))

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt(0).flag, deqPtr)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until StoreQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x ", uop(i).cf.pc)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && commited(i), "c")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == StoreQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}