xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/StoreMisalignBuffer.scala (revision 211d620b07edb797ba35b635d24fef4e7294bae2)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.{MemExuOutput, DynInst}

class StoreMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
{
  private val enqPortNum = StorePipelineWidth
  private val maxSplitNum = 2

  require(maxSplitNum == 2)
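  // A scalar store is at most 8 bytes (SD), so a misaligned access can span at
  // most two 16-byte regions; two aligned split accesses are always sufficient.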

  private val SB = "b00".U(2.W)
  private val SH = "b01".U(2.W)
  private val SW = "b10".U(2.W)
  private val SD = "b11".U(2.W)

  // encoding of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    SB -> 0x1.U,
    SH -> 0x3.U,
    SW -> 0xf.U,
    SD -> 0xff.U
  ))
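  // e.g. getMask(SW) = "b1111": one mask bit per byte, so a 4-byte store covers
  // four bytes; the mask is shifted by the byte offset later to build the wmask.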

  def selectOldest[T <: LsPipelineBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (isNotBefore(bits(0).uop.robIdx, bits(1).uop.robIdx) && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }
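  // Tournament-style reduction: the list is halved recursively and each pair keeps
  // the entry that is older in program order (smaller robIdx, then smaller uopIdx).
  // e.g. valid = Seq(true, true) with robIdx 5 and 3 selects the robIdx-3 entry.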

  val io = IO(new Bundle() {
    val redirect        = Flipped(Valid(new Redirect))
    val req             = Vec(enqPortNum, Flipped(Valid(new LsPipelineBundle)))
    val rob             = Flipped(new RobLsqIO)
    val splitStoreReq   = Decoupled(new LsPipelineBundle)
    val splitStoreResp  = Flipped(Valid(new SqWriteBundle))
    val writeBack       = Decoupled(new MemExuOutput)
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val sqControl       = new StoreMaBufToSqControlIO
  })
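  // req: misaligned stores enqueued from the store pipelines.
  // splitStoreReq/splitStoreResp: the split aligned accesses reissued through a
  //   store-address pipeline, and their responses.
  // overwriteExpBuf: overrides the store exception buffer when the fault comes
  //   from the high half of a cross-page access.
  // sqControl: drives the store queue while split data is written to the store buffer.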

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop  := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  val req_valid = RegInit(false.B)
  val req = Reg(new LsPipelineBundle)

  // enqueue
  // s1:
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(x => x.valid))

  // s2: delay 1 cycle
  val s2_req = RegNext(s1_req)
  val s2_valid = (0 until enqPortNum).map(i =>
    RegNext(s1_valid(i)) &&
    !s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
    !s2_req(i).uop.robIdx.needFlush(io.redirect)
  )
  val s2_miss_aligned = s2_req.map(x =>
    x.uop.exceptionVec(storeAddrMisaligned) && !x.uop.exceptionVec(breakPoint) && !TriggerAction.isDmode(x.uop.trigger)
  )

  val s2_enqueue = Wire(Vec(enqPortNum, Bool()))
  for (w <- 0 until enqPortNum) {
    s2_enqueue(w) := s2_valid(w) && s2_miss_aligned(w)
  }

  when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
    req_valid := s2_enqueue.asUInt.orR
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req_valid := true.B
  }
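  // on a redirect, req_valid stays set only if another misaligned store enqueues
  // in the same cycle; otherwise the buffered request is dropped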

  val reqSel = selectOldest(s2_enqueue, s2_req)

  when (req_valid) {
    req := Mux(
      reqSel._1(0) && (isAfter(req.uop.robIdx, reqSel._2(0).uop.robIdx) || (isNotBefore(req.uop.robIdx, reqSel._2(0).uop.robIdx) && req.uop.uopIdx > reqSel._2(0).uop.uopIdx)),
      reqSel._2(0),
      req)
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req := reqSel._2(0)
  }

  val robMatch = req_valid && io.rob.pendingst && (io.rob.pendingPtr === req.uop.robIdx)

  // buffer control:
  //  - split the misaligned store into aligned stores
  //  - send the split stores to sta and collect the results from sta
  //  - control the sq to write the split data to the sb
  //  - write the misaligned store back once all parts complete
  val s_idle :: s_split :: s_req :: s_resp :: s_cal :: s_sq_req :: s_wb :: s_wait :: Nil = Enum(8)
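  // s_idle:   wait until the buffered store reaches the ROB head (robMatch)
  // s_split:  generate the split requests in one cycle
  // s_req:    issue splitStoreReqs(curPtr) through io.splitStoreReq
  // s_resp:   collect the response; exception/MMIO -> s_wb, replay or remaining
  //           requests -> s_req, otherwise -> s_cal
  // s_cal:    once store data is ready, compute wdata/wmask for each split access
  // s_sq_req: let the store queue write the split data into the store buffer
  // s_wb:     write the original uop back
  // s_wait:   wait for ROB commit (or a flush), then reset all state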
  val bufferState = RegInit(s_idle)
  val splitStoreReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitStoreResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new SqWriteBundle))))
  val unSentStores  = RegInit(0.U(maxSplitNum.W))
  val unWriteStores = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))

  // set if any split store raises an exception or touches mmio space
  val globalException = RegInit(false.B)
  val globalMMIO = RegInit(false.B)

  val hasException = ExceptionNO.selectByFu(io.splitStoreResp.bits.uop.exceptionVec, StaCfg).asUInt.orR && !io.splitStoreResp.bits.need_rep
  val isMMIO = io.splitStoreResp.bits.mmio && !io.splitStoreResp.bits.need_rep
  switch(bufferState) {
    is (s_idle) {
      when (robMatch) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitStoreReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitStoreResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isMMIO) {
          // commit directly when an exception occurs
          // if any split store reaches mmio space, delegate to software as a storeAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalMMIO := isMMIO
        } .elsewhen(io.splitStoreResp.bits.need_rep || (unSentStores & ~clearOh).orR) {
          // need replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // got result, go to calculate data and control sq
          bufferState := s_cal
        }
      }
    }

    is (s_cal) {
      when (io.sqControl.storeInfo.dataReady) {
        bufferState := s_sq_req
        curPtr := 0.U
      }
    }

    is (s_sq_req) {
      when (io.sqControl.storeInfo.completeSbTrans) {
        when (!((unWriteStores & ~UIntToOH(curPtr)).orR)) {
          bufferState := s_wb
        }
      }
    }

    is (s_wb) {
      when (io.writeBack.fire) {
        bufferState := s_wait
      }
    }

    is (s_wait) {
      when (io.rob.scommit =/= 0.U || req.uop.robIdx.needFlush(io.redirect)) {
        // rob has committed the unaligned store or handled the exception; reset all state
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentStores := 0.U
        unWriteStores := 0.U
        globalException := false.B
        globalMMIO := false.B
      }
    }
  }

  val highAddress = LookupTree(req.uop.fuOpType(1, 0), List(
    SB -> 0.U,
    SH -> 1.U,
    SW -> 3.U,
    SD -> 7.U
  )) + req.vaddr(4, 0)
  // check whether (vaddr + opSize - 1) and vaddr are in the same 16-byte region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr   = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel    = req.vaddr(3, 0)
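  // e.g. an SD with vaddr(4, 0) = "b01011" gives highAddress = 11 + 7 = "b10010":
  // bit 4 differs from vaddr(4), so the access crosses a 16-byte boundary.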

  // meta of the single 128-bit store
  val new128Store = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of the split stores
  val lowAddrStore  = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrStore = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // final lowResult = Cat(`lowResultWidth` bytes of store data, zero padding up to VLEN)
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes to take from the store data
  // final highResult = (store data >> (8 * lowResultWidth)) zero-extended to VLEN
  val highResultWidth = RegInit(0.U(3.W)) // how many bytes to take from the store data

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      // turn this misaligned store into a single 128-bit store
      unWriteStores := 1.U
      unSentStores := 1.U
      curPtr := 0.U
      new128Store.vaddr := aligned16BytesAddr
      // new128Store.mask  := (getMask(req.uop.fuOpType(1, 0)) << aligned16BytesSel).asUInt
      new128Store.mask  := 0xffff.U
      new128Store.uop   := req.uop
      new128Store.uop.exceptionVec(storeAddrMisaligned) := false.B
      new128Store.is128bit := true.B
      splitStoreReqs(0) := new128Store
    } .otherwise {
      // split this misaligned store into `maxSplitNum` aligned stores
      unWriteStores := Fill(maxSplitNum, 1.U(1.W))
      unSentStores := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrStore.uop := req.uop
      lowAddrStore.uop.exceptionVec(storeAddrMisaligned) := false.B
      highAddrStore.uop := req.uop
      highAddrStore.uop.exceptionVec(storeAddrMisaligned) := false.B

      switch (req.uop.fuOpType(1, 0)) {
        is (SB) {
          assert(false.B, "sb should not trigger misalign")
        }

        is (SH) {
          lowAddrStore.uop.fuOpType := SB
          lowAddrStore.vaddr := req.vaddr
          lowAddrStore.mask  := 0x1.U << lowAddrStore.vaddr(3, 0)
          lowResultWidth    := BYTE1

          highAddrStore.uop.fuOpType := SB
          highAddrStore.vaddr := req.vaddr + 1.U
          highAddrStore.mask  := 0x1.U << highAddrStore.vaddr(3, 0)
          highResultWidth    := BYTE1
        }

        is (SW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "an aligned sw should not trigger misalign")
            }

            is ("b01".U) {
              lowAddrStore.uop.fuOpType := SW
              lowAddrStore.vaddr := req.vaddr - 1.U
              lowAddrStore.mask  := 0xf.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE3

              highAddrStore.uop.fuOpType := SB
              highAddrStore.vaddr := req.vaddr + 3.U
              highAddrStore.mask  := 0x1.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE1
            }

            is ("b10".U) {
              lowAddrStore.uop.fuOpType := SH
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask  := 0x3.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE2

              highAddrStore.uop.fuOpType := SH
              highAddrStore.vaddr := req.vaddr + 2.U
              highAddrStore.mask  := 0x3.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE2
            }

            is ("b11".U) {
              lowAddrStore.uop.fuOpType := SB
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask  := 0x1.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE1

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 1.U
              highAddrStore.mask  := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE3
            }
          }
        }

        is (SD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "an aligned sd should not trigger misalign")
            }

            is ("b001".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 1.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE7

              highAddrStore.uop.fuOpType := SB
              highAddrStore.vaddr := req.vaddr + 7.U
              highAddrStore.mask  := 0x1.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE1
            }

            is ("b010".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 2.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE6

              highAddrStore.uop.fuOpType := SH
              highAddrStore.vaddr := req.vaddr + 6.U
              highAddrStore.mask  := 0x3.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE2
            }

            is ("b011".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 3.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE5

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 5.U
              highAddrStore.mask  := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE3
            }

            is ("b100".U) {
              lowAddrStore.uop.fuOpType := SW
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask  := 0xf.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE4

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 4.U
              highAddrStore.mask  := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE4
            }

            is ("b101".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 5.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE3

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 3.U
              highAddrStore.mask  := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE5
            }

            is ("b110".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 6.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE2

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 2.U
              highAddrStore.mask  := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE6
            }

            is ("b111".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 7.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE1

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 1.U
              highAddrStore.mask  := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE7
            }
          }
        }
      }

      splitStoreReqs(0) := lowAddrStore
      splitStoreReqs(1) := highAddrStore
    }
  }
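  // e.g. an SW at vaddr(3, 0) = "b1110" crosses the boundary with vaddr(1, 0) = "b10":
  // the low SH covers bytes 14-15 of the lower region and the high SH at vaddr + 2
  // covers bytes 0-1 of the upper region (lowResultWidth = highResultWidth = BYTE2).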

  io.splitStoreReq.valid := req_valid && (bufferState === s_req)
  io.splitStoreReq.bits  := splitStoreReqs(curPtr)
  // restore the H extension (hsv) store information
  // bit encoding: | hsv 1 | store 00 | size(2bit) |
  val reqIsHsv  = LSUOpType.isHsv(req.uop.fuOpType)
  io.splitStoreReq.bits.uop.fuOpType := Cat(reqIsHsv, 0.U(2.W), splitStoreReqs(curPtr).uop.fuOpType(1, 0))

  when (io.splitStoreResp.valid) {
    splitStoreResp(curPtr) := io.splitStoreResp.bits
    when (isMMIO) {
      unWriteStores := 0.U
      unSentStores := 0.U
      splitStoreResp(curPtr).uop.exceptionVec := 0.U.asTypeOf(ExceptionVec())
      // delegate to software
      splitStoreResp(curPtr).uop.exceptionVec(storeAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unWriteStores := 0.U
      unSentStores := 0.U
    } .elsewhen (!io.splitStoreResp.bits.need_rep) {
      unSentStores := unSentStores & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
    }
  }
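  // curPtr walks the split requests during s_req/s_resp and is reset to 0 when
  // entering s_sq_req, where it indexes the split data written to the store buffer.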

  val splitStoreData = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new XSBundle {
    val wdata = UInt(VLEN.W)
    val wmask = UInt((VLEN / 8).W)
  }))))

  val unalignedStoreData = io.sqControl.storeInfo.data
  val wmaskLow  = Wire(Vec(VLEN / 8, Bool()))
  val wmaskHigh = Wire(Vec(VLEN / 8, Bool()))
  (0 until (VLEN / 8)).foreach { i =>
    wmaskHigh(i) := i.U < highResultWidth
    wmaskLow(i)  := i.U < lowResultWidth
  }
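  // e.g. lowResultWidth = BYTE1, highResultWidth = BYTE3: the low mask (reversed
  // below) becomes 0x8000 (top byte of the lower region) and the high mask becomes
  // 0x0007 (bottom three bytes of the upper region).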

  when (bufferState === s_cal) {
    when (!cross16BytesBoundary) {
      splitStoreData(0).wdata := LookupTree(aligned16BytesSel, List(
        "b0000".U ->     unalignedStoreData,
        "b0001".U -> Cat(unalignedStoreData, 0.U(( 1 * 8).W)),
        "b0010".U -> Cat(unalignedStoreData, 0.U(( 2 * 8).W)),
        "b0011".U -> Cat(unalignedStoreData, 0.U(( 3 * 8).W)),
        "b0100".U -> Cat(unalignedStoreData, 0.U(( 4 * 8).W)),
        "b0101".U -> Cat(unalignedStoreData, 0.U(( 5 * 8).W)),
        "b0110".U -> Cat(unalignedStoreData, 0.U(( 6 * 8).W)),
        "b0111".U -> Cat(unalignedStoreData, 0.U(( 7 * 8).W)),
        "b1000".U -> Cat(unalignedStoreData, 0.U(( 8 * 8).W)),
        "b1001".U -> Cat(unalignedStoreData, 0.U(( 9 * 8).W)),
        "b1010".U -> Cat(unalignedStoreData, 0.U((10 * 8).W)),
        "b1011".U -> Cat(unalignedStoreData, 0.U((11 * 8).W)),
        "b1100".U -> Cat(unalignedStoreData, 0.U((12 * 8).W)),
        "b1101".U -> Cat(unalignedStoreData, 0.U((13 * 8).W)),
        "b1110".U -> Cat(unalignedStoreData, 0.U((14 * 8).W)),
        "b1111".U -> Cat(unalignedStoreData, 0.U((15 * 8).W))
      ))(VLEN - 1, 0)
      splitStoreData(0).wmask := getMask(req.uop.fuOpType(1, 0)) << aligned16BytesSel
    } .otherwise {
      // low 16-byte part
      val catData = LookupTree(lowResultWidth, List(
        BYTE0 -> unalignedStoreData,
        BYTE1 -> Cat(unalignedStoreData, 0.U((8 * 15).W)),
        BYTE2 -> Cat(unalignedStoreData, 0.U((8 * 14).W)),
        BYTE3 -> Cat(unalignedStoreData, 0.U((8 * 13).W)),
        BYTE4 -> Cat(unalignedStoreData, 0.U((8 * 12).W)),
        BYTE5 -> Cat(unalignedStoreData, 0.U((8 * 11).W)),
        BYTE6 -> Cat(unalignedStoreData, 0.U((8 * 10).W)),
        BYTE7 -> Cat(unalignedStoreData, 0.U((8 *  9).W))
      ))
      splitStoreData(0).wdata := catData(VLEN - 1, 0)
      splitStoreData(0).wmask := VecInit(wmaskLow.reverse).asUInt
      // high 16-byte part
      val shiftData = LookupTree(lowResultWidth, List(
        BYTE0 -> unalignedStoreData(VLEN - 1,    0),
        BYTE1 -> unalignedStoreData(VLEN - 1,    8),
        BYTE2 -> unalignedStoreData(VLEN - 1,   16),
        BYTE3 -> unalignedStoreData(VLEN - 1,   24),
        BYTE4 -> unalignedStoreData(VLEN - 1,   32),
        BYTE5 -> unalignedStoreData(VLEN - 1,   40),
        BYTE6 -> unalignedStoreData(VLEN - 1,   48),
        BYTE7 -> unalignedStoreData(VLEN - 1,   56)
      ))
      splitStoreData(1).wdata := LookupTree(highResultWidth, List(
        BYTE0 -> ZeroExt(shiftData, VLEN),
        BYTE1 -> ZeroExt(shiftData(7,    0), VLEN),
        BYTE2 -> ZeroExt(shiftData(15,   0), VLEN),
        BYTE3 -> ZeroExt(shiftData(23,   0), VLEN),
        BYTE4 -> ZeroExt(shiftData(31,   0), VLEN),
        BYTE5 -> ZeroExt(shiftData(39,   0), VLEN),
        BYTE6 -> ZeroExt(shiftData(47,   0), VLEN),
        BYTE7 -> ZeroExt(shiftData(55,   0), VLEN)
      ))
      splitStoreData(1).wmask := wmaskHigh.asUInt
    }
  }
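  // e.g. for the SW at vaddr(3, 0) = "b1110" above (lowResultWidth = BYTE2): the low
  // half shifts the two lowest data bytes up to bytes 14-15 of the lower region, and
  // the high half zero-extends (data >> 16) into bytes 0-1 of the upper region.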

  io.sqControl.control.hasException := req_valid && globalException

  io.sqControl.control.writeSb := bufferState === s_sq_req
  io.sqControl.control.wdata   := splitStoreData(curPtr).wdata
  io.sqControl.control.wmask   := splitStoreData(curPtr).wmask
  // the paddr and vaddr belong to the current split access, not to the original store
  io.sqControl.control.paddr   := splitStoreResp(curPtr).paddr
  io.sqControl.control.vaddr   := splitStoreResp(curPtr).vaddr
  io.sqControl.control.last    := !((unWriteStores & ~UIntToOH(curPtr)).orR)

  when (bufferState === s_sq_req) {
    when (io.sqControl.storeInfo.completeSbTrans) {
      unWriteStores := unWriteStores & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
    }
  }
  io.writeBack.valid := req_valid && (bufferState === s_wb) && io.sqControl.storeInfo.dataReady
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := ExceptionNO.selectByFu(Mux(
    globalMMIO || globalException,
    splitStoreResp(curPtr).uop.exceptionVec,
    0.U.asTypeOf(ExceptionVec()) // TODO: is this ok?
  ), StaCfg)
  io.writeBack.bits.uop.flushPipe := Mux(globalMMIO || globalException, false.B, true.B)
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := unalignedStoreData
  io.writeBack.bits.isFromLoadUnit := DontCare
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  io.sqControl.control.removeSq := req_valid && (bufferState === s_wait) && !(globalMMIO || globalException) && (io.rob.scommit =/= 0.U)

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  when (flush && (bufferState =/= s_idle)) {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentStores := 0.U
    unWriteStores := 0.U
    globalException := false.B
    globalMMIO := false.B
  }

  // NOTE: special case: an unaligned store crosses a page boundary and the page fault
  // happens in the next (higher) page.
  // if the exception happens in the higher-address half, overwrite the storeExceptionBuffer vaddr
  val shouldOverwrite = req_valid && cross16BytesBoundary && globalException && (curPtr === 1.U)
  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
  val overwriteVaddr = RegEnable(splitStoreResp(curPtr).vaddr, shouldOverwrite)
  val overwriteIsHyper = RegEnable(splitStoreResp(curPtr).isHyper, shouldOverwrite)
  val overwriteGpaddr = RegEnable(splitStoreResp(curPtr).gpaddr, shouldOverwrite)
  val overwriteIsForVSnonLeafPTE = RegEnable(splitStoreResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)

  io.overwriteExpBuf.valid := overwriteExpBuf
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  XSPerfAccumulate("alloc",                  RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush",                  flush)
  XSPerfAccumulate("flush_idle",             flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle",         flush && (bufferState =/= s_idle))
}