xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/StoreMisalignBuffer.scala (revision 491c16ade93d4956fec6dde187943d72bb010bc4)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.backend.fu.FuConfig.StaCfg
import xiangshan.backend.fu.FuType.isVStore

class StoreMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
{
  private val enqPortNum = StorePipelineWidth
  private val maxSplitNum = 2

  require(maxSplitNum == 2)

  private val SB = "b00".U(2.W)
  private val SH = "b01".U(2.W)
  private val SW = "b10".U(2.W)
  private val SD = "b11".U(2.W)

  // encoding of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    SB -> 0x1.U,
    SH -> 0x3.U,
    SW -> 0xf.U,
    SD -> 0xff.U
  ))

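  // selectOldest: recursively reduce the candidates to (at most) one valid entry, the one
  // that is oldest in program order, ordering first by robIdx and then by uopIdx for uops
  // of the same instruction. It returns the oldest entry's valid, payload, and port index.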
  def selectOldest[T <: LsPipelineBundle](valid: Seq[Bool], bits: Seq[T], index: Seq[UInt]): (Seq[Bool], Seq[T], Seq[UInt]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits, index)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      val resIndex = Seq.fill(2)(Wire(chiselTypeOf(index(0))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
        resIndex(i) := index(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (isNotBefore(bits(0).uop.robIdx, bits(1).uop.robIdx) && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))

      val oldestIndex = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (bits(0).uop.robIdx === bits(1).uop.robIdx && bits(0).uop.uopIdx > bits(1).uop.uopIdx), resIndex(1), resIndex(0)),
        Mux(valid(0) && !valid(1), resIndex(0), resIndex(1)))
      (Seq(oldest.valid), Seq(oldest.bits), Seq(oldestIndex))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2), index.take(index.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)), index.takeRight(index.length - (index.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2, left._3 ++ right._3)
    }
  }

  val io = IO(new Bundle() {
    val redirect        = Flipped(Valid(new Redirect))
    val req             = Vec(enqPortNum, Flipped(Decoupled(new LsPipelineBundle)))
    val rob             = Flipped(new RobLsqIO)
    val splitStoreReq   = Decoupled(new LsPipelineBundle)
    val splitStoreResp  = Flipped(Valid(new SqWriteBundle))
    val writeBack       = Decoupled(new MemExuOutput)
    val vecWriteBack    = Vec(VecStorePipelineWidth, Decoupled(new VecPipelineFeedbackIO(isVStore = true)))
    val storeOutValid    = Input(Bool())
    val storeVecOutValid = Input(Bool())
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val sqControl       = new StoreMaBufToSqControlIO

    val toVecStoreMergeBuffer = Vec(VecStorePipelineWidth, new StoreMaBufToVecStoreMergeBufferIO)
    val full = Bool()
  })

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop  := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  class StoreMisalignBufferEntry(implicit p: Parameters) extends LsPipelineBundle {
    val portIndex = UInt(log2Up(enqPortNum).W)
  }
  val req_valid = RegInit(false.B)
  val req = Reg(new StoreMisalignBufferEntry)

  val cross4KBPageBoundary = Wire(Bool())
  val needFlushPipe = RegInit(false.B)

  // buffer control:
  //  - s_idle:  Idle
  //  - s_split: Split the misaligned store into aligned stores
  //  - s_req:   Send a split store to sta and wait for the sta result
  //  - s_resp:  Handle the response of a split store access request
  //  - s_wb:    Write back to rob/vecMergeBuffer
  //  - s_block: Wait for this instr to reach the head of Rob.
  val s_idle :: s_split :: s_req :: s_resp :: s_wb :: s_block :: Nil = Enum(6)
  val bufferState    = RegInit(s_idle)

  // enqueue
  // s1:
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(x => x.valid))

  val s1_index = (0 until io.req.length).map(_.asUInt)
  val reqSel = selectOldest(s1_valid, s1_req, s1_index)

  val reqSelValid = reqSel._1(0)
  val reqSelBits  = reqSel._2(0)
  val reqSelPort  = reqSel._3(0)

  val reqRedirect = reqSelBits.uop.robIdx.needFlush(io.redirect)

  val canEnq = !req_valid && !reqRedirect && reqSelValid
  val robMatch = req_valid && io.rob.pendingst && (io.rob.pendingPtr === req.uop.robIdx)
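  // robMatch: the buffered store is the one the ROB is currently pending on, i.e. it has
  // become the oldest uncommitted store. A store that crosses a 4KB page boundary is only
  // split once this holds (see the s_idle transition below).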

  when(canEnq) {
    connectSamePort(req, reqSelBits)
    req.portIndex := reqSelPort
    req_valid := true.B
  }
  val cross4KBPageEnq = WireInit(false.B)
  when (cross4KBPageBoundary && !reqRedirect) {
    when(
      reqSelValid &&
      (isAfter(req.uop.robIdx, reqSelBits.uop.robIdx) || (isNotBefore(req.uop.robIdx, reqSelBits.uop.robIdx) && req.uop.uopIdx > reqSelBits.uop.uopIdx)) &&
      bufferState === s_idle
    ) {
      connectSamePort(req, reqSelBits)
      req.portIndex := reqSelPort
      cross4KBPageEnq := true.B
      needFlushPipe   := true.B
    } .otherwise {
      req := req
      cross4KBPageEnq := false.B
    }
  }
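  // Note on the block above: while an idle buffer holds a store that crosses a 4KB page
  // boundary, an older misaligned store arriving from the pipeline displaces it
  // (cross4KBPageEnq); needFlushPipe is raised, presumably so that the displaced younger
  // store is re-fetched and executed again after the flush.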

  val reqSelCanEnq = UIntToOH(reqSelPort)

  io.req.zipWithIndex.map{
    case (reqPort, index) => reqPort.ready := reqSelCanEnq(index) && (!req_valid || cross4KBPageBoundary && cross4KBPageEnq)
  }

  io.toVecStoreMergeBuffer.zipWithIndex.map{
    case (toStMB, index) => {
      toStMB.flush   := req_valid && cross4KBPageBoundary && cross4KBPageEnq && UIntToOH(req.portIndex)(index)
      toStMB.mbIndex := req.mbIndex
    }
  }
  io.full := req_valid

  // logic
  val splitStoreReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitStoreResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new SqWriteBundle))))
  val isCrossPage    = RegInit(false.B)
  val exceptionVec   = RegInit(0.U.asTypeOf(ExceptionVec()))
  val unSentStores   = RegInit(0.U(maxSplitNum.W))
  val unWriteStores  = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))

  // if there is exception or mmio in split store
  val globalException = RegInit(false.B)
  val globalMMIO = RegInit(false.B)

  val hasException = io.splitStoreResp.bits.vecActive && !io.splitStoreResp.bits.need_rep &&
    ExceptionNO.selectByFu(io.splitStoreResp.bits.uop.exceptionVec, StaCfg).asUInt.orR || TriggerAction.isDmode(io.splitStoreResp.bits.uop.trigger)
  val isMMIO = io.splitStoreResp.bits.mmio && !io.splitStoreResp.bits.need_rep

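  // Handshake with the store queue for the cross-page case:
  //  - crossPageWithHit: the sq entry being queried is the buffered cross-page store
  //  - crossPageCanDeq:  the store queue need not wait (no page crossing, or the buffer has
  //                      already written back and reached s_block)
  //  - paddr:            8-byte-aligned physical address of the high half of the split
  //  - withSameUop:      the sq-side uop is exactly the buffered vector uop, it is the
  //                      ROB-pending store, and it crosses a page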
  io.sqControl.toStoreQueue.crossPageWithHit := io.sqControl.toStoreMisalignBuffer.sqPtr === req.uop.sqIdx && isCrossPage
  io.sqControl.toStoreQueue.crossPageCanDeq := !isCrossPage || bufferState === s_block
  io.sqControl.toStoreQueue.paddr := Cat(splitStoreResp(1).paddr(splitStoreResp(1).paddr.getWidth - 1, 3), 0.U(3.W))

  io.sqControl.toStoreQueue.withSameUop := io.sqControl.toStoreMisalignBuffer.uop.robIdx === req.uop.robIdx && io.sqControl.toStoreMisalignBuffer.uop.uopIdx === req.uop.uopIdx && req.isvec && robMatch && isCrossPage

  // state transition
  switch(bufferState) {
    is (s_idle) {
      when(cross4KBPageBoundary) {
        when(robMatch) {
          bufferState := s_split
          isCrossPage := true.B
        }
      } .otherwise {
        when (req_valid) {
          bufferState := s_split
          isCrossPage := false.B
        }
      }

    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitStoreReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitStoreResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isMMIO) {
          // commit directly when an exception occurs
          // if any split store reaches mmio space, delegate to software by raising storeAddrMisaligned
          bufferState := s_wb
          globalException := hasException
          globalMMIO := isMMIO
        } .elsewhen(io.splitStoreResp.bits.need_rep || (unSentStores & (~clearOh).asUInt).orR) {
          // need replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // got result, goto calculate data and control sq
          bufferState := s_wb
        }
      }
    }

    is (s_wb) {
      when (req.isvec) {
        when (io.vecWriteBack.map(x => x.fire).reduce( _ || _)) {
          bufferState := s_idle
          req_valid := false.B
          curPtr := 0.U
          unSentStores := 0.U
          unWriteStores := 0.U
          globalException := false.B
          globalMMIO := false.B
          isCrossPage := false.B
          needFlushPipe := false.B
        }
      }
      when (io.writeBack.fire && (!isCrossPage || globalMMIO || globalException)) {
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentStores := 0.U
        unWriteStores := 0.U
        globalException := false.B
        globalMMIO := false.B
        isCrossPage := false.B
        needFlushPipe := false.B
      } .elsewhen(io.writeBack.fire && isCrossPage) {
        bufferState := s_block
      } .otherwise {
        bufferState := s_wb
      }
    }

    is (s_block) {
      when (io.sqControl.toStoreMisalignBuffer.doDeq) {
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentStores := 0.U
        unWriteStores := 0.U
        globalException := false.B
        globalMMIO := false.B
        isCrossPage := false.B
      }
    }
  }

  val alignedType = Mux(req.isvec, req.alignedType(1,0), req.uop.fuOpType(1, 0))

  val highAddress = LookupTree(alignedType, List(
    SB -> 0.U,
    SH -> 1.U,
    SW -> 3.U,
    SD -> 7.U
  )) + req.vaddr(4, 0)

  val highPageAddress = LookupTree(alignedType, List(
    SB -> 0.U,
    SH -> 1.U,
    SW -> 3.U,
    SD -> 7.U
  )) + req.vaddr(12, 0)
  // to see if (vaddr + opSize - 1) and vaddr are in the same 16-byte region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  cross4KBPageBoundary := req_valid && (highPageAddress(12) =/= req.vaddr(12))
  val aligned16BytesAddr   = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel    = req.vaddr(3, 0)
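  // Illustrative example of the boundary checks above: an SD store whose vaddr ends in
  // 0x1D touches bytes 0x1D..0x24, so highAddress(4) differs from vaddr(4) and the access
  // crosses a 16-byte boundary; an SD store at page offset 0xFFD touches offsets
  // 0xFFD..0x1004, so highPageAddress(12) differs from vaddr(12) and it also crosses a
  // 4KB page boundary.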

  // meta of 128 bit store
  val new128Store = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of split stores
  val lowAddrStore  = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrStore = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // final lowResult = Cat(low `lowResultWidth` bytes of the store data, zero padding up to VLEN)
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes to take from the store data
  // final highResult = zero extend to VLEN(`highResultWidth` bytes of (store data >> lowResultWidth bytes))
  val highResultWidth = RegInit(0.U(3.W)) // how many bytes to take from the store data
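  // Illustrative example: an SD (8-byte) store with vaddr(2, 0) = "b101" is split into an
  // 8-byte-aligned low access that carries the first 3 bytes of the store data
  // (lowResultWidth = BYTE3) and a high access that carries the remaining 5 bytes
  // (highResultWidth = BYTE5); see the "b101" case of the SD switch below.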

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      assert(false.B, "a misaligned store that does not cross a 16-byte boundary should not enter the misalign buffer")
    } .otherwise {
      // split this unaligned store into `maxSplitNum` aligned stores
      unWriteStores := Fill(maxSplitNum, 1.U(1.W))
      unSentStores := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrStore.uop := req.uop
      lowAddrStore.uop.exceptionVec(storeAddrMisaligned) := false.B
      highAddrStore.uop := req.uop
      highAddrStore.uop.exceptionVec(storeAddrMisaligned) := false.B

      switch (alignedType(1, 0)) {
        is (SB) {
          assert(false.B, "sb should not trigger misalign")
        }

        is (SH) {
          lowAddrStore.uop.fuOpType := SB
          lowAddrStore.vaddr := req.vaddr
          lowAddrStore.mask  := 0x1.U << lowAddrStore.vaddr(3, 0)
          lowResultWidth    := BYTE1

          highAddrStore.uop.fuOpType := SB
          highAddrStore.vaddr := req.vaddr + 1.U
          highAddrStore.mask  := 0x1.U << highAddrStore.vaddr(3, 0)
          highResultWidth    := BYTE1
        }

        is (SW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger misalign")
            }

            is ("b01".U) {
              lowAddrStore.uop.fuOpType := SW
              lowAddrStore.vaddr := req.vaddr - 1.U
              lowAddrStore.mask  := 0xf.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE3

              highAddrStore.uop.fuOpType := SB
              highAddrStore.vaddr := req.vaddr + 3.U
              highAddrStore.mask  := 0x1.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE1
            }

            is ("b10".U) {
              lowAddrStore.uop.fuOpType := SH
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask  := 0x3.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE2

              highAddrStore.uop.fuOpType := SH
              highAddrStore.vaddr := req.vaddr + 2.U
              highAddrStore.mask  := 0x3.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE2
            }

            is ("b11".U) {
              lowAddrStore.uop.fuOpType := SB
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask  := 0x1.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE1

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 1.U
              highAddrStore.mask  := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE3
            }
          }
        }

        is (SD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger misalign")
            }

            is ("b001".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 1.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE7

              highAddrStore.uop.fuOpType := SB
              highAddrStore.vaddr := req.vaddr + 7.U
              highAddrStore.mask  := 0x1.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE1
            }

            is ("b010".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 2.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE6

              highAddrStore.uop.fuOpType := SH
              highAddrStore.vaddr := req.vaddr + 6.U
              highAddrStore.mask  := 0x3.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE2
            }

            is ("b011".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 3.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE5

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 5.U
              highAddrStore.mask  := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE3
            }

            is ("b100".U) {
              lowAddrStore.uop.fuOpType := SW
              lowAddrStore.vaddr := req.vaddr
              lowAddrStore.mask  := 0xf.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE4

              highAddrStore.uop.fuOpType := SW
              highAddrStore.vaddr := req.vaddr + 4.U
              highAddrStore.mask  := 0xf.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE4
            }

            is ("b101".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 5.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE3

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 3.U
              highAddrStore.mask  := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE5
            }

            is ("b110".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 6.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE2

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 2.U
              highAddrStore.mask  := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE6
            }

            is ("b111".U) {
              lowAddrStore.uop.fuOpType := SD
              lowAddrStore.vaddr := req.vaddr - 7.U
              lowAddrStore.mask  := 0xff.U << lowAddrStore.vaddr(3, 0)
              lowResultWidth    := BYTE1

              highAddrStore.uop.fuOpType := SD
              highAddrStore.vaddr := req.vaddr + 1.U
              highAddrStore.mask  := 0xff.U << highAddrStore.vaddr(3, 0)
              highResultWidth    := BYTE7
            }
          }
        }
      }

      splitStoreReqs(0) := lowAddrStore
      splitStoreReqs(1) := highAddrStore
    }
  }

  io.splitStoreReq.valid := req_valid && (bufferState === s_req)
  io.splitStoreReq.bits  := splitStoreReqs(curPtr)
  io.splitStoreReq.bits.isvec  := req.isvec
  // Restore the fuOpType of an H-extension store
  // bit encoding: | hsv 1 | store 00 | size(2bit) |
  val reqIsHsv  = LSUOpType.isHsv(req.uop.fuOpType)
  io.splitStoreReq.bits.uop.fuOpType := Mux(req.isvec, req.uop.fuOpType, Cat(reqIsHsv, 0.U(2.W), splitStoreReqs(curPtr).uop.fuOpType(1, 0)))
  io.splitStoreReq.bits.alignedType  := Mux(req.isvec, splitStoreReqs(curPtr).uop.fuOpType(1, 0), req.alignedType)
  io.splitStoreReq.bits.isFinalSplit := curPtr(0)
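  // curPtr(0) is set only when the second (high-address) split request is issued, so
  // isFinalSplit marks the last split request of this misaligned store.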

  when (io.splitStoreResp.valid) {
    val resp = io.splitStoreResp.bits
    splitStoreResp(curPtr) := io.splitStoreResp.bits
    when (isMMIO) {
      unWriteStores := 0.U
      unSentStores := 0.U
      exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(exceptionVec.cloneType), StaCfg)
      // delegate to software
      exceptionVec(storeAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unWriteStores := 0.U
      unSentStores := 0.U
      StaCfg.exceptionOut.map(no => exceptionVec(no) := exceptionVec(no) || resp.uop.exceptionVec(no))
    } .elsewhen (!io.splitStoreResp.bits.need_rep) {
      unSentStores := unSentStores & (~UIntToOH(curPtr)).asUInt
      curPtr := curPtr + 1.U
      exceptionVec := 0.U.asTypeOf(ExceptionVec())
    }
  }
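  // curPtr only advances on a successful (non-replayed, exception-free, non-MMIO) response;
  // on MMIO or exception the pending masks are cleared so no further split requests are
  // issued and the buffer proceeds to writeback with the recorded cause.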

  val splitStoreData = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new XSBundle {
    val wdata = UInt(VLEN.W)
    val wmask = UInt((VLEN / 8).W)
  }))))

  val wmaskLow  = Wire(Vec(VLEN / 8, Bool()))
  val wmaskHigh = Wire(Vec(VLEN / 8, Bool()))
  (0 until (VLEN / 8)).map {
    case i  => {
      when (i.U < highResultWidth) {
        wmaskHigh(i) := true.B
      } .otherwise {
        wmaskHigh(i) := false.B
      }
      when (i.U < lowResultWidth) {
        wmaskLow(i) := true.B
      } .otherwise {
        wmaskLow(i) := false.B
      }
    }
  }
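  // wmaskLow/wmaskHigh are thermometer-coded byte masks: bit i is set iff i is below
  // lowResultWidth/highResultWidth, i.e. they select the bytes contributed by each half
  // of the split store.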

  io.writeBack.valid := req_valid && (bufferState === s_wb) && !io.storeOutValid && !req.isvec
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := DontCare
  StaCfg.exceptionOut.map(no => io.writeBack.bits.uop.exceptionVec(no) := (globalMMIO || globalException) && exceptionVec(no))
  io.writeBack.bits.uop.flushPipe := needFlushPipe
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := DontCare
  io.writeBack.bits.isFromLoadUnit := DontCare
  io.writeBack.bits.debug.isMMIO := globalMMIO
  // FIXME lyq: temporarily set to false
  io.writeBack.bits.debug.isNC := false.B
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  io.vecWriteBack.zipWithIndex.map{
    case (wb, index) => {
      wb.valid := req_valid && (bufferState === s_wb) && req.isvec && !io.storeVecOutValid && UIntToOH(req.portIndex)(index)

      wb.bits.mBIndex           := req.mbIndex
      wb.bits.hit               := true.B
      wb.bits.isvec             := true.B
      wb.bits.sourceType        := RSFeedbackType.tlbMiss
      wb.bits.flushState        := DontCare
      wb.bits.trigger           := TriggerAction.None
      wb.bits.mmio              := globalMMIO
      wb.bits.exceptionVec      := ExceptionNO.selectByFu(exceptionVec, VstuCfg)
      wb.bits.hasException      := globalException
      wb.bits.usSecondInv       := req.usSecondInv
      wb.bits.vecFeedback       := true.B
      wb.bits.elemIdx           := req.elemIdx
      wb.bits.alignedType       := req.alignedType
      wb.bits.mask              := req.mask
      wb.bits.vaddr             := req.vaddr
      wb.bits.vaNeedExt         := req.vaNeedExt
      wb.bits.gpaddr            := req.gpaddr
      wb.bits.isForVSnonLeafPTE := req.isForVSnonLeafPTE
      wb.bits.vstart            := req.uop.vpu.vstart
      wb.bits.vecTriggerMask    := 0.U
      wb.bits.nc                := false.B
    }
  }

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  when (flush) {
    bufferState := s_idle
    req_valid := Mux(cross4KBPageEnq && cross4KBPageBoundary && !reqRedirect, req_valid, false.B)
    curPtr := 0.U
    unSentStores := 0.U
    unWriteStores := 0.U
    globalException := false.B
    globalMMIO := false.B
    isCrossPage := false.B
    needFlushPipe := false.B
  }
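  // On a redirect flush the buffer resets, but req_valid is kept when an older
  // cross-4KB-page request was captured in this cycle (cross4KBPageEnq) and that request
  // is not itself being flushed.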

  // NOTE: special case (an unaligned store crosses a page and the page fault happens in the higher page)
  // if the exception happens in the higher page address part, overwrite the storeExceptionBuffer vaddr
  val shouldOverwrite = req_valid && cross16BytesBoundary && globalException && (curPtr === 1.U)
  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
  val overwriteVaddr = RegEnable(splitStoreResp(curPtr).vaddr, shouldOverwrite)
  val overwriteIsHyper = RegEnable(splitStoreResp(curPtr).isHyper, shouldOverwrite)
  val overwriteGpaddr = RegEnable(splitStoreResp(curPtr).gpaddr, shouldOverwrite)
  val overwriteIsForVSnonLeafPTE = RegEnable(splitStoreResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)

  // TODO: in theory there is no need to overwrite; the signal is kept in this form for now
  // and will be removed after sufficient verification.
  io.overwriteExpBuf.valid := false.B
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  XSPerfAccumulate("alloc",                  RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush",                  flush)
  XSPerfAccumulate("flush_idle",             flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle",         flush && (bufferState =/= s_idle))
}
641}