xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VMergeBuffer.scala (revision c41f725a91c55e75c95c55b4bb0d2649f43e4c83)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.diplomacy.BufferParams
23import utils._
24import utility._
25import xiangshan._
26import xiangshan.ExceptionNO._
27import xiangshan.backend.rob.RobPtr
28import xiangshan.backend.Bundles._
29import xiangshan.backend.fu.FuType
30import xiangshan.backend.fu.FuConfig._
31import xiangshan.backend.datapath.NewPipelineConnect
32import xiangshan.backend.fu.vector.Bundles.VType
33import xiangshan.mem._
34import xiangshan.mem.Bundles._
35
/**
  * State kept for one merge-buffer entry (one vector memory uop).
  *
  * The field declaration order is part of the bundle layout and is kept stable.
  */
class MBufferBundle(implicit p: Parameters) extends VLSUBundle {
  // Data and byte mask associated with this uop.
  val data              = UInt(VLEN.W)
  val mask              = UInt(VLENB.W)
  // Number of flows still outstanding; the merge buffer decrements it on pipeline writeback.
  val flowNum           = UInt(flowIdxBits.W)
  val exceptionVec      = ExceptionVec()
  val uop               = new DynInst
  val sourceType        = VSFQFeedbackType()
  val flushState        = Bool()
  val vdIdx             = UInt(3.W)
  // Element index recorded together with the currently held exception.
  val elemIdx           = UInt(elemIdxBits.W)

  // Exception reporting information.
  val vstart            = UInt(elemIdxBits.W)
  val vl                = UInt(elemIdxBits.W)
  val vaNeedExt         = Bool()
  val vaddr             = UInt(XLEN.W)
  val gpaddr            = UInt(GPAddrBits.W)
  val isForVSnonLeafPTE = Bool()
  val fof               = Bool()
  val vlmax             = UInt(elemIdxBits.W)

  /** True once no flow is outstanding any more, i.e. `flowNum` is zero. */
  def allReady(): Bool = !flowNum.orR
}
59
/**
  * Shared implementation of the vector load / vector store merge buffer.
  *
  * An entry is allocated per uop coming from the split pipeline (`io.fromSplit`), is updated
  * by flow writebacks arriving on `io.fromPipeline`, and is released once its `flowNum`
  * reaches zero and it has been selected for dequeue. On dequeue the entry is reported to
  * `io.uopWriteback`, `io.toLsq` and `io.feedback`.
  *
  * For loads (`isVStore = false`) the merge information coming from the pipeline is
  * registered for one cycle before it is applied; for stores it is applied combinationally.
  *
  * @param isVStore selects the store flavour (`VstuCfg`, no latching) or the load flavour
  *                 (`VlduCfg`, one-cycle latching).
  */
abstract class BaseVMergeBuffer(isVStore: Boolean=false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VMergeBufferIO(isVStore))

  // freelist: stores the indices of the free entries.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList: FreeList
  val uopSize: Int
  val enqWidth = io.fromSplit.length
  val deqWidth = io.uopWriteback.length
  val pipeWidth = io.fromPipeline.length
  lazy val fuCfg = if (isVStore) VstuCfg else VlduCfg

  /**
    * Initialises a freshly allocated entry from an enqueue request.
    *
    * The exception vector, source type, flush state and vstart are cleared and `elemIdx`
    * is set to all ones. `vaNeedExt`, `gpaddr` and `isForVSnonLeafPTE` are not written
    * here; they are only written when an exception is recorded for the entry.
    */
  def EnqConnect(source: MergeBufferReq, sink: MBufferBundle) = {
    sink.data         := source.data
    sink.mask         := source.mask
    sink.flowNum      := source.flowNum
    sink.exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(ExceptionVec()), fuCfg)
    sink.uop          := source.uop
    sink.sourceType   := 0.U.asTypeOf(VSFQFeedbackType())
    sink.flushState   := false.B
    sink.vdIdx        := source.vdIdx
    sink.elemIdx      := Fill(elemIdxBits, 1.U)
    sink.fof          := source.fof
    sink.vlmax        := source.vlmax
    sink.vl           := source.uop.vpu.vl
    sink.vaddr        := source.vaddr
    sink.vstart       := 0.U
  }

  /**
    * Builds the writeback bundle for a finished entry.
    *
    * `uop` is copied first and then selected fields (exception vector, vmask, vstart, vl)
    * are overridden from the entry, relying on last-connect semantics.
    */
  def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink               = WireInit(0.U.asTypeOf(new MemExuOutput(isVector = true)))
    sink.data             := source.data
    sink.mask.get         := source.mask
    sink.uop              := source.uop
    sink.uop.exceptionVec := ExceptionNO.selectByFu(source.exceptionVec, fuCfg)
    sink.uop.vpu.vmask    := source.mask
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := source.vdIdx // Mgu needs to use this.
    sink.vdIdx.get        := source.vdIdx
    sink.uop.vpu.vstart   := source.vstart
    sink.uop.vpu.vl       := source.vl
    sink
  }

  /**
    * Builds the feedback sent to the LSQ for a finished entry.
    *
    * COMMIT is signalled when the entry holds no exception (after `selectByFu`), FLUSH when
    * it does; LAST is always set.
    */
  def ToLsqConnect(source: MBufferBundle): FeedbackToLsqIO = {
    val sink                                 = WireInit(0.U.asTypeOf(new FeedbackToLsqIO))
    val hasExp                               = ExceptionNO.selectByFu(source.exceptionVec, fuCfg).asUInt.orR
    sink.robidx                             := source.uop.robIdx
    sink.uopidx                             := source.uop.uopIdx
    sink.feedback(VecFeedbacks.COMMIT)      := !hasExp
    sink.feedback(VecFeedbacks.FLUSH)       := hasExp
    sink.feedback(VecFeedbacks.LAST)        := true.B
    sink.vstart                             := source.vstart // TODO: if lsq need vl for fof?
    sink.vaddr                              := source.vaddr
    sink.vaNeedExt                          := source.vaNeedExt
    sink.gpaddr                             := source.gpaddr
    sink.isForVSnonLeafPTE                  := source.isForVSnonLeafPTE
    sink.vl                                 := source.vl
    sink.exceptionVec                       := ExceptionNO.selectByFu(source.exceptionVec, fuCfg)
    sink
  }


  // Per-entry state. `entries` has no reset value; the flag vectors reset to false.
  val entries      = Reg(Vec(uopSize, new MBufferBundle))
  val needCancel   = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val allocated    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val freeMaskVec  = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val uopFinish    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val needRSReplay = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  // enq, from splitPipeline
  // A request is enqueued only when it is valid and not hit by the current redirect.
  val cancelEnq    = io.fromSplit.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val canEnqueue   = io.fromSplit.map(_.req.valid)
  val needEnqueue  = (0 until enqWidth).map{i =>
    canEnqueue(i) && !cancelEnq(i)
  }

  val freeCount    = uopSize.U - freeList.io.validCount

  for ((enq, i) <- io.fromSplit.zipWithIndex){
    freeList.io.doAllocate(i) := false.B

    freeList.io.allocateReq(i) := true.B

    // Port i uses the slot after those taken by lower-numbered ports enqueuing this cycle.
    val offset    = PopCount(needEnqueue.take(i))
    val enqIndex  = freeList.io.allocateSlot(offset)
    enq.req.ready := freeCount >= (i + 1).U // for better timing

    when(needEnqueue(i) && enq.req.ready){
      freeList.io.doAllocate(i) := true.B
      // enqueue
      allocated(enqIndex)       := true.B
      uopFinish(enqIndex)       := false.B
      needRSReplay(enqIndex)    := false.B

      EnqConnect(enq.req.bits, entries(enqIndex))// initial entry
    }

    enq.resp.bits.mBIndex := enqIndex
    enq.resp.bits.fail    := false.B
    enq.resp.valid        := freeCount >= (i + 1).U // for better timing
  }

  //redirect
  // Allocated entries hit by the redirect are released and their flags cleared.
  for (i <- 0 until uopSize){
    needCancel(i) := entries(i).uop.robIdx.needFlush(io.redirect) && allocated(i)
    when (needCancel(i)) {
      allocated(i)   := false.B
      freeMaskVec(i) := true.B
      uopFinish(i)   := false.B
      needRSReplay(i):= false.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt
  //pipelineWriteback
  // handle the situation where multiple ports are going to write the same uop queue entry
  // select the oldest exception and count the flownum of the pipeline writeback.
  //
  // mergePortMatrix(i)(j) is set for j == i (unconditionally) and for every valid port
  // j > i that targets the same entry as port i. mergedByPrevPortVec(i) is set when a
  // valid port j < i targets the same entry, i.e. port i is handled by that earlier port.
  val mergePortMatrix        = Wire(Vec(pipeWidth, Vec(pipeWidth, Bool())))
  val mergePortMatrixHasExcp = Wire(Vec(pipeWidth, Vec(pipeWidth, Bool())))
  val mergedByPrevPortVec    = Wire(Vec(pipeWidth, Bool()))
  for (i <- 0 until pipeWidth; j <- 0 until pipeWidth) {
    val mergePortValid = (j == i).B ||
      (j > i).B &&
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid

    mergePortMatrix(i)(j)        := mergePortValid
    mergePortMatrixHasExcp(i)(j) := mergePortValid && io.fromPipeline(j).bits.hasException
  }
  for (i <- 0 until pipeWidth) {
    mergedByPrevPortVec(i) := (i != 0).B && Cat((0 until i).map(j =>
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid)).orR
  }

  // Loads consume the merge information one cycle later than stores.
  val mergePortMatrixWrap        = if(isVStore) mergePortMatrix else RegNext(mergePortMatrix)
  val mergePortMatrixHasExcpWrap = if(isVStore) mergePortMatrixHasExcp else RegNext(mergePortMatrixHasExcp)
  val mergedByPrevPortVecWrap    = if(isVStore) mergedByPrevPortVec else RegNext(mergedByPrevPortVec)
  if (backendParams.debugEn){
    dontTouch(mergePortMatrix)
    dontTouch(mergePortMatrixHasExcp)
    dontTouch(mergedByPrevPortVec)
  }

  /**
    * Reduces the candidates to the valid one with the smallest `sel` value.
    *
    * Used to pick the oldest exception when several ports write back an exception at once.
    * Sequences of length 0 or 1 are returned unchanged; length 2 is resolved directly
    * (when exactly one candidate is valid it wins, when none is valid the second one is
    * returned); longer sequences are split in half and reduced recursively.
    *
    * @param valid per-candidate valid flags
    * @param bits  per-candidate payload
    * @param sel   per-candidate ordering key, smaller wins
    * @return the reduced (valid, bits, sel) sequences, each of length at most one for
    *         non-empty input
    */
  def selectOldest[T <: VecPipelineFeedbackIO](valid: Seq[Bool], bits: Seq[T], sel: Seq[UInt]): (Seq[Bool], Seq[T], Seq[UInt]) = {
    // Elaboration-time sanity checks on the Scala sequences (not hardware assertions).
    require(valid.length == bits.length, "selectOldest: valid and bits must have the same length")
    require(valid.length == sel.length, "selectOldest: valid and sel must have the same length")
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits, sel)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(sel(0) < sel(1),
            res(0), res(1)),
        Mux(valid(0) && !valid(1), res(0), res(1)))

      val oldidx = Mux(valid(0) && valid(1),
        Mux(sel(0) < sel(1),
          sel(0), sel(1)),
        Mux(valid(0) && !valid(1), sel(0), sel(1)))
      (Seq(oldest.valid), Seq(oldest.bits), Seq(oldidx))
    } else {
      val left  = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2), sel.take(sel.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)), sel.takeRight(sel.length - (sel.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2, left._3 ++ right._3)
    }
  }

  val pipeValid        = io.fromPipeline.map(_.valid)
  val pipeBits         = io.fromPipeline.map(_.bits)
  val pipeValidReg     = io.fromPipeline.map(x => RegNext(x.valid))
  val pipeBitsReg      = io.fromPipeline.map(x => RegEnable(x.bits, x.valid))
  val wbElemIdx        = pipeBits.map(_.elemIdx)
  val wbMbIndex        = pipeBits.map(_.mBIndex)
  // Element index masked with (vlmax - 1) of the targeted entry; registered for loads.
  val wbElemIdxInField = wbElemIdx.zip(wbMbIndex).map{x =>
    val elemIdxInField = x._1 & (entries(x._2).vlmax - 1.U)
    if(isVStore) elemIdxInField else RegNext(elemIdxInField)
  }
  val selBits          = if(isVStore) pipeBits else pipeBitsReg

  // this port have exception or merged port have exception
  val portHasExcp       = mergePortMatrixHasExcpWrap.map{_.reduce(_ || _)}

  // Record the oldest exception among the ports merged into port i in the target entry.
  for(i <- io.fromPipeline.indices){
    val pipewbvalid         = if(isVStore) pipeValid(i) else pipeValidReg(i)
    val pipewb              = if(isVStore) pipeBits(i)  else pipeBitsReg(i)
    val pipeWbMbIndex       = pipewb.mBIndex
    val entry               = entries(pipeWbMbIndex)
    val entryIsUS           = LSUOpType.isAllUS(entry.uop.fuOpType)
    val entryHasException   = ExceptionNO.selectByFu(entry.exceptionVec, fuCfg).asUInt.orR
    val entryExcp           = entryHasException && entry.mask.orR
    val entryElemIdx        = entry.elemIdx

    val sel                    = selectOldest(mergePortMatrixHasExcpWrap(i), selBits, wbElemIdxInField)
    val selPort                = sel._2
    val selElemInfield         = selPort(0).elemIdx & (entries(pipeWbMbIndex).vlmax - 1.U)
    val selExceptionVec        = selPort(0).exceptionVec
    val selVaddr               = selPort(0).vaddr
    val selElemIdx             = selPort(0).elemIdx

    // Only the first unaligned uop of unit-stride needs to be offset.
    // When unaligned, the lowest bit of mask is 0.
    //  example: 16'b1111_1111_1111_0000
    val firstUnmask            = genVFirstUnmask(selPort(0).mask).asUInt
    val vaddrOffset            = Mux(entryIsUS, firstUnmask, 0.U)
    val vaddr                  = selVaddr + vaddrOffset
    val vstart                 = Mux(entryIsUS, selPort(0).vstart, selElemInfield)

    // select oldest port to raise exception
    // The entry is updated when it holds no exception yet, or when the new exception is
    // not younger than the recorded one (entryElemIdx >= selElemIdx).
    when((((entryElemIdx >= selElemIdx) && entryExcp && portHasExcp(i)) || (!entryExcp && portHasExcp(i))) && pipewbvalid && !mergedByPrevPortVecWrap(i)) {
      entry.uop.trigger     := selPort(0).trigger
      entry.elemIdx         := selElemIdx
      when(!entry.fof || vstart === 0.U){
        // For fof loads, if element 0 raises an exception, vl is not modified, and the trap is taken.
        entry.vstart       := vstart
        entry.exceptionVec := ExceptionNO.selectByFu(selExceptionVec, fuCfg)
        entry.vaddr        := vaddr
        entry.vaNeedExt    := selPort(0).vaNeedExt
        entry.gpaddr       := selPort(0).gpaddr
        entry.isForVSnonLeafPTE := selPort(0).isForVSnonLeafPTE
      }.otherwise{
        // fof with a non-zero faulting element: shrink vl instead of recording the exception.
        entry.uop.vpu.vta  := VType.tu
        entry.vl           := Mux(entry.vl < vstart, entry.vl, vstart)
      }
    }
  }

  // for pipeline writeback
  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    val wbIndex          = pipewb.bits.mBIndex
    val flowNumOffset    = PopCount(mergePortMatrix(i))
    val sourceTypeNext   = entries(wbIndex).sourceType | pipewb.bits.sourceType

    // if is VLoad, need latch 1 cycle to merge data. only flowNum and wbIndex need to latch
    val latchWbValid     = if(isVStore) pipewb.valid else RegNext(pipewb.valid)
    val latchWbIndex     = if(isVStore) wbIndex      else RegEnable(wbIndex, pipewb.valid)
    val latchFlowNum     = if(isVStore) flowNumOffset else RegEnable(flowNumOffset, pipewb.valid)
    val latchMergeByPre  = if(isVStore) mergedByPrevPortVec(i) else RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    // Only the first port of a merge group decrements flowNum, by the size of the group.
    when(latchWbValid && !latchMergeByPre){
      entries(latchWbIndex).flowNum := entries(latchWbIndex).flowNum - latchFlowNum
    }

    when(pipewb.valid){
      entries(wbIndex).sourceType   := sourceTypeNext
      entries(wbIndex).flushState   := pipewb.bits.flushState
    }
    when(pipewb.valid && !pipewb.bits.hit){
      needRSReplay(wbIndex) := true.B
    }
    pipewb.ready := true.B
    // Printable (p"...") is used so the entry index is printed with its simulation-time
    // value; a Scala s"..." string would only embed the signal's elaboration-time toString.
    XSError((entries(latchWbIndex).flowNum - latchFlowNum > entries(latchWbIndex).flowNum) && latchWbValid && !latchMergeByPre, p"entry: $latchWbIndex, FlowWriteback overflow!!\n")
    XSError(!allocated(latchWbIndex) && latchWbValid, p"entry: $latchWbIndex, Writeback error flow!!\n")
  }

  //uopwriteback(deq)
  // An allocated entry with no outstanding flow becomes a dequeue candidate.
  for (i <- 0 until uopSize){
    when(allocated(i) && entries(i).allReady() && !needCancel(i)){
      uopFinish(i) := true.B
    }
  }
  val selPolicy = SelectOne("circ", uopFinish, deqWidth) // select one entry to deq
  private val pipelineOut              = Wire(Vec(deqWidth, DecoupledIO(new MemExuOutput(isVector = true))))
  private val writeBackOut             = Wire(Vec(deqWidth, DecoupledIO(new MemExuOutput(isVector = true))))
  private val writeBackOutExceptionVec = writeBackOut.map(_.bits.uop.exceptionVec)
  for(((port, lsqport), i) <- (pipelineOut zip io.toLsq).zipWithIndex){
    val canGo    = port.ready
    val (selValid, selOHVec) = selPolicy.getNthOH(i + 1)
    val entryIdx = OHToUInt(selOHVec)
    val selEntry = entries(entryIdx)
    val selAllocated = allocated(entryIdx)
    val selFire  = selValid && canGo
    // The selected entry is released whether or not it has to be replayed.
    when(selFire){
      freeMaskVec(entryIdx) := selAllocated
      allocated(entryIdx)   := false.B
      uopFinish(entryIdx)   := false.B
      needRSReplay(entryIdx):= false.B
    }
    //writeback connect
    // No writeback for entries that need an RS replay or are being flushed.
    port.valid   := selFire && selAllocated && !needRSReplay(entryIdx) && !selEntry.uop.robIdx.needFlush(io.redirect)
    port.bits    := DeqConnect(selEntry)
    //to lsq
    lsqport.bits := ToLsqConnect(selEntry) // when uopwriteback, free MBuffer entry, write to lsq
    lsqport.valid:= selFire && selAllocated && !needRSReplay(entryIdx)
    //to RS
    val feedbackOut                       = WireInit(0.U.asTypeOf(io.feedback(i).bits)).suggestName(s"feedbackOut_${i}")
    val feedbackValid                     = selFire && selAllocated
    feedbackOut.hit                      := !needRSReplay(entryIdx)
    feedbackOut.robIdx                   := selEntry.uop.robIdx
    feedbackOut.sourceType               := selEntry.sourceType
    feedbackOut.flushState               := selEntry.flushState
    feedbackOut.dataInvalidSqIdx         := DontCare
    feedbackOut.sqIdx                    := selEntry.uop.sqIdx
    feedbackOut.lqIdx                    := selEntry.uop.lqIdx

    // Feedback is registered for one cycle.
    io.feedback(i).valid                 := RegNext(feedbackValid)
    io.feedback(i).bits                  := RegEnable(feedbackOut, feedbackValid)

    NewPipelineConnect(
      port, writeBackOut(i), writeBackOut(i).fire,
      Mux(port.fire,
        selEntry.uop.robIdx.needFlush(io.redirect),
        writeBackOut(i).bits.uop.robIdx.needFlush(io.redirect)),
      Option(s"VMergebufferPipelineConnect${i}")
    )
    io.uopWriteback(i)                  <> writeBackOut(i)
    io.uopWriteback(i).bits.uop.exceptionVec := ExceptionNO.selectByFu(writeBackOutExceptionVec(i), fuCfg)
  }

  QueuePerf(uopSize, freeList.io.validCount, freeList.io.validCount === 0.U)
}
381
/**
  * Vector load merge buffer.
  *
  * Adds the data-merge path on top of [[BaseVMergeBuffer]]: data written back by the load
  * pipeline is merged in the writeback cycle (step0), registered, and written into the
  * entry one cycle later (step1).
  */
class VLMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=false){
  override lazy val uopSize = VlMergeBufferSize
  println(s"VLMergeBuffer Size: ${VlMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecLoadPipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VLoad MergeBuffer freelist"
  ))
  // Raised towards the split stage when at most 6 entries are free.
  io.toSplit.get.threshold := freeCount <= 6.U

  //merge data
  val flowWbElemIdx     = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val flowWbElemIdxInVd = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val pipewbValidReg    = Wire(Vec(pipeWidth, Bool()))
  val wbIndexReg        = Wire(Vec(pipeWidth, UInt(vlmBindexBits.W)))
  val mergeDataReg      = Wire(Vec(pipeWidth, UInt(VLEN.W)))

  // Byte mask per port. With an Exp/Dmode trigger action the bytes in vecTriggerMask are
  // removed; otherwise the whole mask is cleared when the flow carries any exception other
  // than breakPoint.
  val maskWithexceptionMask = io.fromPipeline.map{ x=>
    Mux(
      TriggerAction.isExp(x.bits.trigger) || TriggerAction.isDmode(x.bits.trigger),
      ~x.bits.vecTriggerMask,
      Fill(x.bits.mask.getWidth, !ExceptionNO.selectByFuAndUnSelect(x.bits.exceptionVec, fuCfg, Seq(breakPoint)).asUInt.orR)
    ).asUInt & x.bits.mask
  }

  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    /** step0 **/
    val wbIndex = pipewb.bits.mBIndex
    val alignedType = pipewb.bits.alignedType
    val elemIdxInsideVd = pipewb.bits.elemIdxInsideVd
    flowWbElemIdx(i) := pipewb.bits.elemIdx
    flowWbElemIdxInVd(i) := elemIdxInsideVd.get

    // Bypass: data merged in the previous cycle for the same entry has not reached
    // `entries` yet, so it takes priority (lowest port index first) over the stored data.
    // Built over all pipeline ports instead of a fixed list of three.
    val bypassCandidates = (0 until pipeWidth).map { j =>
      (pipewbValidReg(j) && (wbIndexReg(j) === wbIndex)) -> mergeDataReg(j)
    }
    val oldData = PriorityMux(
      bypassCandidates :+ (true.B -> entries(wbIndex).data) // default use entries_data
    )
    val mergedData = mergeDataWithElemIdx(
      oldData = oldData,
      newData = io.fromPipeline.map(_.bits.vecdata.get),
      alignedType = alignedType(1,0),
      elemIdx = flowWbElemIdxInVd,
      valids = mergePortMatrix(i)
    )
    /* this only for unit-stride load data merge
     * cycle0: broaden 128-bits to 256-bits (max 6 to 1)
     * cycle1: select 128-bits data from 256-bits (16 to 1)
     */
    val (brodenMergeData, brodenMergeMask)     = mergeDataByIndex(
      data    = io.fromPipeline.map(_.bits.vecdata.get).drop(i),
      mask    = maskWithexceptionMask.drop(i),
      index   = io.fromPipeline(i).bits.elemIdxInsideVd.get,
      valids  = mergePortMatrix(i).drop(i)
    )
    /** step1 **/
    pipewbValidReg(i)      := RegNext(pipewb.valid)
    wbIndexReg(i)          := RegEnable(wbIndex, pipewb.valid)
    mergeDataReg(i)        := RegEnable(mergedData, pipewb.valid) // for not Unit-stride
    val brodenMergeDataReg  = RegEnable(brodenMergeData, pipewb.valid) // only for Unit-stride
    val brodenMergeMaskReg  = RegEnable(brodenMergeMask, pipewb.valid)
    val mergedByPrevPortReg = RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    val regOffsetReg        = RegEnable(pipewb.bits.reg_offset.get, pipewb.valid) // only for Unit-stride
    val isusMerge           = RegEnable(alignedType(2), pipewb.valid)

    // Select the window starting at byte offset regOffsetReg from the broadened data/mask.
    val usSelData           = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ byteOff => getNoAlignedSlice(brodenMergeDataReg, byteOff, 128)})
    val usSelMask           = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ byteOff => brodenMergeMaskReg(16 + byteOff - 1, byteOff)})
    val usMergeData         = mergeDataByByte(entries(wbIndexReg(i)).data, usSelData, usSelMask)
    // Ports merged into an earlier port do not write the entry themselves.
    when(pipewbValidReg(i) && !mergedByPrevPortReg){
      entries(wbIndexReg(i)).data := Mux(isusMerge, usMergeData, mergeDataReg(i)) // if aligned(2) == 1, is Unit-Stride inst
    }
  }
}
458
/**
  * Vector store merge buffer.
  *
  * Uses the combinational (non-latched) flavour of [[BaseVMergeBuffer]] and carries no
  * data on dequeue.
  */
class VSMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore = true) {
  override lazy val uopSize = VsMergeBufferSize
  println(s"VSMergeBuffer Size: ${VsMergeBufferSize}")

  override lazy val freeList = Module(
    new FreeList(
      size           = uopSize,
      allocWidth     = VecStorePipelineWidth,
      freeWidth      = deqWidth,
      enablePreAlloc = false,
      moduleName     = "VStore MergeBuffer freelist"
    )
  )

  /**
    * Store flavour of the dequeue bundle: only the uop (with the entry's exception vector
    * and vstart) and a zeroed debug bundle are driven; the data-related fields are left
    * as DontCare.
    */
  override def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val out = Wire(new MemExuOutput(isVector = true))

    // The bulk uop connect must come first; the field overrides rely on last-connect.
    out.uop              := source.uop
    out.uop.exceptionVec := source.exceptionVec
    out.uop.vpu.vstart   := source.vstart

    out.debug            := 0.U.asTypeOf(new DebugBundle)

    // Fields not driven for stores.
    out.data             := DontCare
    out.mask.get         := DontCare
    out.vdIdxInField.get := DontCare
    out.vdIdx.get        := DontCare
    out.isFromLoadUnit   := DontCare

    out
  }

  // A flush from the misalign buffer marks the addressed entry as needing an RS replay.
  private val misalignFlush = io.fromMisalignBuffer.get
  when (misalignFlush.flush) {
    needRSReplay(misalignFlush.mbIndex) := true.B
  }
}
488