// xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VMergeBuffer.scala (revision 9e12e8edb26ee7dce62315a8f279ea9f61aa239d)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.diplomacy.BufferParams
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.backend.fu.FuType
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.fu.vector.Bundles.VType
import xiangshan.mem._
import xiangshan.mem.Bundles._

/**
  * Per-entry state of a vector merge buffer.
  *
  * One entry is initialised at enqueue (see `BaseVMergeBuffer.EnqConnect`), updated by
  * pipeline writebacks, and read out at dequeue towards the backend and the LSQ.
  * The field declaration order is left untouched because it defines the bundle layout.
  */
class MBufferBundle(implicit p: Parameters) extends VLSUBundle{
  val data             = UInt(VLEN.W)          // merged data of this uop
  val mask             = UInt(VLENB.W)         // byte mask; also driven onto uop.vpu.vmask at dequeue
  val flowNum          = UInt(flowIdxBits.W)   // flows still outstanding; the entry is ready when this is 0
  val exceptionVec     = ExceptionVec()        // exception recorded for this uop
  val uop              = new DynInst
  // val vdOffset         = UInt(vOffsetBits.W)
  val sourceType       = VSFQFeedbackType()    // OR-accumulated from pipeline writebacks
  val flushState       = Bool()                // copied from the latest pipeline writeback
  val vdIdx            = UInt(3.W)
  val elemIdx          = UInt(elemIdxBits.W) // element index
  // for exception
  val vstart           = UInt(elemIdxBits.W)
  val vl               = UInt(elemIdxBits.W)
  val vaNeedExt        = Bool()
  val vaddr            = UInt(XLEN.W)
  val gpaddr           = UInt(GPAddrBits.W)
  val isForVSnonLeafPTE= Bool()
  val fof              = Bool()                // presumably "fault-only-first" load -- TODO confirm
  val vlmax            = UInt(elemIdxBits.W)   // (vlmax - 1) is used as a mask to get the element index inside a field

  /** True once every flow of this uop has been written back (`flowNum` reached zero). */
  def allReady(): Bool = (flowNum === 0.U)
}

/**
  * Logic shared by the vector load and vector store merge buffers.
  *
  * Life cycle of an entry:
  *  1. enqueue  - a request on `io.fromSplit` allocates a slot from `freeList` and initialises the entry;
  *  2. writeback - every flow arriving on `io.fromPipeline` decrements `flowNum`, accumulates feedback
  *     information and, if it carries an exception, may update the entry's exception state;
  *  3. dequeue  - once `flowNum` is zero the entry is marked finished, selected, freed and reported on
  *     `io.uopWriteback`, `io.toLsq` and `io.feedback`.
  *
  * For vector loads (`isVStore = false`) the merge/exception bookkeeping is delayed by one register
  * stage (`RegNext`/`RegEnable`); for vector stores it is combinational.
  *
  * Subclasses provide `freeList` and `uopSize` (as `lazy val`s, because they are used while this
  * class body is being constructed).
  *
  * @param isVStore selects the store flavour (function-unit config, no latching of writebacks)
  */
abstract class BaseVMergeBuffer(isVStore: Boolean=false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VMergeBufferIO(isVStore))

  // freeList: stores the indices of valid entries.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList: FreeList
  val uopSize: Int
  val enqWidth = io.fromSplit.length
  val deqWidth = io.uopWriteback.length
  val pipeWidth = io.fromPipeline.length
  lazy val fuCfg = if (isVStore) VstuCfg else VlduCfg

  /** Initialise `sink` (a buffer entry) from an enqueue request. */
  def EnqConnect(source: MergeBufferReq, sink: MBufferBundle) = {
    sink.data         := source.data
    sink.mask         := source.mask
    sink.flowNum      := source.flowNum
    sink.exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(ExceptionVec()), fuCfg)
    sink.uop          := source.uop
    sink.sourceType   := 0.U.asTypeOf(VSFQFeedbackType())
    sink.flushState   := false.B
    sink.vdIdx        := source.vdIdx
    sink.elemIdx      := Fill(elemIdxBits, 1.U) // all ones
    sink.fof          := source.fof
    sink.vlmax        := source.vlmax
    sink.vl           := source.uop.vpu.vl
    sink.vaddr        := source.vaddr
    sink.vstart       := 0.U
  }

  /** Build the backend writeback payload from a buffer entry. */
  def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink               = WireInit(0.U.asTypeOf(new MemExuOutput(isVector = true)))
    sink.data             := source.data
    sink.mask.get         := source.mask
    sink.uop              := source.uop
    sink.uop.exceptionVec := ExceptionNO.selectByFu(source.exceptionVec, fuCfg)
    sink.uop.vpu.vmask    := source.mask
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := source.vdIdx // Mgu needs to use this.
    sink.vdIdx.get        := source.vdIdx
    sink.uop.vpu.vstart   := source.vstart
    sink.uop.vpu.vl       := source.vl
    sink
  }

  /** Build the LSQ feedback payload from a buffer entry: COMMIT without exception, FLUSH with one. */
  def ToLsqConnect(source: MBufferBundle): FeedbackToLsqIO = {
    val sink                                 = WireInit(0.U.asTypeOf(new FeedbackToLsqIO))
    val hasExp                               = ExceptionNO.selectByFu(source.exceptionVec, fuCfg).asUInt.orR
    sink.robidx                             := source.uop.robIdx
    sink.uopidx                             := source.uop.uopIdx
    sink.feedback(VecFeedbacks.COMMIT)      := !hasExp
    sink.feedback(VecFeedbacks.FLUSH)       := hasExp
    sink.feedback(VecFeedbacks.LAST)        := true.B
    sink.vstart                             := source.vstart // TODO: if lsq need vl for fof?
    sink.vaddr                              := source.vaddr
    sink.vaNeedExt                          := source.vaNeedExt
    sink.gpaddr                             := source.gpaddr
    sink.isForVSnonLeafPTE                  := source.isForVSnonLeafPTE
    sink.vl                                 := source.vl
    sink.exceptionVec                       := ExceptionNO.selectByFu(source.exceptionVec, fuCfg)
    sink
  }


  val entries      = Reg(Vec(uopSize, new MBufferBundle))
  val needCancel   = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val allocated    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val freeMaskVec  = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val uopFinish    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val needRSReplay = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  // enq, from splitPipeline
  // val allowEnqueue =
  val cancelEnq    = io.fromSplit.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val canEnqueue   = io.fromSplit.map(_.req.valid)
  val needEnqueue  = (0 until enqWidth).map{i =>
    canEnqueue(i) && !cancelEnq(i)
  }

  // number of free slots
  val freeCount    = uopSize.U - freeList.io.validCount

  for ((enq, i) <- io.fromSplit.zipWithIndex){
    freeList.io.doAllocate(i) := false.B

    freeList.io.allocateReq(i) := true.B

    // slot offset of port i = number of lower-numbered ports that enqueue in this cycle
    val offset    = PopCount(needEnqueue.take(i))
    val enqIndex  = freeList.io.allocateSlot(offset)
    // port i is accepted only if at least (i + 1) slots are free
    val hasRoom   = freeCount >= (i + 1).U // for better timing
    enq.req.ready := hasRoom

    when(needEnqueue(i) && hasRoom){
      freeList.io.doAllocate(i) := true.B
      // enqueue
      allocated(enqIndex)       := true.B
      uopFinish(enqIndex)       := false.B
      needRSReplay(enqIndex)    := false.B

      EnqConnect(enq.req.bits, entries(enqIndex))// initial entry
    }

    enq.resp.bits.mBIndex := enqIndex
    enq.resp.bits.fail    := false.B
    enq.resp.valid        := hasRoom
  }

  //redirect
  for (i <- 0 until uopSize){
    needCancel(i) := entries(i).uop.robIdx.needFlush(io.redirect) && allocated(i)
    when (needCancel(i)) {
      allocated(i)   := false.B
      freeMaskVec(i) := true.B
      uopFinish(i)   := false.B
      needRSReplay(i):= false.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt
  //pipelineWriteback
  // handle the situation where multiple ports are going to write the same uop queue entry
  // select the oldest exception and count the flownum of the pipeline writeback.
  val mergePortMatrix        = Wire(Vec(pipeWidth, Vec(pipeWidth, Bool())))
  val mergePortMatrixHasExcp = Wire(Vec(pipeWidth, Vec(pipeWidth, Bool())))
  val mergedByPrevPortVec    = Wire(Vec(pipeWidth, Bool()))
  // mergePortMatrix(i)(j): port j is merged into port i, i.e. j is i itself, or a valid
  // higher-numbered port that targets the same entry.
  for (i <- 0 until pipeWidth; j <- 0 until pipeWidth) {
    val mergePortValid = (j == i).B ||
      (j > i).B &&
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid

    mergePortMatrix(i)(j)        := mergePortValid
    mergePortMatrixHasExcp(i)(j) := mergePortValid && io.fromPipeline(j).bits.hasException
  }
  // mergedByPrevPortVec(i): some valid lower-numbered port targets the same entry, so port i is
  // already accounted for by that port.
  for (i <- 0 until pipeWidth) {
    mergedByPrevPortVec(i) := (i != 0).B && Cat((0 until i).map(j =>
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid)).orR
  }

  val mergePortMatrixWrap        = if(isVStore) mergePortMatrix else RegNext(mergePortMatrix)
  val mergePortMatrixHasExcpWrap = if(isVStore) mergePortMatrixHasExcp else RegNext(mergePortMatrixHasExcp)
  val mergedByPrevPortVecWrap    = if(isVStore) mergedByPrevPortVec else RegNext(mergedByPrevPortVec)
  if (backendParams.debugEn){
    dontTouch(mergePortMatrix)
    dontTouch(mergePortMatrixHasExcp)
    dontTouch(mergedByPrevPortVec)
  }

  /**
    * Tournament selection of the valid candidate with the smallest `sel` value.
    * When several ports write back an exception, the oldest one has to be selected.
    *
    * @param valid per-candidate valid flags
    * @param bits  per-candidate payloads
    * @param sel   per-candidate ordering keys; on equal keys the second candidate of a pair wins
    * @return one-element sequences (valid, payload, key) of the winner; inputs of length 0 or 1
    *         are returned unchanged
    */
  def selectOldest[T <: VecPipelineFeedbackIO](valid: Seq[Bool], bits: Seq[T], sel: Seq[UInt]): (Seq[Bool], Seq[T], Seq[UInt]) = {
    // elaboration-time shape checks
    require(valid.length == bits.length, "selectOldest: valid and bits must have the same length")
    require(valid.length == sel.length, "selectOldest: valid and sel must have the same length")
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits, sel)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(sel(0) < sel(1),
            res(0), res(1)),
        Mux(valid(0) && !valid(1), res(0), res(1)))

      val oldidx = Mux(valid(0) && valid(1),
        Mux(sel(0) < sel(1),
          sel(0), sel(1)),
        Mux(valid(0) && !valid(1), sel(0), sel(1)))
      (Seq(oldest.valid), Seq(oldest.bits), Seq(oldidx))
    } else {
      val left  = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2), sel.take(sel.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)), sel.takeRight(sel.length - (sel.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2, left._3 ++ right._3)
    }
  }

  val pipeValid        = io.fromPipeline.map(_.valid)
  val pipeBits         = io.fromPipeline.map(_.bits)
  val pipeValidReg     = io.fromPipeline.map(x => RegNext(x.valid))
  val pipeBitsReg      = io.fromPipeline.map(x => RegEnable(x.bits, x.valid))
  val wbElemIdx        = pipeBits.map(_.elemIdx)
  val wbMbIndex        = pipeBits.map(_.mBIndex)
  // element index inside the field: elemIdx masked with (vlmax - 1) of the target entry
  val wbElemIdxInField = wbElemIdx.zip(wbMbIndex).map{x =>
    val elemIdxInField = x._1 & (entries(x._2).vlmax - 1.U)
    if(isVStore) elemIdxInField else RegNext(elemIdxInField)
  }
  val selBits          = if(isVStore) pipeBits else pipeBitsReg

  // this port have exception or merged port have exception
  val portHasExcp       = mergePortMatrixHasExcpWrap.map{_.reduce(_ || _)}

  for(i <- io.fromPipeline.indices){
    val pipewbvalid         = if(isVStore) pipeValid(i) else pipeValidReg(i)
    val pipewb              = if(isVStore) pipeBits(i)  else pipeBitsReg(i)
    val pipeWbMbIndex       = pipewb.mBIndex
    val entry               = entries(pipeWbMbIndex)
    val entryIsUS           = LSUOpType.isAllUS(entry.uop.fuOpType)
    val entryHasException   = ExceptionNO.selectByFu(entry.exceptionVec, fuCfg).asUInt.orR
    val entryExcp           = entryHasException && entry.mask.orR
    val entryElemIdx        = entry.elemIdx

    val sel                    = selectOldest(mergePortMatrixHasExcpWrap(i), selBits, wbElemIdxInField)
    val selPort                = sel._2
    val selElemInfield         = selPort(0).elemIdx & (entry.vlmax - 1.U)
    val selExceptionVec        = selPort(0).exceptionVec
    val selVaddr               = selPort(0).vaddr
    val selElemIdx             = selPort(0).elemIdx

    // Only the first unaligned uop of unit-stride needs to be offset.
    // When unaligned, the lowest bit of mask is 0.
    //  example: 16'b1111_1111_1111_0000
    val firstUnmask            = genVFirstUnmask(selPort(0).mask).asUInt
    val vaddrOffset            = Mux(entryIsUS, firstUnmask, 0.U)
    val vaddr                  = selVaddr + vaddrOffset
    val vstart                 = Mux(entryIsUS, selPort(0).vstart, selElemInfield)

    // select oldest port to raise exception:
    // the incoming exception replaces the entry's state when the entry has no exception yet,
    // or when the entry's recorded element index is not smaller than the incoming one.
    val overridesEntry         = !entryExcp || (entryElemIdx >= selElemIdx)
    when(portHasExcp(i) && overridesEntry && pipewbvalid && !mergedByPrevPortVecWrap(i)) {
      entry.uop.trigger     := selPort(0).trigger
      entry.elemIdx         := selElemIdx
      when(!entry.fof || vstart === 0.U){
        // For fof loads, if element 0 raises an exception, vl is not modified, and the trap is taken.
        entry.vstart       := vstart
        entry.exceptionVec := ExceptionNO.selectByFu(selExceptionVec, fuCfg)
        entry.vaddr        := vaddr
        entry.vaNeedExt    := selPort(0).vaNeedExt
        entry.gpaddr       := selPort(0).gpaddr
        entry.isForVSnonLeafPTE := selPort(0).isForVSnonLeafPTE
      }.otherwise{
        // fof with a faulting element other than element 0: no exception is recorded,
        // vl is reduced to the faulting element index instead.
        entry.uop.vpu.vta  := VType.tu
        entry.vl           := Mux(entry.vl < vstart, entry.vl, vstart)
      }
    }
  }

  // for pipeline writeback
  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    val wbIndex          = pipewb.bits.mBIndex
    // number of flows retired through port i in this cycle (itself plus merged higher ports)
    val flowNumOffset    = PopCount(mergePortMatrix(i))
    val sourceTypeNext   = entries(wbIndex).sourceType | pipewb.bits.sourceType

    // if is VLoad, need latch 1 cycle to merge data. only flowNum and wbIndex need to latch
    val latchWbValid     = if(isVStore) pipewb.valid else RegNext(pipewb.valid)
    val latchWbIndex     = if(isVStore) wbIndex      else RegEnable(wbIndex, pipewb.valid)
    val latchFlowNum     = if(isVStore) flowNumOffset else RegEnable(flowNumOffset, pipewb.valid)
    val latchMergeByPre  = if(isVStore) mergedByPrevPortVec(i) else RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    when(latchWbValid && !latchMergeByPre){
      entries(latchWbIndex).flowNum := entries(latchWbIndex).flowNum - latchFlowNum
    }

    when(pipewb.valid){
      entries(wbIndex).sourceType   := sourceTypeNext
      entries(wbIndex).flushState   := pipewb.bits.flushState
    }
    when(pipewb.valid && !pipewb.bits.hit){
      needRSReplay(wbIndex) := true.B
    }
    pipewb.ready := true.B
    // The entry index is passed as a format argument so that its simulation-time value is printed
    // (a Scala s"..." interpolation would only embed the signal's elaboration-time description).
    XSError((entries(latchWbIndex).flowNum - latchFlowNum > entries(latchWbIndex).flowNum) && latchWbValid && !latchMergeByPre, "entry: %d, FlowWriteback overflow!!\n", latchWbIndex)
    XSError(!allocated(latchWbIndex) && latchWbValid, "entry: %d, Writeback error flow!!\n", latchWbIndex)
  }

  //uopwriteback(deq)
  for (i <- 0 until uopSize){
    when(allocated(i) && entries(i).allReady() && !needCancel(i)){
      uopFinish(i) := true.B
    }
  }
  val selPolicy = SelectOne("circ", uopFinish, deqWidth) // select one entry to deq
  private val pipelineOut              = Wire(Vec(deqWidth, DecoupledIO(new MemExuOutput(isVector = true))))
  private val writeBackOut             = Wire(Vec(deqWidth, DecoupledIO(new MemExuOutput(isVector = true))))
  private val writeBackOutExceptionVec = writeBackOut.map(_.bits.uop.exceptionVec)
  for(((port, lsqport), i) <- (pipelineOut zip io.toLsq).zipWithIndex){
    val canGo    = port.ready
    val (selValid, selOHVec) = selPolicy.getNthOH(i + 1)
    val entryIdx = OHToUInt(selOHVec)
    val selEntry = entries(entryIdx)
    val selAllocated = allocated(entryIdx)
    val selFire  = selValid && canGo
    when(selFire){
      // release the entry (only report it to the free list if it is still allocated)
      freeMaskVec(entryIdx) := selAllocated
      allocated(entryIdx)   := false.B
      uopFinish(entryIdx)   := false.B
      needRSReplay(entryIdx):= false.B
    }
    //writeback connect
    port.valid   := selFire && selAllocated && !needRSReplay(entryIdx) && !selEntry.uop.robIdx.needFlush(io.redirect)
    port.bits    := DeqConnect(selEntry)
    //to lsq
    lsqport.bits := ToLsqConnect(selEntry) // when uopwriteback, free MBuffer entry, write to lsq
    lsqport.valid:= selFire && selAllocated && !needRSReplay(entryIdx)
    //to RS
    val feedbackOut                       = WireInit(0.U.asTypeOf(io.feedback(i).bits)).suggestName(s"feedbackOut_${i}")
    val feedbackValid                     = selFire && selAllocated
    feedbackOut.hit                      := !needRSReplay(entryIdx)
    feedbackOut.robIdx                   := selEntry.uop.robIdx
    feedbackOut.sourceType               := selEntry.sourceType
    feedbackOut.flushState               := selEntry.flushState
    feedbackOut.dataInvalidSqIdx         := DontCare
    feedbackOut.sqIdx                    := selEntry.uop.sqIdx
    feedbackOut.lqIdx                    := selEntry.uop.lqIdx

    // feedback is sent one cycle after the entry is dequeued
    io.feedback(i).valid                 := RegNext(feedbackValid)
    io.feedback(i).bits                  := RegEnable(feedbackOut, feedbackValid)

    NewPipelineConnect(
      port, writeBackOut(i), writeBackOut(i).fire,
      Mux(port.fire,
        selEntry.uop.robIdx.needFlush(io.redirect),
        writeBackOut(i).bits.uop.robIdx.needFlush(io.redirect)),
      Option(s"VMergebufferPipelineConnect${i}")
    )
    io.uopWriteback(i)                  <> writeBackOut(i)
    io.uopWriteback(i).bits.uop.exceptionVec := ExceptionNO.selectByFu(writeBackOutExceptionVec(i), fuCfg)
  }

  QueuePerf(uopSize, freeList.io.validCount, freeList.io.validCount === 0.U)
}

/**
  * Vector load merge buffer: the common merge-buffer logic plus merging of the data written back
  * by the load pipelines into `entries(_).data`.
  *
  * Data merging takes two steps: the merged value is computed in the cycle of the writeback
  * (step0), registered, and written into the entry one cycle later (step1).
  */
class VLMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=false){
  override lazy val uopSize = VlMergeBufferSize
  println(s"VLMergeBuffer Size: ${VlMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecLoadPipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VLoad MergeBuffer freelist"
  ))
  // asserted when at most 6 slots are free
  io.toSplit.get.threshold := freeCount <= 6.U

  //merge data
  val flowWbElemIdx     = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val flowWbElemIdxInVd = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val pipewbValidReg    = Wire(Vec(pipeWidth, Bool()))
  val wbIndexReg        = Wire(Vec(pipeWidth, UInt(vlmBindexBits.W)))
  val mergeDataReg      = Wire(Vec(pipeWidth, UInt(VLEN.W)))

  // Per-port byte mask used for merging: with a trigger action (exception / debug mode) the bytes
  // flagged in vecTriggerMask are dropped; otherwise the whole mask is dropped when the flow has
  // any exception other than breakPoint.
  val maskWithexceptionMask = io.fromPipeline.map{ x=>
    Mux(
      TriggerAction.isExp(x.bits.trigger) || TriggerAction.isDmode(x.bits.trigger),
      ~x.bits.vecTriggerMask,
      Fill(x.bits.mask.getWidth, !ExceptionNO.selectByFuAndUnSelect(x.bits.exceptionVec, fuCfg, Seq(breakPoint)).asUInt.orR)
    ).asUInt & x.bits.mask
  }

  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    /** step0 **/
    val wbIndex = pipewb.bits.mBIndex
    val alignedType = pipewb.bits.alignedType
    val elemIdxInsideVd = pipewb.bits.elemIdxInsideVd
    flowWbElemIdx(i) := pipewb.bits.elemIdx
    flowWbElemIdxInVd(i) := elemIdxInsideVd.get

    // Forward data that was merged in the previous cycle but is not yet written into the entry:
    // the lowest-numbered latched port targeting the same entry wins, otherwise use the entry data.
    // Built over all pipeline ports instead of a fixed list of ports 0..2.
    val forwardSources = (0 until pipeWidth).map { j =>
      (pipewbValidReg(j) && (wbIndexReg(j) === wbIndex)) -> mergeDataReg(j)
    }
    val oldData = PriorityMux(
      forwardSources :+ (true.B -> entries(wbIndex).data) // default use entries_data
    )
    val mergedData = mergeDataWithElemIdx(
      oldData = oldData,
      newData = io.fromPipeline.map(_.bits.vecdata.get),
      alignedType = alignedType(1,0),
      elemIdx = flowWbElemIdxInVd,
      valids = mergePortMatrix(i)
    )
    /* this only for unit-stride load data merge
     * cycle0: broaden 128-bits to 256-bits (max 6 to 1)
     * cycle1: select 128-bits data from 256-bits (16 to 1)
     */
    val (broadenMergeData, broadenMergeMask)   = mergeDataByIndex(
      data    = io.fromPipeline.map(_.bits.vecdata.get).drop(i),
      mask    = maskWithexceptionMask.drop(i),
      index   = io.fromPipeline(i).bits.elemIdxInsideVd.get,
      valids  = mergePortMatrix(i).drop(i)
    )
    /** step1 **/
    pipewbValidReg(i)      := RegNext(pipewb.valid)
    wbIndexReg(i)          := RegEnable(wbIndex, pipewb.valid)
    mergeDataReg(i)        := RegEnable(mergedData, pipewb.valid) // for not Unit-stride
    val broadenMergeDataReg = RegEnable(broadenMergeData, pipewb.valid) // only for Unit-stride
    val broadenMergeMaskReg = RegEnable(broadenMergeMask, pipewb.valid)
    val mergedByPrevPortReg = RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    val regOffsetReg        = RegEnable(pipewb.bits.reg_offset.get, pipewb.valid) // only for Unit-stride
    val isusMerge           = RegEnable(alignedType(2), pipewb.valid)

    // NOTE(review): the 128 / 16 literals below look like VLEN / VLENB for VLEN = 128 -- confirm
    // before changing the vector length.
    val usSelData           = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ off => getNoAlignedSlice(broadenMergeDataReg, off, 128)})
    val usSelMask           = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ off => broadenMergeMaskReg(16 + off - 1, off)})
    val usMergeData         = mergeDataByByte(entries(wbIndexReg(i)).data, usSelData, usSelMask)
    when(pipewbValidReg(i) && !mergedByPrevPortReg){
      entries(wbIndexReg(i)).data := Mux(isusMerge, usMergeData, mergeDataReg(i)) // if aligned(2) == 1, is Unit-Stride inst
    }
  }
}

/**
  * Vector store merge buffer: the common merge-buffer logic without any data merging.
  * Entries can additionally be marked for replay by a flush from the misalign buffer.
  */
class VSMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=true){
  override lazy val uopSize = VsMergeBufferSize
  println(s"VSMergeBuffer Size: ${VsMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecStorePipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VStore MergeBuffer freelist"
  ))

  /**
    * Store flavour of the writeback payload: only the uop, its exception vector and vstart are
    * taken from the entry; data, mask and the vd indices are left as DontCare.
    */
  override def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink               = Wire(new MemExuOutput(isVector = true))
    sink.data             := DontCare
    sink.mask.get         := DontCare
    sink.uop              := source.uop
    sink.uop.exceptionVec := source.exceptionVec
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := DontCare
    sink.vdIdx.get        := DontCare
    sink.isFromLoadUnit   := DontCare
    sink.uop.vpu.vstart   := source.vstart
    sink
  }

  // from misalignBuffer flush: mark the addressed entry so that it is replayed
  when(io.fromMisalignBuffer.get.flush){
    needRSReplay(io.fromMisalignBuffer.get.mbIndex) := true.B
  }
}
