xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VMergeBuffer.scala (revision b2d6d8e7fa46979159a5bceab4eb91d3f174195a)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend.rob.RobPtr
26import xiangshan.backend.Bundles._
27import xiangshan.mem._
28import xiangshan.backend.fu.FuType
29import freechips.rocketchip.diplomacy.BufferParams
30
/**
  * State kept for one merge-buffer entry while the flows of a vector memory uop
  * write back.
  *
  * The declaration order of the fields is part of the hardware layout (entries are
  * built with `asTypeOf`), so it must not be changed.
  */
class MBufferBundle(implicit p: Parameters) extends VLSUBundle {
  // Destination data accumulated for this uop, VLEN bits wide.
  val data = UInt(VLEN.W)
  // Per-byte mask that accompanies `data`, VLENB bits wide.
  val mask = UInt(VLENB.W)
  // Number of flows still outstanding; the merge buffer decrements it on writeback.
  val flowNum = UInt(flowIdxBits.W)
  // Exception vector recorded from the pipeline writebacks of this uop.
  val exceptionVec = ExceptionVec()
  // The uop this entry belongs to.
  val uop = new DynInst
  // val vdOffset         = UInt(vOffsetBits.W)
  // Feedback source type, OR-accumulated over the pipeline writebacks.
  val sourceType = VSFQFeedbackType()
  // Flush state taken from the most recent pipeline writeback.
  val flushState = Bool()
  // Destination register index forwarded to the uop writeback port.
  val vdIdx = UInt(3.W)

  /** True once no flow is outstanding any more, i.e. `flowNum` is zero. */
  def allReady(): Bool = !flowNum.orR
}
44
/**
  * Shared implementation of the vector load/store merge buffers.
  *
  * An entry is allocated for every request accepted from the split pipeline
  * (`io.fromSplit`). Each pipeline writeback (`io.fromPipeline`) decrements the
  * entry's outstanding-flow counter and records exception / feedback state. When
  * the counter reaches zero the entry is marked finished, selected for dequeue and
  * written back through `io.uopWriteback`, `io.toLsq` and `io.feedback`.
  *
  * NOTE: the statement order below matters. Chisel resolves multiple connections to
  * the same signal with last-connect semantics, so later blocks deliberately override
  * earlier ones (enqueue < redirect < finish < dequeue).
  *
  * @param isVStore true for the store merge buffer, false for the load merge buffer;
  *                 the load variant delays the flow-count update by one cycle.
  */
abstract class BaseVMergeBuffer(isVStore: Boolean=false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VMergeBufferIO(isVStore))

  /** Builds the initial contents of an entry from an accepted split request. */
  def EnqConnect(source: MergeBufferReq): MBufferBundle = {
    val sink           = WireInit(0.U.asTypeOf(new MBufferBundle))
    sink.data         := source.data
    sink.mask         := source.mask
    sink.flowNum      := source.flowNum
    sink.exceptionVec := 0.U.asTypeOf(ExceptionVec())
    sink.uop          := source.uop
    sink.sourceType   := 0.U.asTypeOf(VSFQFeedbackType())
    sink.flushState   := false.B
    sink.vdIdx        := source.vdIdx
    sink
    // sink.vdOffset     := source.vdOffset
  }

  /** Converts a finished entry into the uop writeback payload. */
  def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink               = WireInit(0.U.asTypeOf(new MemExuOutput(isVector = true)))
    sink.data             := source.data
    sink.mask.get         := source.mask
    // The bulk connect of `uop` must come first: with last-connect semantics it would
    // otherwise overwrite the per-field overrides below and drop the merged exceptions.
    sink.uop              := source.uop
    sink.uop.exceptionVec := source.exceptionVec
    sink.uop.vpu.vmask    := source.mask
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := 0.U
    sink.vdIdx.get        := source.vdIdx
    sink
  }

  /** Builds the feedback sent to the LSQ when an entry is dequeued. */
  def ToLsqConnect(source: MBufferBundle): FeedbackToLsqIO = {
    val sink                                 = WireInit(0.U.asTypeOf(new FeedbackToLsqIO))
    sink.robidx                             := source.uop.robIdx
    sink.uopidx                             := source.uop.uopIdx
    sink.feedback(VecFeedbacks.COMMIT)      := true.B // TODO:
    sink.feedback(VecFeedbacks.FLUSH)       := false.B
    sink.feedback(VecFeedbacks.LAST)        := true.B
    sink.vaddr                              := 0.U // TODO: used when exception
    sink
  }

  // freelist: stores the indices of the free entries.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList: FreeList
  val uopSize: Int
  val enqWidth = io.fromSplit.length
  val deqWidth = io.uopWriteback.length
  val pipeWidth = io.fromPipeline.length

  val entries      = Reg(Vec(uopSize, new MBufferBundle))
  // entry is allocated and hit by the current redirect
  val needCancel   = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val allocated    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  // entries returned to the free list in this cycle
  val freeMaskVec  = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  // entry has no outstanding flow and waits for dequeue
  val uopFinish    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  // at least one pipeline writeback of the entry reported a miss
  val needRSReplay = RegInit(VecInit(Seq.fill(uopSize)(false.B)))

  // enq, from splitPipeline
  // val allowEnqueue =
  val cancelEnq    = io.fromSplit.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val canEnqueue   = io.fromSplit.map(_.req.valid)
  val needEnqueue  = (0 until enqWidth).map{i =>
    canEnqueue(i) && !cancelEnq(i)
  }

  for ((enq, i) <- io.fromSplit.zipWithIndex){
    freeList.io.doAllocate(i) := false.B

    freeList.io.allocateReq(i) := true.B

    // Ports are compacted: port i takes the slot after those used by lower ports
    // that actually enqueue in this cycle.
    val offset    = PopCount(needEnqueue.take(i))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex  = freeList.io.allocateSlot(offset)
    enq.req.ready := canAccept

    when(needEnqueue(i) && enq.req.ready){
      freeList.io.doAllocate(i) := true.B
      // enqueue
      allocated(enqIndex)       := true.B
      uopFinish(enqIndex)       := false.B
      needRSReplay(enqIndex)    := false.B

      entries(enqIndex) := EnqConnect(enq.req.bits)// initial entry
    }

    enq.resp.bits.mBIndex := enqIndex
    enq.resp.bits.fail    := false.B
    enq.resp.valid        := canAccept //resp in 1 cycle
  }

  // redirect: drop every allocated entry hit by the flush and return it to the free list
  for (i <- 0 until uopSize){
    needCancel(i) := entries(i).uop.robIdx.needFlush(io.redirect) && allocated(i)
    when (needCancel(i)) {
      allocated(i)   := false.B
      freeMaskVec(i) := true.B
      uopFinish(i)   := false.B
      needRSReplay(i):= false.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  // pipelineWriteback
  // handle the situation where multiple ports are going to write the same uop queue entry
  // mergePortMatrix(i)(j): port j (j >= i) is merged into port i's update.
  val mergePortMatrix        = Wire(Vec(pipeWidth, Vec(pipeWidth, Bool())))
  // mergedByPrevPortVec(i): a lower-numbered valid port targets the same entry as port i.
  val mergedByPrevPortVec    = Wire(Vec(pipeWidth, Bool()))
  for (i <- 0 until pipeWidth; j <- 0 until pipeWidth) {
    mergePortMatrix(i)(j) := (j == i).B ||
      (j > i).B &&
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid
  }
  for (i <- 0 until pipeWidth) {
    mergedByPrevPortVec(i) := (i != 0).B && Cat((0 until i).map(j =>
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid)).orR
  }
  dontTouch(mergePortMatrix)
  dontTouch(mergedByPrevPortVec)

  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    val wbIndex          = pipewb.bits.mBIndex
    // Flows retired by this update: 2 when `usSecondInv` is set, otherwise the
    // number of ports merged into this one (including itself).
    val flowNumOffset    = Mux(pipewb.bits.usSecondInv,
                               2.U,
                               PopCount(mergePortMatrix(i)))
    val sourceTypeNext   = entries(wbIndex).sourceType | pipewb.bits.sourceType
    val hasExp           = pipewb.bits.exceptionVec.asUInt.orR
    val exceptionVecNext = Mux(hasExp, pipewb.bits.exceptionVec, entries(wbIndex).exceptionVec)

    // if is VLoad, need latch 1 cycle to merge data. only flowNum and wbIndex need to latch
    val latchWbValid     = if(isVStore) pipewb.valid else RegNext(pipewb.valid)
    val latchWbIndex     = if(isVStore) wbIndex      else RegEnable(wbIndex, pipewb.valid)
    val latchFlowNum     = if(isVStore) flowNumOffset else RegEnable(flowNumOffset, pipewb.valid)
    val latchMergeByPre  = if(isVStore) mergedByPrevPortVec(i) else RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    // Only the lowest port of a merged group updates the counter.
    when(latchWbValid && !latchMergeByPre){
      entries(latchWbIndex).flowNum := entries(latchWbIndex).flowNum - latchFlowNum
    }

    when(pipewb.valid){
      entries(wbIndex).sourceType   := sourceTypeNext
      entries(wbIndex).exceptionVec := exceptionVecNext
      entries(wbIndex).flushState   := pipewb.bits.flushState
    }
    when(pipewb.valid && !pipewb.bits.hit){
      needRSReplay(wbIndex) := true.B
    }
    pipewb.ready := true.B
    // The subtraction wrapped around if the result is larger than the old value.
    XSError((entries(latchWbIndex).flowNum - latchFlowNum > entries(latchWbIndex).flowNum) && latchWbValid && !latchMergeByPre, "FlowWriteback overflow!!\n")
    XSError(!allocated(wbIndex) && pipewb.valid, "Writeback error flow!!\n")
  }
  // for inorder mem access
  io.toSplit := DontCare

  // uopwriteback(deq)
  // Mark an entry finished once all its flows are back. An entry cancelled by the
  // redirect in this cycle must not be marked: this block comes after the redirect
  // loop and would otherwise override its `uopFinish := false`, leaving a stale
  // finished flag on a freed slot that the dequeue logic would free a second time.
  for (i <- 0 until uopSize){
    when(allocated(i) && entries(i).allReady() && !needCancel(i)){
      uopFinish(i) := true.B
    }
  }
  val selPolicy = SelectOne("circ", uopFinish, deqWidth) // select one entry to deq
  for(((port, lsqport), i) <- (io.uopWriteback zip io.toLsq).zipWithIndex){
    val canGo    = port.ready
    val (selValid, selOHVec) = selPolicy.getNthOH(i + 1)
    val entryIdx = OHToUInt(selOHVec)
    val selEntry = entries(entryIdx)
    val selFire  = selValid && canGo
    when(selFire){
      freeMaskVec(entryIdx) := true.B
      allocated(entryIdx)   := false.B
      uopFinish(entryIdx)   := false.B
      needRSReplay(entryIdx):= false.B
    }
    // writeback connect: entries that need a replay are freed without writing back
    port.valid   := selFire && allocated(entryIdx) && !needRSReplay(entryIdx)
    port.bits    := DeqConnect(selEntry)
    //to lsq
    lsqport.bits := ToLsqConnect(selEntry) // when uopwriteback, free MBuffer entry, write to lsq
    lsqport.valid:= selFire && allocated(entryIdx) && !needRSReplay(entryIdx)
    //to RS
    io.feedback(i).valid                 := selFire && allocated(entryIdx)
    io.feedback(i).bits.hit              := !needRSReplay(entryIdx)
    io.feedback(i).bits.robIdx           := selEntry.uop.robIdx
    io.feedback(i).bits.sourceType       := selEntry.sourceType
    io.feedback(i).bits.flushState       := selEntry.flushState
    io.feedback(i).bits.dataInvalidSqIdx := DontCare
    io.feedback(i).bits.uopIdx.get       := selEntry.uop.uopIdx
  }

  QueuePerf(uopSize, freeList.io.validCount, freeList.io.validCount === 0.U)
}
230
/**
  * Vector load merge buffer.
  *
  * On top of the bookkeeping in [[BaseVMergeBuffer]] it merges the data returned by
  * the load pipeline into the entry's `data` field. The merge is split over two
  * cycles: the per-port merge is computed in the writeback cycle and registered,
  * and the entry is updated one cycle later.
  */
class VLMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=false){
  override lazy val uopSize = VlMergeBufferSize
  println(s"VLMergeBuffer Size: ${VlMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecLoadPipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VLoad MergeBuffer freelist"
  ))

  //merge data
  // Sized by the number of pipeline writeback ports so that the loop below drives
  // every element of both wires.
  val flowWbElemIdx = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val flowWbElemIdxInVd = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))

  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    /** step0: combinationally merge the data of all ports that hit the same entry **/
    val wbIndex = pipewb.bits.mBIndex
    val alignedType = pipewb.bits.alignedType.get
    val elemIdxInsideVd = pipewb.bits.elemIdxInsideVd
    flowWbElemIdx(i) := pipewb.bits.elemIdx.get
    flowWbElemIdxInVd(i) := elemIdxInsideVd.get

    val mergedData = mergeDataWithElemIdx(
      oldData = entries(wbIndex).data,
      newData = io.fromPipeline.map(_.bits.vecdata.get),
      alignedType = alignedType(1,0),
      elemIdx = flowWbElemIdxInVd,
      valids = mergePortMatrix(i)
    )
    /* this is only for unit-stride load data merge
     * cycle0: broaden 128-bits to 256-bits (max 6 to 1)
     * cycle1: select 128-bits data from 256-bits (16 to 1)
     */
    val (brodenMergeData, brodenMergeMask)     = mergeDataByIndex(
      data    = io.fromPipeline.map(_.bits.vecdata.get).drop(i),
      mask    = io.fromPipeline.map(_.bits.mask.get).drop(i),
      index   = io.fromPipeline(i).bits.elemIdx.get,
      valids  = mergePortMatrix(i).drop(i)
    )
    /** step1: registered copy of step0, written into the entry one cycle later **/
    val pipewbValidReg      = RegNext(pipewb.valid)
    val wbIndexReg          = RegEnable(wbIndex, pipewb.valid)
    val mergeDataReg        = RegEnable(mergedData, pipewb.valid) // for not Unit-stride
    val brodenMergeDataReg  = RegEnable(brodenMergeData, pipewb.valid) // only for Unit-stride
    val brodenMergeMaskReg  = RegEnable(brodenMergeMask, pipewb.valid)
    val mergedByPrevPortReg = RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    val regOffsetReg        = RegEnable(pipewb.bits.reg_offset.get, pipewb.valid) // only for Unit-stride
    val isusMerge           = RegEnable(alignedType(2), pipewb.valid)

    // Select the 128-bit data window / 16-bit mask window that starts at byte
    // `regOffsetReg` of the broadened data.
    val usSelData           = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ byteOffset =>
      getNoAlignedSlice(brodenMergeDataReg, byteOffset, 128)
    })
    val usSelMask           = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ byteOffset =>
      brodenMergeMaskReg(16 + byteOffset - 1, byteOffset)
    })
    val usMergeData         = mergeDataByByte(entries(wbIndexReg).data, usSelData, usSelMask)
    // Only the lowest port of a merged group writes the entry.
    when(pipewbValidReg && !mergedByPrevPortReg){
      entries(wbIndexReg).data := Mux(isusMerge, usMergeData, mergeDataReg) // if aligned(2) == 1, is Unit-Stride inst
    }
  }
}
289
/**
  * Vector store merge buffer.
  *
  * Uses the bookkeeping of [[BaseVMergeBuffer]] unchanged; only the writeback payload
  * differs, because a store writes back no data.
  */
class VSMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=true){
  override lazy val uopSize = VsMergeBufferSize
  println(s"VSMergeBuffer Size: ${VsMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecStorePipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VStore MergeBuffer freelist"
  ))

  /** Converts a finished entry into the uop writeback payload; data fields are don't-care. */
  override def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink               = Wire(new MemExuOutput(isVector = true))
    sink.data             := DontCare
    sink.mask.get         := DontCare
    // The bulk connect of `uop` must come first: with last-connect semantics it would
    // otherwise overwrite the exception vector recorded in the merge buffer entry.
    sink.uop              := source.uop
    sink.uop.exceptionVec := source.exceptionVec
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := DontCare
    sink.vdIdx.get        := DontCare
    sink
  }
}
311}
312