xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VMergeBuffer.scala (revision 4aa0028654716f3ef660f985eb6662c6c75b70d0)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend.rob.RobPtr
26import xiangshan.backend.Bundles._
27import xiangshan.mem._
28import xiangshan.backend.fu.FuType
29import freechips.rocketchip.diplomacy.BufferParams
30
/**
  * State held by one merge-buffer entry.
  *
  * An entry is filled from a split request, updated by pipeline writebacks and
  * finally drained as a single uop writeback. Field order is part of the bundle
  * layout and must not be changed.
  */
class MBufferBundle(implicit p: Parameters) extends VLSUBundle {
  // Accumulated data for the destination register and its byte mask.
  val data         = UInt(VLEN.W)
  val mask         = UInt(VLENB.W)
  // Remaining flow count; pipeline writebacks subtract from it.
  val flowNum      = UInt(flowIdxBits.W)
  val exceptionVec = ExceptionVec()
  val uop          = new DynInst
  // val vdOffset  = UInt(vOffsetBits.W)
  val sourceType   = VSFQFeedbackType()
  val flushState   = Bool()
  val vdIdx        = UInt(3.W)

  // Exception bookkeeping.
  val vstart       = UInt(elemIdxBits.W)
  val vl           = UInt(elemIdxBits.W)
  val vaddr        = UInt(VAddrBits.W)
  val fof          = Bool()
  val vlmax        = UInt(elemIdxBits.W)

  /** True once no flow is outstanding, i.e. `flowNum` is zero. */
  def allReady(): Bool = !flowNum.orR
}
50
/**
  * Common logic of the vector load/store merge buffers.
  *
  * Entries are allocated from `freeList` when a split request is accepted,
  * updated by writebacks arriving on `io.fromPipeline`, and released when the
  * entry is written back on `io.uopWriteback` / `io.toLsq` or cancelled by a
  * redirect.
  *
  * Concrete subclasses provide `freeList` and `uopSize`; both are read while this
  * class body is being constructed.
  *
  * @param isVStore selects the store flavour: pipeline writebacks update
  *                 `flowNum` in the same cycle instead of one cycle later.
  */
abstract class BaseVMergeBuffer(isVStore: Boolean=false)(implicit p: Parameters) extends VLSUModule{
  val io = IO(new VMergeBufferIO(isVStore))

  /** Initialise a freshly allocated entry `sink` from the split request `source`. */
  def EnqConnect(source: MergeBufferReq, sink: MBufferBundle): Unit = {
    sink.data         := source.data
    sink.mask         := source.mask
    sink.flowNum      := source.flowNum
    sink.exceptionVec := 0.U.asTypeOf(ExceptionVec())
    sink.uop          := source.uop
    sink.sourceType   := 0.U.asTypeOf(VSFQFeedbackType())
    sink.flushState   := false.B
    sink.vdIdx        := source.vdIdx
    sink.fof          := source.fof
    sink.vlmax        := source.vlmax
    sink.vl           := source.uop.vpu.vl
    sink.vstart       := 0.U
    // NOTE(review): `sink.vaddr` is not written here, nor anywhere else in the code
    // visible in this file, although ToLsqConnect forwards it - confirm the intended source.
  }

  /** Build the uop writeback payload for entry `source`. */
  def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink               = WireInit(0.U.asTypeOf(new MemExuOutput(isVector = true)))
    sink.data             := source.data
    sink.mask.get         := source.mask
    // Whole-uop connect first; the field connects below override parts of it.
    sink.uop              := source.uop
    sink.uop.exceptionVec := source.exceptionVec
    sink.uop.vpu.vmask    := source.mask
    sink.debug            := 0.U.asTypeOf(new DebugBundle)
    sink.vdIdxInField.get := source.vdIdx // Mgu needs to use this.
    sink.vdIdx.get        := source.vdIdx
    sink.uop.vpu.vstart   := source.vstart
    sink.uop.vpu.vl       := source.vl
    sink
  }

  /** Build the LSQ feedback for entry `source`: COMMIT without exception, FLUSH with one. */
  def ToLsqConnect(source: MBufferBundle): FeedbackToLsqIO = {
    val sink                                 = WireInit(0.U.asTypeOf(new FeedbackToLsqIO))
    val hasExp                               = source.exceptionVec.asUInt.orR
    sink.robidx                             := source.uop.robIdx
    sink.uopidx                             := source.uop.uopIdx
    sink.feedback(VecFeedbacks.COMMIT)      := !hasExp
    sink.feedback(VecFeedbacks.FLUSH)       := hasExp
    sink.feedback(VecFeedbacks.LAST)        := true.B
    sink.vstart                             := source.vstart // TODO: if lsq need vl for fof?
    sink.vaddr                              := source.vaddr
    sink.vl                                 := source.vl
    sink
  }

  // freelist: stores the indices of free entries.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |      ......  | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList: FreeList
  val uopSize: Int
  val enqWidth = io.fromSplit.length
  val deqWidth = io.uopWriteback.length
  val pipeWidth = io.fromPipeline.length

  val entries      = Reg(Vec(uopSize, new MBufferBundle))
  val needCancel   = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val allocated    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val freeMaskVec  = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
  val uopFinish    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
  val needRSReplay = RegInit(VecInit(Seq.fill(uopSize)(false.B)))

  // enq, from splitPipeline
  // val allowEnqueue =
  val cancelEnq    = io.fromSplit.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
  val canEnqueue   = io.fromSplit.map(_.req.valid)
  // A request is enqueued only when it is valid and not being flushed this cycle.
  val needEnqueue  = (0 until enqWidth).map{i =>
    canEnqueue(i) && !cancelEnq(i)
  }

  for ((enq, i) <- io.fromSplit.zipWithIndex){
    freeList.io.doAllocate(i) := false.B

    freeList.io.allocateReq(i) := true.B

    // Port i takes the free slot after those taken by lower-numbered enqueuing ports.
    val offset    = PopCount(needEnqueue.take(i))
    val canAccept = freeList.io.canAllocate(offset)
    val enqIndex  = freeList.io.allocateSlot(offset)
    enq.req.ready := canAccept

    when(needEnqueue(i) && enq.req.ready){
      freeList.io.doAllocate(i) := true.B
      // enqueue
      allocated(enqIndex)       := true.B
      uopFinish(enqIndex)       := false.B
      needRSReplay(enqIndex)    := false.B

      EnqConnect(enq.req.bits, entries(enqIndex))// initial entry
    }

    enq.resp.bits.mBIndex := enqIndex
    enq.resp.bits.fail    := false.B
    enq.resp.valid        := canAccept //resp in 1 cycle
  }

  // redirect: release every allocated entry whose uop is flushed
  for (i <- 0 until uopSize){
    needCancel(i) := entries(i).uop.robIdx.needFlush(io.redirect) && allocated(i)
    when (needCancel(i)) {
      allocated(i)   := false.B
      freeMaskVec(i) := true.B
      uopFinish(i)   := false.B
      needRSReplay(i):= false.B
    }
  }
  freeList.io.free := freeMaskVec.asUInt

  // pipelineWriteback
  // handle the situation where multiple ports are going to write the same uop queue entry
  // mergePortMatrix(i)(j): j is port i itself, or a valid later port targeting the same entry.
  // mergedByPrevPortVec(i): some valid earlier port targets the same entry as port i.
  val mergePortMatrix        = Wire(Vec(pipeWidth, Vec(pipeWidth, Bool())))
  val mergedByPrevPortVec    = Wire(Vec(pipeWidth, Bool()))
  for (i <- 0 until pipeWidth; j <- 0 until pipeWidth) {
    mergePortMatrix(i)(j) := (j == i).B ||
      (j > i).B &&
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid
  }
  for (i <- 0 until pipeWidth) {
    mergedByPrevPortVec(i) := (i != 0).B && Cat((0 until i).map(j =>
      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
      io.fromPipeline(j).valid)).orR
  }
  dontTouch(mergePortMatrix)
  dontTouch(mergedByPrevPortVec)

  /**
    * Reduce the candidates to the valid one with the smallest `sel` key.
    *
    * Used to pick the oldest exception when several ports write back exceptions
    * in the same cycle. The reduction is a binary tree over the inputs.
    *
    * @param valid per-candidate valid flags
    * @param bits  per-candidate payloads
    * @param sel   per-candidate ordering keys, smaller meaning older
    * @return single-element sequences (unless the input is empty) holding the
    *         winner's valid flag, payload and key
    */
  def selectOldest[T <: VecPipelineFeedbackIO](valid: Seq[Bool], bits: Seq[T], sel: Seq[UInt]): (Seq[Bool], Seq[T], Seq[UInt]) = {
    require(valid.length == bits.length, "selectOldest: valid/bits length mismatch")
    require(valid.length == sel.length, "selectOldest: valid/sel length mismatch")
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits, sel)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      // Candidate 0 wins when both are valid and its key is strictly smaller,
      // or when it is the only valid candidate; otherwise candidate 1 is taken.
      val pickFirst = Mux(valid(0) && valid(1),
        sel(0) < sel(1),
        valid(0) && !valid(1))
      val oldest    = Mux(pickFirst, res(0), res(1))
      // The winner's key must travel with it: returning a constant here would make
      // every later comparison in the tree compare against that constant.
      val oldestSel = Mux(pickFirst, sel(0), sel(1))
      (Seq(oldest.valid), Seq(oldest.bits), Seq(oldestSel))
    } else {
      val left  = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2), sel.take(sel.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)), sel.takeRight(sel.length - (sel.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2, left._3 ++ right._3)
    }
  }

  val pipeValid        = io.fromPipeline.map(_.valid)
  val pipeBits         = io.fromPipeline.map(x => x.bits)
  val wbElemIdx        = pipeBits.map(_.elemIdx)
  val wbMbIndex        = pipeBits.map(_.mBIndex)
  // Element index masked with the target entry's (vlmax - 1).
  val wbElemIdxInField = wbElemIdx.zip(wbMbIndex).map(x => x._1 & (entries(x._2).vlmax - 1.U))

  // Per port: this port, or any port merged into it, reports an exception with a non-empty mask.
  val portHasExcp       = pipeBits.zip(mergePortMatrix).map{case (_, v) =>
    (0 until pipeWidth).map{case i =>
      (v(i) && io.fromPipeline(i).bits.exceptionVec.asUInt.orR && io.fromPipeline(i).bits.mask.orR) // this port have exception or merged port have exception
    }.reduce(_ || _)
  }

  // exception recording
  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    val sel                    = selectOldest(mergePortMatrix(i), pipeBits, wbElemIdxInField)
    val selPort                = sel._2
    val selElemInfield         = selPort(0).elemIdx & (entries(wbMbIndex(i)).vlmax - 1.U)
    val selExceptionVec        = selPort(0).exceptionVec
    val thisPortExcp           = pipewb.bits.exceptionVec.asUInt.orR && pipewb.bits.mask.orR

    when((((entries(wbMbIndex(i)).vstart >= selElemInfield) && thisPortExcp && portHasExcp(i)) || (!thisPortExcp && portHasExcp(i))) && pipewb.valid){
      when(!entries(wbMbIndex(i)).fof || selElemInfield === 0.U){
        // For fof loads, if element 0 raises an exception, vl is not modified, and the trap is taken.
        entries(wbMbIndex(i)).vstart       := selElemInfield
        entries(wbMbIndex(i)).exceptionVec := selExceptionVec
      }.otherwise{
        // fof with a faulting element other than 0: vl is set to that element index.
        entries(wbMbIndex(i)).vl           := selElemInfield
      }
    }
  }

  // for pipeline writeback
  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    val wbIndex          = pipewb.bits.mBIndex
    // Number of flows retired by this writeback: 2 when usSecondInv is set,
    // otherwise the number of ports merged into this one (itself included).
    val flowNumOffset    = Mux(pipewb.bits.usSecondInv,
                               2.U,
                               PopCount(mergePortMatrix(i)))
    val sourceTypeNext   = entries(wbIndex).sourceType | pipewb.bits.sourceType

    // if is VLoad, need latch 1 cycle to merge data. only flowNum and wbIndex need to latch
    val latchWbValid     = if(isVStore) pipewb.valid else RegNext(pipewb.valid)
    val latchWbIndex     = if(isVStore) wbIndex      else RegEnable(wbIndex, pipewb.valid)
    val latchFlowNum     = if(isVStore) flowNumOffset else RegEnable(flowNumOffset, pipewb.valid)
    val latchMergeByPre  = if(isVStore) mergedByPrevPortVec(i) else RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    // A port merged into an earlier port does not update flowNum itself.
    when(latchWbValid && !latchMergeByPre){
      entries(latchWbIndex).flowNum := entries(latchWbIndex).flowNum - latchFlowNum
    }

    when(pipewb.valid){
      entries(wbIndex).sourceType   := sourceTypeNext
      entries(wbIndex).flushState   := pipewb.bits.flushState
    }
    when(pipewb.valid && !pipewb.bits.hit){
      needRSReplay(wbIndex) := true.B
    }
    pipewb.ready := true.B
    XSError((entries(latchWbIndex).flowNum - latchFlowNum > entries(latchWbIndex).flowNum) && latchWbValid && !latchMergeByPre, "FlowWriteback overflow!!\n")
    XSError(!allocated(latchWbIndex) && latchWbValid, "Writeback error flow!!\n")
  }
  // for in-order mem access
  io.toSplit := DontCare

  // uopwriteback(deq): an allocated entry with no outstanding flow is marked finished
  for (i <- 0 until uopSize){
    when(allocated(i) && entries(i).allReady()){
      uopFinish(i) := true.B
    }
  }
  val selPolicy = SelectOne("circ", uopFinish, deqWidth) // select one entry to deq
  for(((port, lsqport), i) <- (io.uopWriteback zip io.toLsq).zipWithIndex){
    val canGo    = port.ready
    val (selValid, selOHVec) = selPolicy.getNthOH(i + 1)
    val entryIdx = OHToUInt(selOHVec)
    val selEntry = entries(entryIdx)
    val selFire  = selValid && canGo
    when(selFire){
      freeMaskVec(entryIdx) := true.B
      allocated(entryIdx)   := false.B
      uopFinish(entryIdx)   := false.B
      needRSReplay(entryIdx):= false.B
    }
    //writeback connect
    port.valid   := selFire && allocated(entryIdx) && !needRSReplay(entryIdx) && !selEntry.uop.robIdx.needFlush(io.redirect)
    port.bits    := DeqConnect(selEntry)
    //to lsq
    lsqport.bits := ToLsqConnect(selEntry) // when uopwriteback, free MBuffer entry, write to lsq
    lsqport.valid:= selFire && allocated(entryIdx) && !needRSReplay(entryIdx)
    //to RS
    io.feedback(i).valid                 := selFire && allocated(entryIdx)
    io.feedback(i).bits.hit              := !needRSReplay(entryIdx)
    io.feedback(i).bits.robIdx           := selEntry.uop.robIdx
    io.feedback(i).bits.sourceType       := selEntry.sourceType
    io.feedback(i).bits.flushState       := selEntry.flushState
    io.feedback(i).bits.dataInvalidSqIdx := DontCare
    io.feedback(i).bits.uopIdx.get       := selEntry.uop.uopIdx
  }

  QueuePerf(uopSize, freeList.io.validCount, freeList.io.validCount === 0.U)
}
296
/**
  * Vector-load merge buffer: adds the data-merge path on top of [[BaseVMergeBuffer]].
  *
  * Each pipeline writeback is merged in two steps. In the cycle of the writeback
  * the new data is combined with the entry's current data; one cycle later the
  * registered result is written into the entry.
  */
class VLMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=false){
  override lazy val uopSize = VlMergeBufferSize
  println(s"VLMergeBuffer Size: ${VlMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecLoadPipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VLoad MergeBuffer freelist"
  ))

  //merge data
  val flowWbElemIdx     = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val flowWbElemIdxInVd = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val pipewbValidReg    = Wire(Vec(pipeWidth, Bool()))
  val wbIndexReg        = Wire(Vec(pipeWidth, UInt(vlmBindexBits.W)))
  val mergeDataReg      = Wire(Vec(pipeWidth, UInt(VLEN.W)))

  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    /** step0 **/
    val wbIndex = pipewb.bits.mBIndex
    val alignedType = pipewb.bits.alignedType
    val elemIdxInsideVd = pipewb.bits.elemIdxInsideVd
    flowWbElemIdx(i) := pipewb.bits.elemIdx
    flowWbElemIdxInVd(i) := elemIdxInsideVd.get

    // Forward data merged in the previous cycle that has not reached `entries` yet.
    // The lowest-numbered matching port has priority; the entry's stored data is the
    // default. Generated for every pipeline port rather than a fixed list of ports.
    val forwardSources = (0 until pipeWidth).map { j =>
      (pipewbValidReg(j) && (wbIndexReg(j) === wbIndex)) -> mergeDataReg(j)
    }
    val oldData = PriorityMux(forwardSources :+ (true.B -> entries(wbIndex).data)) // default use entries_data
    val mergedData = mergeDataWithElemIdx(
      oldData = oldData,
      newData = io.fromPipeline.map(_.bits.vecdata.get),
      alignedType = alignedType(1,0),
      elemIdx = flowWbElemIdxInVd,
      valids = mergePortMatrix(i)
    )
    /* this only for unit-stride load data merge
     * cycle0: broaden 128-bits to 256-bits (max 6 to 1)
     * cycle1: select 128-bits data from 256-bits (16 to 1)
     */
    val (brodenMergeData, brodenMergeMask)     = mergeDataByIndex(
      data    = io.fromPipeline.map(_.bits.vecdata.get).drop(i),
      mask    = io.fromPipeline.map(_.bits.mask).drop(i),
      index   = io.fromPipeline(i).bits.elemIdxInsideVd.get,
      valids  = mergePortMatrix(i).drop(i)
    )
    /** step1 **/
    pipewbValidReg(i)      := RegNext(pipewb.valid)
    wbIndexReg(i)          := RegEnable(wbIndex, pipewb.valid)
    mergeDataReg(i)        := RegEnable(mergedData, pipewb.valid) // for not Unit-stride
    val brodenMergeDataReg  = RegEnable(brodenMergeData, pipewb.valid) // only for Unit-stride
    val brodenMergeMaskReg  = RegEnable(brodenMergeMask, pipewb.valid)
    val mergedByPrevPortReg = RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    val regOffsetReg        = RegEnable(pipewb.bits.reg_offset.get, pipewb.valid) // only for Unit-stride
    val isusMerge           = RegEnable(alignedType(2), pipewb.valid)

    // Select the 128-bit data window and 16-bit mask window starting at byte regOffsetReg.
    val usSelData           = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ byteOff => getNoAlignedSlice(brodenMergeDataReg, byteOff, 128)})
    val usSelMask           = Mux1H(UIntToOH(regOffsetReg), (0 until VLENB).map{ byteOff => brodenMergeMaskReg(16 + byteOff - 1, byteOff)})
    val usMergeData         = mergeDataByByte(entries(wbIndexReg(i)).data, usSelData, usSelMask)
    // A port merged into an earlier port does not write the entry itself.
    when(pipewbValidReg(i) && !mergedByPrevPortReg){
      entries(wbIndexReg(i)).data := Mux(isusMerge, usMergeData, mergeDataReg(i)) // if aligned(2) == 1, is Unit-Stride inst
    }
  }
}
364
/**
  * Vector-store merge buffer.
  *
  * Uses the shared [[BaseVMergeBuffer]] logic with `isVStore = true` and replaces
  * the uop writeback payload with one that carries no data.
  */
class VSMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=true){
  override lazy val uopSize = VsMergeBufferSize
  println(s"VSMergeBuffer Size: ${VsMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecStorePipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VStore MergeBuffer freelist"
  ))

  /** Build the uop writeback payload for a store entry; data-side fields are left as DontCare. */
  override def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val sink = Wire(new MemExuOutput(isVector = true))

    // Whole-uop connect first, then the per-field overrides that must win.
    sink.uop              := source.uop
    sink.uop.exceptionVec := source.exceptionVec
    sink.uop.vpu.vstart   := source.vstart

    sink.debug := 0.U.asTypeOf(new DebugBundle)

    // Data, mask and destination-register indices are not driven for stores.
    Seq(sink.data, sink.mask.get, sink.vdIdxInField.get, sink.vdIdx.get).foreach(_ := DontCare)

    sink
  }
}
388