xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VMergeBuffer.scala (revision 506ca2a39c8376d3bdb39986964a0b2b61292028)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend.rob.RobPtr
26import xiangshan.backend.Bundles._
27import xiangshan.mem._
28import xiangshan.backend.fu.FuType
29import xiangshan.backend.fu.FuConfig._
30import xiangshan.backend.datapath.NewPipelineConnect
31import freechips.rocketchip.diplomacy.BufferParams
32
/**
  * State kept in one merge-buffer entry for a vector memory uop while its
  * flows are being written back by the pipeline.
  */
class MBufferBundle(implicit p: Parameters) extends VLSUBundle {
  // VLEN-bit data and the VLENB-bit mask that goes with it.
  val data = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // Number of flows still outstanding; see allReady().
  val flowNum = UInt(flowIdxBits.W)
  val exceptionVec = ExceptionVec()
  val uop = new DynInst
  // val vdOffset = UInt(vOffsetBits.W)
  val sourceType = VSFQFeedbackType()
  val flushState = Bool()
  val vdIdx = UInt(3.W)

  // Fields recorded for exception reporting.
  val vstart = UInt(elemIdxBits.W)
  val vl = UInt(elemIdxBits.W)
  val vaNeedExt = Bool()
  val vaddr = UInt(XLEN.W)
  val gpaddr = UInt(GPAddrBits.W)
  val isForVSnonLeafPTE = Bool()
  val fof = Bool()
  val vlmax = UInt(elemIdxBits.W)

  /** High once no flow is outstanding any more, i.e. `flowNum` is zero. */
  def allReady(): Bool = flowNum === 0.U
}
55
56abstract class BaseVMergeBuffer(isVStore: Boolean=false)(implicit p: Parameters) extends VLSUModule{
57  val io = IO(new VMergeBufferIO(isVStore))
58
59  // freeliset: store valid entries index.
60  // +---+---+--------------+-----+-----+
61  // | 0 | 1 |      ......  | n-2 | n-1 |
62  // +---+---+--------------+-----+-----+
63  val freeList: FreeList
64  val uopSize: Int
65  val enqWidth = io.fromSplit.length
66  val deqWidth = io.uopWriteback.length
67  val pipeWidth = io.fromPipeline.length
68  lazy val fuCfg = if (isVStore) VstuCfg else VlduCfg
69
70  def EnqConnect(source: MergeBufferReq, sink: MBufferBundle) = {
71    sink.data         := source.data
72    sink.mask         := source.mask
73    sink.flowNum      := source.flowNum
74    sink.exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(ExceptionVec()), fuCfg)
75    sink.uop          := source.uop
76    sink.sourceType   := 0.U.asTypeOf(VSFQFeedbackType())
77    sink.flushState   := false.B
78    sink.vdIdx        := source.vdIdx
79    sink.fof          := source.fof
80    sink.vlmax        := source.vlmax
81    sink.vl           := source.uop.vpu.vl
82    sink.vstart       := 0.U
83  }
84  def DeqConnect(source: MBufferBundle): MemExuOutput = {
85    val sink               = WireInit(0.U.asTypeOf(new MemExuOutput(isVector = true)))
86    sink.data             := source.data
87    sink.mask.get         := source.mask
88    sink.uop              := source.uop
89    sink.uop.exceptionVec := ExceptionNO.selectByFu(source.exceptionVec, fuCfg)
90    sink.uop.vpu.vmask    := source.mask
91    sink.debug            := 0.U.asTypeOf(new DebugBundle)
92    sink.vdIdxInField.get := source.vdIdx // Mgu needs to use this.
93    sink.vdIdx.get        := source.vdIdx
94    sink.uop.vpu.vstart   := source.vstart
95    sink.uop.vpu.vl       := source.vl
96    sink
97  }
98  def ToLsqConnect(source: MBufferBundle): FeedbackToLsqIO = {
99    val sink                                 = WireInit(0.U.asTypeOf(new FeedbackToLsqIO))
100    val hasExp                               = ExceptionNO.selectByFu(source.exceptionVec, fuCfg).asUInt.orR
101    sink.robidx                             := source.uop.robIdx
102    sink.uopidx                             := source.uop.uopIdx
103    sink.feedback(VecFeedbacks.COMMIT)      := !hasExp
104    sink.feedback(VecFeedbacks.FLUSH)       := hasExp
105    sink.feedback(VecFeedbacks.LAST)        := true.B
106    sink.vstart                             := source.vstart // TODO: if lsq need vl for fof?
107    sink.vaddr                              := source.vaddr
108    sink.vaNeedExt                          := source.vaNeedExt
109    sink.gpaddr                             := source.gpaddr
110    sink.isForVSnonLeafPTE                  := source.isForVSnonLeafPTE
111    sink.vl                                 := source.vl
112    sink.exceptionVec                       := ExceptionNO.selectByFu(source.exceptionVec, fuCfg)
113    sink
114  }
115
116
117  val entries      = Reg(Vec(uopSize, new MBufferBundle))
118  val needCancel   = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
119  val allocated    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
120  val freeMaskVec  = WireInit(VecInit(Seq.fill(uopSize)(false.B)))
121  val uopFinish    = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
122  val needRSReplay = RegInit(VecInit(Seq.fill(uopSize)(false.B)))
123  // enq, from splitPipeline
124  // val allowEnqueue =
125  val cancelEnq    = io.fromSplit.map(_.req.bits.uop.robIdx.needFlush(io.redirect))
126  val canEnqueue   = io.fromSplit.map(_.req.valid)
127  val needEnqueue  = (0 until enqWidth).map{i =>
128    canEnqueue(i) && !cancelEnq(i)
129  }
130
131  val freeCount    = uopSize.U - freeList.io.validCount
132
133  for ((enq, i) <- io.fromSplit.zipWithIndex){
134    freeList.io.doAllocate(i) := false.B
135
136    freeList.io.allocateReq(i) := true.B
137
138    val offset    = PopCount(needEnqueue.take(i))
139    val canAccept = freeList.io.canAllocate(offset)
140    val enqIndex  = freeList.io.allocateSlot(offset)
141    enq.req.ready := freeCount >= (i + 1).U // for better timing
142
143    when(needEnqueue(i) && enq.req.ready){
144      freeList.io.doAllocate(i) := true.B
145      // enqueue
146      allocated(enqIndex)       := true.B
147      uopFinish(enqIndex)       := false.B
148      needRSReplay(enqIndex)    := false.B
149
150      EnqConnect(enq.req.bits, entries(enqIndex))// initial entry
151    }
152
153    enq.resp.bits.mBIndex := enqIndex
154    enq.resp.bits.fail    := false.B
155    enq.resp.valid        := freeCount >= (i + 1).U // for better timing
156  }
157
158  //redirect
159  for (i <- 0 until uopSize){
160    needCancel(i) := entries(i).uop.robIdx.needFlush(io.redirect) && allocated(i)
161    when (needCancel(i)) {
162      allocated(i)   := false.B
163      freeMaskVec(i) := true.B
164      uopFinish(i)   := false.B
165      needRSReplay(i):= false.B
166    }
167  }
168  freeList.io.free := freeMaskVec.asUInt
169  //pipelineWriteback
170  // handle the situation where multiple ports are going to write the same uop queue entry
171  val mergePortMatrix        = Wire(Vec(pipeWidth, Vec(pipeWidth, Bool())))
172  val mergedByPrevPortVec    = Wire(Vec(pipeWidth, Bool()))
173  (0 until pipeWidth).map{case i => (0 until pipeWidth).map{case j =>
174    mergePortMatrix(i)(j) := (j == i).B ||
175      (j > i).B &&
176      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
177      io.fromPipeline(j).valid
178  }}
179  (0 until pipeWidth).map{case i =>
180    mergedByPrevPortVec(i) := (i != 0).B && Cat((0 until i).map(j =>
181      io.fromPipeline(j).bits.mBIndex === io.fromPipeline(i).bits.mBIndex &&
182      io.fromPipeline(j).valid)).orR
183  }
184  dontTouch(mergePortMatrix)
185  dontTouch(mergedByPrevPortVec)
186
187  // for exception, select exception, when multi port writeback exception, we need select oldest one
188  def selectOldest[T <: VecPipelineFeedbackIO](valid: Seq[Bool], bits: Seq[T], sel: Seq[UInt]): (Seq[Bool], Seq[T], Seq[UInt]) = {
189    assert(valid.length == bits.length)
190    assert(valid.length == sel.length)
191    if (valid.length == 0 || valid.length == 1) {
192      (valid, bits, sel)
193    } else if (valid.length == 2) {
194      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
195      for (i <- res.indices) {
196        res(i).valid := valid(i)
197        res(i).bits := bits(i)
198      }
199      val oldest = Mux(valid(0) && valid(1),
200        Mux(sel(0) < sel(1),
201            res(0), res(1)),
202        Mux(valid(0) && !valid(1), res(0), res(1)))
203      (Seq(oldest.valid), Seq(oldest.bits), Seq(0.U))
204    } else {
205      val left  = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2), sel.take(sel.length / 2))
206      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)), sel.takeRight(sel.length - (sel.length / 2)))
207      selectOldest(left._1 ++ right._1, left._2 ++ right._2, left._3 ++ right._3)
208    }
209  }
210
211  val pipeValid        = io.fromPipeline.map(_.valid)
212  val pipeBits         = io.fromPipeline.map(_.bits)
213  val wbElemIdx        = pipeBits.map(_.elemIdx)
214  val wbMbIndex        = pipeBits.map(_.mBIndex)
215  val wbElemIdxInField = wbElemIdx.zip(wbMbIndex).map(x => x._1 & (entries(x._2).vlmax - 1.U))
216
217  val portHasExcp       = pipeBits.zip(mergePortMatrix).map{case (port, v) =>
218    (0 until pipeWidth).map{case i =>
219      val pipeHasExcep = ExceptionNO.selectByFu(port.exceptionVec, fuCfg).asUInt.orR
220      (v(i) && ((pipeHasExcep && io.fromPipeline(i).bits.mask.orR) || TriggerAction.isDmode(port.trigger))) // this port have exception or merged port have exception
221    }.reduce(_ || _)
222  }
223
224  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
225    val entry               = entries(wbMbIndex(i))
226    val entryVeew           = entry.uop.vpu.veew
227    val entryIsUS           = LSUOpType.isAllUS(entry.uop.fuOpType)
228    val entryHasException   = ExceptionNO.selectByFu(entry.exceptionVec, fuCfg).asUInt.orR || TriggerAction.isDmode(entry.uop.trigger)
229    val entryExcp           = entryHasException && entry.mask.orR
230
231    val sel                    = selectOldest(mergePortMatrix(i), pipeBits, wbElemIdxInField)
232    val selPort                = sel._2
233    val selElemInfield         = selPort(0).elemIdx & (entries(wbMbIndex(i)).vlmax - 1.U)
234    val selExceptionVec        = selPort(0).exceptionVec
235
236    val isUSFirstUop           = !selPort(0).elemIdx.orR
237    // Only the first unaligned uop of unit-stride needs to be offset.
238    // When unaligned, the lowest bit of mask is 0.
239    //  example: 16'b1111_1111_1111_0000
240    val vaddrOffset            = Mux(entryIsUS && isUSFirstUop, genVFirstUnmask(selPort(0).mask).asUInt, 0.U)
241    val vaddr                  = selPort(0).vaddr +  vaddrOffset
242
243    // select oldest port to raise exception
244    when((((entries(wbMbIndex(i)).vstart >= selElemInfield) && entryExcp && portHasExcp(i)) || (!entryExcp && portHasExcp(i))) && pipewb.valid && !mergedByPrevPortVec(i)){
245      entries(wbMbIndex(i)).uop.trigger := selPort(0).trigger
246      when(!entries(wbMbIndex(i)).fof || selElemInfield === 0.U){
247        // For fof loads, if element 0 raises an exception, vl is not modified, and the trap is taken.
248        entries(wbMbIndex(i)).vstart       := selElemInfield
249        entries(wbMbIndex(i)).exceptionVec := ExceptionNO.selectByFu(selExceptionVec, fuCfg)
250        entries(wbMbIndex(i)).vaddr        := vaddr
251        entries(wbMbIndex(i)).vaNeedExt    := selPort(0).vaNeedExt
252        entries(wbMbIndex(i)).gpaddr       := selPort(0).gpaddr
253        entries(wbMbIndex(i)).isForVSnonLeafPTE := selPort(0).isForVSnonLeafPTE
254      }.otherwise{
255        entries(wbMbIndex(i)).vl           := selElemInfield
256      }
257    }
258  }
259
260  // for pipeline writeback
261  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
262    val wbIndex          = pipewb.bits.mBIndex
263    val flowNumOffset    = Mux(pipewb.bits.usSecondInv,
264                               2.U,
265                               PopCount(mergePortMatrix(i)))
266    val sourceTypeNext   = entries(wbIndex).sourceType | pipewb.bits.sourceType
267    val hasExp           = ExceptionNO.selectByFu(pipewb.bits.exceptionVec, fuCfg).asUInt.orR
268
269    // if is VLoad, need latch 1 cycle to merge data. only flowNum and wbIndex need to latch
270    val latchWbValid     = if(isVStore) pipewb.valid else RegNext(pipewb.valid)
271    val latchWbIndex     = if(isVStore) wbIndex      else RegEnable(wbIndex, pipewb.valid)
272    val latchFlowNum     = if(isVStore) flowNumOffset else RegEnable(flowNumOffset, pipewb.valid)
273    val latchMergeByPre  = if(isVStore) mergedByPrevPortVec(i) else RegEnable(mergedByPrevPortVec(i), pipewb.valid)
274    when(latchWbValid && !latchMergeByPre){
275      entries(latchWbIndex).flowNum := entries(latchWbIndex).flowNum - latchFlowNum
276    }
277
278    when(pipewb.valid){
279      entries(wbIndex).sourceType   := sourceTypeNext
280      entries(wbIndex).flushState   := pipewb.bits.flushState
281    }
282    when(pipewb.valid && !pipewb.bits.hit){
283      needRSReplay(wbIndex) := true.B
284    }
285    pipewb.ready := true.B
286    XSError((entries(latchWbIndex).flowNum - latchFlowNum > entries(latchWbIndex).flowNum) && latchWbValid && !latchMergeByPre, "FlowWriteback overflow!!\n")
287    XSError(!allocated(latchWbIndex) && latchWbValid, "Writeback error flow!!\n")
288  }
289  // for inorder mem asscess
290  io.toSplit := DontCare
291
292  //uopwriteback(deq)
293  for (i <- 0 until uopSize){
294    when(allocated(i) && entries(i).allReady()){
295      uopFinish(i) := true.B
296    }
297  }
298   val selPolicy = SelectOne("circ", uopFinish, deqWidth) // select one entry to deq
299   private val pipelineOut              = Wire(Vec(deqWidth, DecoupledIO(new MemExuOutput(isVector = true))))
300   private val writeBackOut             = Wire(Vec(deqWidth, DecoupledIO(new MemExuOutput(isVector = true))))
301   private val writeBackOutExceptionVec = writeBackOut.map(_.bits.uop.exceptionVec)
302   for(((port, lsqport), i) <- (pipelineOut zip io.toLsq).zipWithIndex){
303    val canGo    = port.ready
304    val (selValid, selOHVec) = selPolicy.getNthOH(i + 1)
305    val entryIdx = OHToUInt(selOHVec)
306    val selEntry = entries(entryIdx)
307    val selAllocated = allocated(entryIdx)
308    val selFire  = selValid && canGo
309    when(selFire){
310      freeMaskVec(entryIdx) := selAllocated
311      allocated(entryIdx)   := false.B
312      uopFinish(entryIdx)   := false.B
313      needRSReplay(entryIdx):= false.B
314    }
315    //writeback connect
316    port.valid   := selFire && selAllocated && !needRSReplay(entryIdx) && !selEntry.uop.robIdx.needFlush(io.redirect)
317    port.bits    := DeqConnect(selEntry)
318    //to lsq
319    lsqport.bits := ToLsqConnect(selEntry) // when uopwriteback, free MBuffer entry, write to lsq
320    lsqport.valid:= selFire && selAllocated && !needRSReplay(entryIdx)
321    //to RS
322    val feedbackOut                       = WireInit(0.U.asTypeOf(io.feedback(i).bits)).suggestName(s"feedbackOut_${i}")
323    val feedbackValid                     = selFire && selAllocated
324    feedbackOut.hit                      := !needRSReplay(entryIdx)
325    feedbackOut.robIdx                   := selEntry.uop.robIdx
326    feedbackOut.sourceType               := selEntry.sourceType
327    feedbackOut.flushState               := selEntry.flushState
328    feedbackOut.dataInvalidSqIdx         := DontCare
329    feedbackOut.sqIdx                    := selEntry.uop.sqIdx
330    feedbackOut.lqIdx                    := selEntry.uop.lqIdx
331
332    io.feedback(i).valid                 := RegNext(feedbackValid)
333    io.feedback(i).bits                  := RegEnable(feedbackOut, feedbackValid)
334
335    NewPipelineConnect(
336      port, writeBackOut(i), writeBackOut(i).fire,
337      Mux(port.fire,
338        selEntry.uop.robIdx.needFlush(io.redirect),
339        writeBackOut(i).bits.uop.robIdx.needFlush(io.redirect)),
340      Option(s"VMergebufferPipelineConnect${i}")
341    )
342     io.uopWriteback(i)                  <> writeBackOut(i)
343     io.uopWriteback(i).bits.uop.exceptionVec := ExceptionNO.selectByFu(writeBackOutExceptionVec(i), fuCfg)
344   }
345
346  QueuePerf(uopSize, freeList.io.validCount, freeList.io.validCount === 0.U)
347}
348
/**
  * Vector-load merge buffer: on top of the common bookkeeping it merges the
  * data written back by the load pipeline into the entry's data field.
  *
  * Merging takes two steps per port: the merged value is computed in the cycle
  * of the writeback (step0) and written into the entry one cycle later (step1).
  */
class VLMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=false){
  override lazy val uopSize = VlMergeBufferSize
  println(s"VLMergeBuffer Size: ${VlMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecLoadPipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VLoad MergeBuffer freelist"
  ))

  //merge data
  val flowWbElemIdx     = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  val flowWbElemIdxInVd = Wire(Vec(pipeWidth, UInt(elemIdxBits.W)))
  // The following three are wires driven by the step1 registers of each port.
  val pipewbValidReg    = Wire(Vec(pipeWidth, Bool()))
  val wbIndexReg        = Wire(Vec(pipeWidth, UInt(vlmBindexBits.W)))
  val mergeDataReg      = Wire(Vec(pipeWidth, UInt(VLEN.W)))

  for((pipewb, i) <- io.fromPipeline.zipWithIndex){
    /** step0 **/
    val wbIndex         = pipewb.bits.mBIndex
    val alignedType     = pipewb.bits.alignedType
    val elemIdxInsideVd = pipewb.bits.elemIdxInsideVd
    val allVecData      = io.fromPipeline.map(_.bits.vecdata.get)
    flowWbElemIdx(i)     := pipewb.bits.elemIdx
    flowWbElemIdxInVd(i) := elemIdxInsideVd.get

    // Forward data that any port merged for the same entry in the previous
    // cycle and that has not reached `entries` yet; the lowest port index has
    // priority. Generated for every pipeline port rather than a fixed count.
    val forwarded = (0 until pipeWidth).map { j =>
      (pipewbValidReg(j) && (wbIndexReg(j) === wbIndex)) -> mergeDataReg(j)
    }
    val oldData = PriorityMux(forwarded :+ (true.B -> entries(wbIndex).data)) // default use entries_data

    val mergedData = mergeDataWithElemIdx(
      oldData = oldData,
      newData = allVecData,
      alignedType = alignedType(1,0),
      elemIdx = flowWbElemIdxInVd,
      valids = mergePortMatrix(i)
    )
    /* this only for unit-stride load data merge
     * cycle0: broaden 128-bits to 256-bits (max 6 to 1)
     * cycle1: select 128-bits data from 256-bits (16 to 1)
     */
    val (brodenMergeData, brodenMergeMask)     = mergeDataByIndex(
      data    = allVecData.drop(i),
      mask    = io.fromPipeline.map(_.bits.mask).drop(i),
      index   = io.fromPipeline(i).bits.elemIdxInsideVd.get,
      valids  = mergePortMatrix(i).drop(i)
    )
    /** step1 **/
    pipewbValidReg(i)      := RegNext(pipewb.valid)
    wbIndexReg(i)          := RegEnable(wbIndex, pipewb.valid)
    mergeDataReg(i)        := RegEnable(mergedData, pipewb.valid) // for not Unit-stride
    val brodenMergeDataReg  = RegEnable(brodenMergeData, pipewb.valid) // only for Unit-stride
    val brodenMergeMaskReg  = RegEnable(brodenMergeMask, pipewb.valid)
    val mergedByPrevPortReg = RegEnable(mergedByPrevPortVec(i), pipewb.valid)
    val regOffsetReg        = RegEnable(pipewb.bits.reg_offset.get, pipewb.valid) // only for Unit-stride
    val isusMerge           = RegEnable(alignedType(2), pipewb.valid)

    // Select the window of the broadened data / mask that starts at regOffsetReg.
    val offsetOH            = UIntToOH(regOffsetReg)
    val usSelData           = Mux1H(offsetOH, (0 until VLENB).map(byteOff => getNoAlignedSlice(brodenMergeDataReg, byteOff, 128)))
    val usSelMask           = Mux1H(offsetOH, (0 until VLENB).map(byteOff => brodenMergeMaskReg(16 + byteOff - 1, byteOff)))
    val usMergeData         = mergeDataByByte(entries(wbIndexReg(i)).data, usSelData, usSelMask)
    // A port that was merged into an earlier port does not write the entry itself.
    when(pipewbValidReg(i) && !mergedByPrevPortReg){
      entries(wbIndexReg(i)).data := Mux(isusMerge, usMergeData, mergeDataReg(i)) // if aligned(2) == 1, is Unit-Stride inst
    }
  }
}
416
/**
  * Vector-store merge buffer. It reuses the common bookkeeping and only
  * replaces the writeback bundle, which carries no data for stores.
  */
class VSMergeBufferImp(implicit p: Parameters) extends BaseVMergeBuffer(isVStore=true){
  override lazy val uopSize = VsMergeBufferSize
  println(s"VSMergeBuffer Size: ${VsMergeBufferSize}")
  override lazy val freeList = Module(new FreeList(
    size = uopSize,
    allocWidth = VecStorePipelineWidth,
    freeWidth = deqWidth,
    enablePreAlloc = false,
    moduleName = "VStore MergeBuffer freelist"
  ))

  /**
    * Writeback bundle for a store entry: data, mask and the vd indices are
    * left as don't-care; only the uop (with the entry's exception vector and
    * vstart) and a cleared debug bundle are driven.
    */
  override def DeqConnect(source: MBufferBundle): MemExuOutput = {
    val out = Wire(new MemExuOutput(isVector = true))

    // Bulk-copy the uop first; the two connections below override parts of it.
    out.uop              := source.uop
    out.uop.exceptionVec := source.exceptionVec
    out.uop.vpu.vstart   := source.vstart

    // Not driven for stores.
    out.data             := DontCare
    out.mask.get         := DontCare
    out.vdIdxInField.get := DontCare
    out.vdIdx.get        := DontCare

    out.debug := 0.U.asTypeOf(new DebugBundle)
    out
  }
}
440