xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VSplit.scala (revision 26af847e669bb208507278eafc6ebe52f03b0d19)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.vector.Bundles._

class VSplitPipeline(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule {
  val io = IO(new VSplitPipelineIO(isVStore))

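  // Decode helpers: classify unit-stride sub-types by fuOpType
  // (whole-register access, mask access, fault-only-first load).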
  def us_whole_reg(fuOpType: UInt) = fuOpType === VlduType.vlr
  def us_mask(fuOpType: UInt) = fuOpType === VlduType.vlm
  def us_fof(fuOpType: UInt) = fuOpType === VlduType.vleff

  val s1_ready = WireInit(false.B)
  io.in.ready := s1_ready

  /**-----------------------------------------------------------
    * s0 stage
    * decode and generate AlignedType, uop mask, preIsSplit
    * ----------------------------------------------------------
    */
  val s0_vtype = io.in.bits.uop.vpu.vtype
  val s0_sew = s0_vtype.vsew
  val s0_eew = io.in.bits.uop.vpu.veew
  val s0_lmul = s0_vtype.vlmul
  // for whole-register accesses emul is derived from nf; for unit-stride mask accesses emul is 1 (log2 encoding 0)
  val s0_fuOpType = io.in.bits.uop.fuOpType
  val s0_mop = s0_fuOpType(6, 5)
  val s0_nf = Mux(us_whole_reg(s0_fuOpType), 0.U, io.in.bits.uop.vpu.nf)
  val s0_vm = io.in.bits.uop.vpu.vm
  val s0_emul = Mux(us_whole_reg(s0_fuOpType), GenUSWholeEmul(io.in.bits.uop.vpu.nf), Mux(us_mask(s0_fuOpType), 0.U(mulBits.W), EewLog2(s0_eew) - s0_sew + s0_lmul))
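  // preIsSplit is false only for unit-stride accesses that are not fault-only-first:
  // those are issued as wide 128-bit flows instead of being split per element.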
  val s0_preIsSplit = !(isUnitStride(s0_mop) && !us_fof(s0_fuOpType))

  val s0_valid         = Wire(Bool())
  val s0_kill          = io.in.bits.uop.robIdx.needFlush(io.redirect)
  val s0_can_go        = s1_ready
  val s0_fire          = s0_valid && s0_can_go
  val s0_out           = Wire(new VLSBundle(isVStore))

  val isUsWholeReg = isUnitStride(s0_mop) && us_whole_reg(s0_fuOpType)
  val isMaskReg = isUnitStride(s0_mop) && us_mask(s0_fuOpType)
  val isSegment = s0_nf =/= 0.U && !us_whole_reg(s0_fuOpType)
  val instType = Cat(isSegment, s0_mop)
  val uopIdx = io.in.bits.uop.vpu.vuopIdx
  val uopIdxInField = GenUopIdxInField(instType, s0_emul, s0_lmul, uopIdx)
  val vdIdxInField = GenVdIdxInField(instType, s0_emul, s0_lmul, uopIdxInField)
  val lmulLog2 = Mux(s0_lmul.asSInt >= 0.S, 0.U, s0_lmul)
  val emulLog2 = Mux(s0_emul.asSInt >= 0.S, 0.U, s0_emul)
  val numEewLog2 = emulLog2 - EewLog2(s0_eew)
  val numSewLog2 = lmulLog2 - s0_sew
  val numFlowsSameVdLog2 = Mux(
    isIndexed(instType),
    log2Up(VLENB).U - s0_sew(1,0),
    log2Up(VLENB).U - s0_eew(1,0)
  )
  // numUops = (nf + 1) * max(lmul, emul)
  val lmulLog2Pos = Mux(s0_lmul.asSInt < 0.S, 0.U, s0_lmul)
  val emulLog2Pos = Mux(s0_emul.asSInt < 0.S, 0.U, s0_emul)
  val numUops = Mux(
    isIndexed(s0_mop) && s0_lmul.asSInt > s0_emul.asSInt,
    (s0_nf +& 1.U) << lmulLog2Pos,
    (s0_nf +& 1.U) << emulLog2Pos
  )
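  // Worked example (illustrative): an indexed segment load with nf = 2 (3 fields),
  // LMUL = 2 (vlmul = 1 in log2 encoding) and EMUL = 1 takes the lmul branch:
  // numUops = (2 + 1) << 1 = 6.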

  val vvl = io.in.bits.src_vl.asTypeOf(VConfig()).vl
  val evl = Mux(isUsWholeReg, GenUSWholeRegVL(io.in.bits.uop.vpu.nf +& 1.U, s0_eew), Mux(isMaskReg, GenUSMaskRegVL(vvl), vvl))
  val vvstart = io.in.bits.uop.vpu.vstart
  val alignedType = Mux(isIndexed(instType), s0_sew(1, 0), s0_eew(1, 0))
  val broadenAlignedType = Mux(s0_preIsSplit, Cat("b0".U, alignedType), "b100".U) // if unit-stride, use 128-bit memory accesses
  val flowsLog2 = GenRealFlowLog2(instType, s0_emul, s0_lmul, s0_eew, s0_sew)
  val flowsPrevThisUop = uopIdxInField << flowsLog2 // # of flows before this uop in a field
  val flowsPrevThisVd = vdIdxInField << numFlowsSameVdLog2 // # of flows before this vd in a field
  val flowsIncludeThisUop = (uopIdxInField +& 1.U) << flowsLog2 // # of flows up to and including this uop
  val flowNum = io.in.bits.flowNum.get
  val srcMask = GenFlowMask(Mux(s0_vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vvstart, evl, true)

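  // flowMask keeps only the srcMask bits that belong to this uop's flows
  // (between flowsPrevThisUop and flowsIncludeThisUop), then shifts them
  // down so they are aligned to the start of this vd.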
  val flowMask = ((srcMask &
    UIntToMask(flowsIncludeThisUop.asUInt, VLEN + 1) &
    (~UIntToMask(flowsPrevThisUop.asUInt, VLEN)).asUInt
  ) >> flowsPrevThisVd)(VLENB - 1, 0)
  val vlmax = GenVLMAX(s0_lmul, s0_sew)

  // connect
  s0_out := DontCare
  s0_out match { case x =>
    x.uop := io.in.bits.uop
    x.uop.vpu.vl := evl
    x.uop.uopIdx := uopIdx
    x.uop.numUops := numUops
    x.uop.lastUop := (uopIdx +& 1.U) === numUops
    x.flowMask := flowMask
    x.byteMask := GenUopByteMask(flowMask, broadenAlignedType)(VLENB - 1, 0)
    x.fof := isUnitStride(s0_mop) && us_fof(s0_fuOpType)
    x.baseAddr := io.in.bits.src_rs1
    x.stride := io.in.bits.src_stride
    x.flowNum := (1.U << flowNum)
    x.nfields := s0_nf +& 1.U
    x.vm := s0_vm
    x.usWholeReg := isUsWholeReg
    x.usMaskReg := isMaskReg
    x.eew := s0_eew
    x.sew := s0_sew
    x.emul := s0_emul
    x.lmul := s0_lmul
    x.vlmax := Mux(isUsWholeReg, evl, vlmax)
    x.instType := instType
    x.data := io.in.bits.src_vs3
    x.vdIdxInField := vdIdxInField
    x.preIsSplit := s0_preIsSplit
    x.alignedType := broadenAlignedType
  }
  s0_valid := io.in.valid && !s0_kill
  /**-------------------------------------
    * s1 stage
    * ------------------------------------
    * generate uopOffset and stride, query mergeBuffer
    */
  val s1_valid         = RegInit(false.B)
  val s1_kill          = Wire(Bool())
  val s1_in            = Wire(new VLSBundle(isVStore))
  val s1_can_go        = io.out.ready && io.toMergeBuffer.resp.valid
  val s1_fire          = s1_valid && !s1_kill && s1_can_go

  s1_ready         := s1_kill || !s1_valid || io.out.ready
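  // s1 can take a new uop when it is being killed, is empty, or its output is ready;
  // s1_fire additionally waits for a mergeBuffer response, so a fire implies that a
  // merge buffer entry has been granted.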

  when(s0_fire) {
    s1_valid := true.B
  }.elsewhen(s1_fire) {
    s1_valid := false.B
  }.elsewhen(s1_kill) {
    s1_valid := false.B
  }
  s1_in := RegEnable(s0_out, s0_fire)

  val s1_uopidx           = s1_in.uop.vpu.vuopIdx
  val s1_nf               = s1_in.uop.vpu.nf
  val s1_nfields          = s1_in.nfields
  val s1_eew              = s1_in.eew
  val s1_instType         = s1_in.instType
  val s1_stride           = s1_in.stride
  val s1_alignedType      = Mux(isIndexed(s1_in.instType), s1_in.sew(1, 0), s1_in.eew(1, 0))
  val s1_notIndexedStride = Mux( // stride for strided/unit-stride instructions
    isStrided(s1_instType),
    s1_stride(XLEN - 1, 0), // for strided loads, stride = x[rs2]
    s1_nfields << s1_eew(1, 0) // for unit-stride loads, stride = eew * NFIELDS
  )
  val uopOffset  = (s1_uopidx >> s1_nf) << s1_alignedType
  val stride     = Mux(isIndexed(s1_instType), s1_stride, s1_notIndexedStride) // for indexed instructions, the index is read during split instead
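  // uopOffset positions this uop's first access relative to the base address;
  // per-flow strides or indices are then applied flow by flow in the split buffer.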

  s1_kill               := s1_in.uop.robIdx.needFlush(io.redirect)

  // query mergeBuffer: an entry is requested only when s1 fires, i.e. only when it can go
  io.toMergeBuffer.req.valid             := s1_fire
  io.toMergeBuffer.req.bits.flowNum      := Mux(s1_in.preIsSplit, s1_in.flowNum, PopCount(s1_in.flowMask))
  io.toMergeBuffer.req.bits.data         := s1_in.data
  io.toMergeBuffer.req.bits.uop          := s1_in.uop
  io.toMergeBuffer.req.bits.mask         := s1_in.flowMask
  io.toMergeBuffer.req.bits.vaddr        := DontCare
//   io.toMergeBuffer.req.bits.vdOffset :=

  // out connect
  io.out.valid          := s1_valid
  io.out.bits           := s1_in
  io.out.bits.uopOffset := uopOffset
  io.out.bits.stride    := stride
  io.out.bits.mBIndex   := io.toMergeBuffer.resp.bits.mBIndex
}

abstract class VSplitBuffer(isVStore: Boolean = false)(implicit p: Parameters) extends VLSUModule {
  val io = IO(new VSplitBufferIO(isVStore))

  val bufferSize: Int

  class VSplitPtr(implicit p: Parameters) extends CircularQueuePtr[VSplitPtr](bufferSize) {
  }

  object VSplitPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSplitPtr = {
      val ptr = Wire(new VSplitPtr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  val uopq = Reg(Vec(bufferSize, new VLSBundle(isVStore)))
  val valid = RegInit(VecInit(Seq.fill(bufferSize)(false.B)))
  val vstart = RegInit(VecInit(Seq.fill(bufferSize)(0.U(elemIdxBits.W)))) // index of the exception element
  val vl = RegInit(VecInit(Seq.fill(bufferSize)(0.U.asTypeOf(Valid(UInt(elemIdxBits.W)))))) // only for fof instructions that modify vl
  val srcMaskVec = Reg(Vec(bufferSize, UInt(VLEN.W)))
  // ptr
  val enqPtr = RegInit(0.U.asTypeOf(new VSplitPtr))
  val deqPtr = RegInit(0.U.asTypeOf(new VSplitPtr))
  // for split
  val splitIdx = RegInit(0.U(flowIdxBits.W))
  val strideOffsetReg = RegInit(0.U(VLEN.W))

  /**
    * Redirect
    */
  val flushed = WireInit(VecInit(Seq.fill(bufferSize)(false.B))) // entry was already flushed by a redirect that arrived in the previous cycle
  val flushVec = (valid zip flushed).zip(uopq).map { case ((v, f), entry) => v && entry.uop.robIdx.needFlush(io.redirect) && !f }
  val flushEnq = io.in.fire && io.in.bits.uop.robIdx.needFlush(io.redirect)
  val flushNumReg = RegNext(PopCount(flushEnq +: flushVec))
  val redirectReg = RegNext(io.redirect)
  val flushVecReg = RegNext(WireInit(VecInit(flushVec)))
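  // Redirect handling is pipelined: the flush set is computed in the cycle the
  // redirect arrives, and the actual invalidation / enqPtr rollback happens one
  // cycle later using redirectReg, flushVecReg and flushNumReg.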

  // enqueue
  when (io.in.fire && !flushEnq) {
    val id = enqPtr.value
    uopq(id) := io.in.bits
    valid(id) := true.B
  }
  io.in.ready := distanceBetween(enqPtr, deqPtr) < bufferSize.U // accept a new uop only while the buffer is not full

  // split uops
  val issueValid       = valid(deqPtr.value)
  val issueEntry       = uopq(deqPtr.value)
  val issueMbIndex     = uopq(deqPtr.value).mBIndex
  val issueFlowNum     = issueEntry.flowNum
  val issueBaseAddr    = issueEntry.baseAddr
  val issueUop         = issueEntry.uop
  val issueUopIdx      = issueUop.vpu.vuopIdx
  val issueInstType    = issueEntry.instType
  val issueUopOffset   = issueEntry.uopOffset
  val issueEew         = issueEntry.eew
  val issueSew         = issueEntry.sew
  val issueLmul        = issueEntry.lmul
  val issueEmul        = issueEntry.emul
  val issueAlignedType = issueEntry.alignedType
  val issuePreIsSplit  = issueEntry.preIsSplit
  val issueByteMask    = issueEntry.byteMask
  val elemIdx = GenElemIdx(
    instType = issueInstType,
    emul = issueEmul,
    lmul = issueLmul,
    eew = issueEew,
    sew = issueSew,
    uopIdx = issueUopIdx,
    flowIdx = splitIdx
  ) // element index within the whole instruction, used for exception reporting
  val indexedStride    = IndexAddr( // index for indexed instructions
    index = issueEntry.stride,
    flow_inner_idx = ((splitIdx << issueEew(1, 0))(vOffsetBits - 1, 0) >> issueEew(1, 0)).asUInt,
    eew = issueEew
  )
  val issueStride = Mux(isIndexed(issueInstType), indexedStride, strideOffsetReg)
  val vaddr = issueBaseAddr + issueUopOffset + issueStride
  val mask = genVWmask128(vaddr, issueAlignedType) // scalar mask for the flow
  val flowMask = issueEntry.flowMask
  val vecActive = (flowMask & UIntToOH(splitIdx)).orR
  /*
   * Unit-stride uops are split into one or two flows:
   * if the uop's address is 128-bit aligned (or its active bytes fit within one
   * 128-bit line), a single flow suffices; otherwise it is split into two.
   */

  val usAligned128     = (vaddr(3,0) === 0.U) // addr is 128-bit aligned
  val usSplitMask      = genUSSplitMask(issueByteMask, splitIdx, vaddr(3,0))
  val usNoSplit        = (usAligned128 || !(vaddr(3,0) +& PopCount(usSplitMask))(4)) && !issuePreIsSplit && (splitIdx === 0.U) // unit-stride uop that doesn't need to be split into two flows
  val usSplitVaddr     = genUSSplitAddr(vaddr, splitIdx)
  val regOffset        = vaddr(3,0) // offset in the 256-bit vd
  XSError((splitIdx > 1.U && usNoSplit) || (splitIdx > 1.U && !issuePreIsSplit), "Unit-stride addr split error!\n")
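  // Illustrative example: a unit-stride uop whose vaddr is 16-byte aligned (or whose
  // active bytes end within the first 16-byte line) issues a single flow, with
  // usNoSplit marking the second flow as invalid (usSecondInv); e.g. 16 active bytes
  // starting at offset 12 spill into the next line and therefore take two flows.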

  // data
  io.out.bits match { case x =>
    x.uop                   := issueUop
    x.vaddr                 := Mux(!issuePreIsSplit, usSplitVaddr, vaddr)
    x.alignedType           := issueAlignedType
    x.isvec                 := true.B
    x.mask                  := Mux(!issuePreIsSplit, usSplitMask, mask)
    x.reg_offset            := regOffset // for merging unit-stride flows
    x.vecActive             := vecActive
    x.is_first_ele          := DontCare
    x.usSecondInv           := usNoSplit
    x.elemIdx               := elemIdx
    x.uop_unit_stride_fof   := DontCare
    x.isFirstIssue          := DontCare
    x.mBIndex               := issueMbIndex
  }

  // update enqPtr
  when (redirectReg.valid && flushNumReg =/= 0.U) {
    enqPtr := enqPtr - flushNumReg
  }.otherwise {
    when (io.in.fire) {
      enqPtr := enqPtr + 1.U
    }
  }

  // flush queue
  for (i <- 0 until bufferSize) {
    when(flushVecReg(i) && redirectReg.valid && flushNumReg =/= 0.U) {
      valid(i) := false.B
      flushed(i) := true.B
    }
  }

  /* Execute logic */
  /** Issue to scalar pipeline **/
  val canIssue = Wire(Bool())
  val allowIssue = io.out.ready
  val doIssue = Wire(Bool())
  val issueCount = Mux(usNoSplit, 2.U, PopCount(doIssue)) // a no-split unit-stride uop still advances splitIdx past both flow slots

  // handshake
  val thisPtr = deqPtr.value
  canIssue := !issueUop.robIdx.needFlush(io.redirect) && deqPtr < enqPtr
  doIssue := canIssue && allowIssue
  when (!RegNext(io.redirect.valid) || distanceBetween(enqPtr, deqPtr) > flushNumReg) {
    when (splitIdx < (issueFlowNum - issueCount)) {
      // the uop has not been entirely split yet
      splitIdx := splitIdx + issueCount
      strideOffsetReg := strideOffsetReg + Mux(doIssue, issueEntry.stride, 0.U) // advance by one stride per issued flow
    }.otherwise {
      when (doIssue) {
        // the uop is done splitting
        splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
        strideOffsetReg := 0.U
        deqPtr := deqPtr + 1.U
      }
    }
  }.otherwise {
    splitIdx := 0.U(flowIdxBits.W) // initialize flowIdx
    strideOffsetReg := 0.U
  }
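  // Example walk-through (illustrative): a strided uop with issueFlowNum = 4 issues
  // flows at splitIdx 0..3 over four fire cycles, accumulating one stride per flow;
  // once the last flow issues, splitIdx and the offset reset and deqPtr advances.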

  // out connect
  io.out.valid := canIssue && vecActive
}

class VSSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = true) {
  override lazy val bufferSize = SplitBufferSize
  // split data
  val flowData = GenVSData(
    data = issueEntry.data.asUInt,
    elemIdx = splitIdx,
    alignedType = issueAlignedType
  )
  val usSplitData      = genUSSplitData(issueEntry.data.asUInt, splitIdx, vaddr(3,0))
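  // flowData is the single element selected for a split (per-element) flow;
  // usSplitData is the 128-bit slice, shifted to the access offset, for a
  // unit-stride flow. Which one is sent to the store queue depends on preIsSplit.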

  // send data to sq
  val vstd = io.vstd.get
  vstd.valid := canIssue
  vstd.bits.uop := issueUop
  vstd.bits.data := Mux(!issuePreIsSplit, usSplitData, flowData)
  vstd.bits.debug := DontCare
  vstd.bits.vdIdx.get := DontCare
  vstd.bits.vdIdxInField.get := DontCare
}

class VLSplitBufferImp(implicit p: Parameters) extends VSplitBuffer(isVStore = false) {
  override lazy val bufferSize = SplitBufferSize
}

class VSSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = true) {
}

class VLSplitPipelineImp(implicit p: Parameters) extends VSplitPipeline(isVStore = false) {
}

class VLSplitImp(implicit p: Parameters) extends VLSUModule {
  val io = IO(new VSplitIO(isVStore = false))
  val splitPipeline = Module(new VLSplitPipelineImp())
  val splitBuffer = Module(new VLSplitBufferImp())
  // Split Pipeline
  splitPipeline.io.in <> io.in
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // Split Buffer
  splitBuffer.io.in <> splitPipeline.io.out
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
}

class VSSplitImp(implicit p: Parameters) extends VLSUModule {
  val io = IO(new VSplitIO(isVStore = true))
  val splitPipeline = Module(new VSSplitPipelineImp())
  val splitBuffer = Module(new VSSplitBufferImp())
  // Split Pipeline
  splitPipeline.io.in <> io.in
  splitPipeline.io.redirect <> io.redirect
  io.toMergeBuffer <> splitPipeline.io.toMergeBuffer

  // Split Buffer
  splitBuffer.io.in <> splitPipeline.io.out
  splitBuffer.io.redirect <> io.redirect
  io.out <> splitBuffer.io.out
  io.vstd.get <> splitBuffer.io.vstd.get
}