/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.FuType
import freechips.rocketchip.diplomacy.BufferParams
import xiangshan.cache.mmu._
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.vector.Bundles.VConfig

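// Context latched for the segment instruction currently being executed:
// base vaddr, uop, translated paddr, element mask, vl, plus the vstart,
// faulting vaddr and flags needed for exception reporting.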
class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
{
  val vaddr            = UInt(VAddrBits.W)
  val uop              = new DynInst
  val paddr            = UInt(PAddrBits.W)
  val mask             = UInt(VLEN.W)
  val valid            = Bool()
  val alignedType      = UInt(alignTypeBits.W)
  val vl               = UInt(elemIdxBits.W)
  val vlmaxInVd        = UInt(elemIdxBits.W)
  val vlmaxMaskInVd    = UInt(elemIdxBits.W)
  // for exception
  val vstart           = UInt(elemIdxBits.W)
  val exceptionvaddr   = UInt(VAddrBits.W)
  val exception_va     = Bool()
  val exception_pa     = Bool()
}

class VSegmentUnit (implicit p: Parameters) extends VLSUModule
  with HasDCacheParameters
  with MemoryOpConstants
  with SdtrigExt
  with HasLoadHelper
{
  val io               = IO(new VSegmentUnitIO)

  val maxSize          = VSegmentBufferSize

  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize){
  }

  object VSegUPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
      val ptr           = Wire(new VSegUPtr)
      ptr.flag         := f
      ptr.value        := v
      ptr
    }
  }

  // buffered uop state
  val instMicroOp       = Reg(new VSegmentBundle)
  val data              = Reg(Vec(maxSize, UInt(VLEN.W)))
  val uopIdx            = Reg(Vec(maxSize, UopIdx()))
  val stride            = Reg(Vec(maxSize, UInt(VLEN.W)))
  val allocated         = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
  val enqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
  val deqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
  val stridePtr         = WireInit(0.U.asTypeOf(new VSegUPtr)) // for selecting the stride/index register

  val segmentIdx        = RegInit(0.U(elemIdxBits.W))
  val fieldIdx          = RegInit(0.U(fieldBits.W))
  val segmentOffset     = RegInit(0.U(VAddrBits.W))
  val splitPtr          = RegInit(0.U.asTypeOf(new VSegUPtr)) // for selecting load/store data
  val splitPtrNext      = WireInit(0.U.asTypeOf(new VSegUPtr))
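  // Pointer roles: enqPtr/deqPtr manage the uop buffer; splitPtr selects the
  // data register being merged (load) or split (store) for the current
  // element; stridePtr selects the stride/index source register.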

  val exception_va      = WireInit(false.B)
  val exception_pa      = WireInit(false.B)

  val maxSegIdx         = instMicroOp.vl
  val maxNfields        = instMicroOp.uop.vpu.nf
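  // nf holds the raw field-count encoding (NFIELDS - 1): fieldIdx counts
  // 0 .. nf inclusive, and a segment contains nf + 1 fields.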

  XSError(segmentIdx > maxSegIdx, s"segmentIdx > vl, something is wrong!\n")
  XSError(fieldIdx > maxNfields, s"fieldIdx > nfields, something is wrong!\n")

  // Segment instruction's FSM
  /*
  * s_idle: wait for a request
  * s_flush_sbuffer_req: ask the sbuffer to flush
  * s_wait_flush_sbuffer_resp: wait until the sbuffer is empty
  * s_tlb_req: send the TLB request for the current element
  * s_wait_tlb_resp: wait for the TLB response
  * s_pm: check PMP and collect exceptions; fault -> s_finish, else issue the cache request
  * s_cache_req: send the DCache request
  * s_cache_resp: wait for the DCache response; retry on miss
  * s_latch_and_merge_data: merge load data into the data buffer
  * s_send_data: send store data to the sbuffer
  * s_finish: write back uops
  * */
  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm :: s_cache_req :: s_cache_resp :: s_latch_and_merge_data :: s_send_data :: s_finish :: Nil = Enum(11)
  val state             = RegInit(s_idle)
  val stateNext         = WireInit(s_idle)
  val sbufferEmpty      = io.flush_sbuffer.empty

  /**
   * state update
   */
  state  := stateNext

  /**
   * state transfer
   */
  when(state === s_idle){
    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
  }.elsewhen(state === s_flush_sbuffer_req){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if the sbuffer is already empty, go straight to the TLB query

  }.elsewhen(state === s_wait_flush_sbuffer_resp){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)

  }.elsewhen(state === s_tlb_req){
    stateNext := s_wait_tlb_resp

  }.elsewhen(state === s_wait_tlb_resp){
    stateNext := Mux(!io.dtlb.resp.bits.miss && io.dtlb.resp.fire, s_pm, s_tlb_req)

  }.elsewhen(state === s_pm){
    stateNext := Mux(exception_pa || exception_va, s_finish, s_cache_req)

  }.elsewhen(state === s_cache_req){
    stateNext := Mux(io.wdcache.req.fire || io.rdcache.req.fire, s_cache_resp, s_cache_req)

  }.elsewhen(state === s_cache_resp){
    when(io.wdcache.resp.fire || io.rdcache.resp.fire) {
      when(io.wdcache.resp.bits.miss && io.rdcache.resp.bits.miss) {
        stateNext := s_cache_req
      }.otherwise {
        stateNext := Mux(FuType.isVLoad(instMicroOp.uop.fuType), s_latch_and_merge_data, s_send_data)
      }
    }.otherwise{
      stateNext := s_cache_resp
    }

  }.elsewhen(state === s_latch_and_merge_data) {
    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields)) {
      stateNext := s_finish // segment instruction finished
    }.otherwise {
      stateNext := s_tlb_req // continue with the next element
    }

  }.elsewhen(state === s_send_data) { // wait until the sbuffer accepts the data
    when(!io.sbuffer.fire) {
      stateNext := s_send_data
    }.elsewhen((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields)) {
      stateNext := s_finish // segment instruction finished
    }.otherwise {
      stateNext := s_tlb_req // continue with the next element
    }
  }.elsewhen(state === s_finish){ // writeback uop
    stateNext := Mux(distanceBetween(enqPtr, deqPtr) === 0.U, s_idle, s_finish)

  }.otherwise{
    stateNext := s_idle
    XSError(true.B, s"Unknown state!\n")
  }
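  // Per-element loop: every (segment, field) pair is translated (s_tlb_req),
  // checked (s_pm) and accessed (s_cache_req/resp) one element at a time,
  // merging loads or sending stores, until the final field of the final
  // segment has been handled.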

  /*************************************************************************
   *                            enqueue logic
   *************************************************************************/
  io.in.ready                         := true.B
  val fuOpType                         = io.in.bits.uop.fuOpType
  val vtype                            = io.in.bits.uop.vpu.vtype
  val mop                              = fuOpType(6, 5)
  val instType                         = Cat(true.B, mop)
  val eew                              = io.in.bits.uop.vpu.veew
  val sew                              = vtype.vsew
  val lmul                             = vtype.vlmul
  val vl                               = instMicroOp.vl
  val vm                               = instMicroOp.uop.vpu.vm
  val vstart                           = instMicroOp.uop.vpu.vstart
  val srcMask                          = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
  // on the first uop's enqueue, latch the segment instruction's microOp
  when(io.in.fire && !instMicroOp.valid){
    val vlmaxInVd                      = GenVLMAX(Mux(lmul.asSInt > 0.S, 0.U, lmul), Mux(isIndexed(instType), sew(1, 0), eew(1, 0))) // number of elements in one vd
    instMicroOp.vaddr                 := io.in.bits.src_rs1(VAddrBits - 1, 0)
    instMicroOp.valid                 := true.B // set on the first uop
    instMicroOp.alignedType           := Mux(isIndexed(instType), sew(1, 0), eew(1, 0))
    instMicroOp.uop                   := io.in.bits.uop
    instMicroOp.mask                  := srcMask
    instMicroOp.vstart                := 0.U
    instMicroOp.vlmaxInVd             := vlmaxInVd
    instMicroOp.vlmaxMaskInVd         := UIntToMask(vlmaxInVd, elemIdxBits) // for merging data
    instMicroOp.vl                    := io.in.bits.src_vl.asTypeOf(VConfig()).vl
    segmentOffset                     := 0.U
    fieldIdx                          := 0.U
  }
  // latch data
  when(io.in.fire){
    data(enqPtr.value)                := io.in.bits.src_vs3
    stride(enqPtr.value)              := io.in.bits.src_stride
    uopIdx(enqPtr.value)              := io.in.bits.uop.vpu.vuopIdx
  }

  // update enqPtr; only one enqueue port
  when(io.in.fire){
    enqPtr                            := enqPtr + 1.U
  }

  /*************************************************************************
   *                            output logic
   *************************************************************************/
  // MicroOp
  val baseVaddr                       = instMicroOp.vaddr
  val alignedType                     = instMicroOp.alignedType
  val fuType                          = instMicroOp.uop.fuType
  val mask                            = instMicroOp.mask
  val exceptionVec                    = instMicroOp.uop.exceptionVec
  val issueEew                        = instMicroOp.uop.vpu.veew
  val issueLmul                       = instMicroOp.uop.vpu.vtype.vlmul
  val issueSew                        = instMicroOp.uop.vpu.vtype.vsew
  val issueEmul                       = EewLog2(issueEew) - issueSew + issueLmul
  val elemIdxInVd                     = segmentIdx & instMicroOp.vlmaxMaskInVd
  val issueInstType                   = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always a segment instruction
  val issueVLMAXLog2                  = GenVLMAXLog2(
                                                      Mux(issueLmul.asSInt > 0.S, 0.U, issueLmul),
                                                      Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
                                                    ) // log2 of the max number of elements in one vd
  val issueVlMax                      = instMicroOp.vlmaxInVd // max elementIdx in one vd
  val issueMaxIdxInIndex              = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew) // max number of index elements in one index register
  val issueMaxIdxInIndexMask          = UIntToMask(issueMaxIdxInIndex, elemIdxBits)
  val issueMaxIdxInIndexLog2          = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew)
  val issueIndexIdx                   = segmentIdx & issueMaxIdxInIndexMask

  val indexStride                     = IndexAddr( // index offset for indexed instructions
                                                    index = stride(stridePtr.value),
                                                    flow_inner_idx = issueIndexIdx,
                                                    eew = issueEew
                                                  )
  val realSegmentOffset               = Mux(isIndexed(issueInstType),
                                            indexStride,
                                            segmentOffset)
  val vaddr                           = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset
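  // Element address = base + fieldIdx * elementBytes + per-segment offset
  // (accumulated stride for unit-stride/strided accesses, index value for
  // indexed ones). E.g. for a unit-stride vlseg3e16 (3 fields, 2-byte
  // elements), element (segment i, field f) sits at base + 6*i + 2*f.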
  /**
   * tlb req and tlb resp
   */

  // query DTLB IO assignment
  io.dtlb.req                         := DontCare
  io.dtlb.resp.ready                  := true.B
  io.dtlb.req.valid                   := state === s_tlb_req
  io.dtlb.req.bits.cmd                := Mux(FuType.isVLoad(fuType), TlbCmd.read, TlbCmd.write)
  io.dtlb.req.bits.vaddr              := vaddr
  io.dtlb.req.bits.size               := instMicroOp.alignedType(2,0)
  io.dtlb.req.bits.memidx.is_ld       := FuType.isVLoad(fuType)
  io.dtlb.req.bits.memidx.is_st       := FuType.isVStore(fuType)
  io.dtlb.req.bits.debug.robIdx       := instMicroOp.uop.robIdx
  io.dtlb.req.bits.no_translate       := false.B
  io.dtlb.req.bits.debug.pc           := instMicroOp.uop.pc
  io.dtlb.req.bits.debug.isFirstIssue := DontCare
  io.dtlb.req_kill                    := false.B

  // tlb resp
  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
      exceptionVec(storePageFault)    := io.dtlb.resp.bits.excp(0).pf.st
      exceptionVec(loadPageFault)     := io.dtlb.resp.bits.excp(0).pf.ld
      exceptionVec(storeAccessFault)  := io.dtlb.resp.bits.excp(0).af.st
      exceptionVec(loadAccessFault)   := io.dtlb.resp.bits.excp(0).af.ld
      when(!io.dtlb.resp.bits.miss){
        instMicroOp.paddr             := io.dtlb.resp.bits.paddr(0)
      }
  }
  // pmp
  // NOTE: only load/store exceptions are handled here; other exceptions must not be routed here
  val pmp = WireInit(io.pmpResp)
  when(state === s_pm){
    exception_va := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
    exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault)
    exception_pa := pmp.st || pmp.ld

    instMicroOp.exception_pa       := exception_pa
    instMicroOp.exception_va       := exception_va
    // merge PMP results into the access-fault bits
    exceptionVec(loadAccessFault)  := exceptionVec(loadAccessFault) || pmp.ld
    exceptionVec(storeAccessFault) := exceptionVec(storeAccessFault) || pmp.st

    instMicroOp.exceptionvaddr     := vaddr
    instMicroOp.vl                 := segmentIdx // for exception
    instMicroOp.vstart             := segmentIdx // for exception
  }
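  // vl and vstart are overwritten with the current segmentIdx so that, on a
  // fault, writeback reports the position of the faulting element.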

  /**
   * flush sbuffer IO Assign
   */
  io.flush_sbuffer.valid           := !sbufferEmpty && (state === s_flush_sbuffer_req)


  /**
   * merge data for load
   */
  val cacheData = io.rdcache.resp.bits.data
  val pickData  = rdataVecHelper(alignedType(1,0), cacheData)
  val mergedData = mergeDataWithElemIdx(
    oldData = data(splitPtr.value),
    newData = Seq(pickData),
    alignedType = alignedType(1,0),
    elemIdx = Seq(elemIdxInVd),
    valids = Seq(true.B)
  )
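  // pickData extracts the element-sized slice from the DCache response line;
  // the merge writes it into slot elemIdxInVd of the buffered register data,
  // leaving all other elements untouched.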
  when(state === s_latch_and_merge_data){
    data(splitPtr.value) := mergedData
  }
  /**
   * split data for store
   * */
  val splitData = genVSData(
    data = data(splitPtr.value),
    elemIdx = elemIdxInVd,
    alignedType = alignedType
  )
  val flowData  = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
  val wmask     = genVWmask(vaddr, alignedType(1, 0)) & mask(segmentIdx)
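  // wmask combines the byte enables derived from this element's address and
  // size with the element's bit in the instruction's mask register.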

  /**
   * rdcache req
   */
  io.rdcache.req                    := DontCare
  io.rdcache.req.valid              := state === s_cache_req && FuType.isVLoad(fuType)
  io.rdcache.req.bits.cmd           := MemoryOpConstants.M_XRD
  io.rdcache.req.bits.vaddr         := vaddr
  io.rdcache.req.bits.mask          := mask
  io.rdcache.req.bits.data          := flowData
  io.rdcache.pf_source              := LOAD_SOURCE.U
  io.rdcache.req.bits.id            := DontCare
  io.rdcache.resp.ready             := true.B
  io.rdcache.s1_paddr_dup_lsu       := instMicroOp.paddr
  io.rdcache.s1_paddr_dup_dcache    := instMicroOp.paddr
  io.rdcache.s1_kill                := false.B
  io.rdcache.s2_kill                := false.B
  if (env.FPGAPlatform){
    io.rdcache.s0_pc                := DontCare
    io.rdcache.s1_pc                := DontCare
    io.rdcache.s2_pc                := DontCare
  }else{
    io.rdcache.s0_pc                := instMicroOp.uop.pc
    io.rdcache.s1_pc                := instMicroOp.uop.pc
    io.rdcache.s2_pc                := instMicroOp.uop.pc
  }
  io.rdcache.replacementUpdated     := false.B
  io.rdcache.is128Req               := false.B

  /**
  * wdcache req
  * */
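  // The store path only probes the DCache with a prefetch-for-write (M_PFW);
  // the store data itself is committed through the sbuffer in s_send_data.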
  io.wdcache.req                    := DontCare
  io.wdcache.req.valid              := state === s_cache_req && FuType.isVStore(fuType)
  io.wdcache.req.bits.cmd           := MemoryOpConstants.M_PFW
  io.wdcache.req.bits.vaddr         := vaddr
  io.wdcache.resp.ready             := true.B
  io.wdcache.s1_paddr               := instMicroOp.paddr
  io.wdcache.s1_kill                := false.B
  io.wdcache.s2_kill                := false.B
  io.wdcache.s2_pc                  := instMicroOp.uop.pc


  /**
   * write data to sbuffer
   * */

  io.sbuffer.bits                  := DontCare
  io.sbuffer.valid                 := state === s_send_data
  io.sbuffer.bits.mask             := wmask
  io.sbuffer.bits.data             := flowData
  io.sbuffer.bits.vaddr            := vaddr
  io.sbuffer.bits.cmd              := MemoryOpConstants.M_XWR
  io.sbuffer.bits.id               := DontCare
  io.sbuffer.bits.addr             := instMicroOp.paddr

  /**
   * update ptr
   * */

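  // Registers to advance per field: one for fractional LMUL, otherwise
  // 2^lmul (each field occupies a whole register group).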
  val splitPtrOffset = Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt)
  splitPtrNext := PriorityMux(Seq(
    ((fieldIdx === maxNfields) && (elemIdxInVd === (issueVlMax - 1.U)))   -> (deqPtr +                     // segment finished and the next segment starts in the next register group
                                                                             (segmentIdx >> issueVLMAXLog2).asUInt),
    (fieldIdx === maxNfields)                                             -> deqPtr,                       // segment finished
    true.B                                                                -> (splitPtr + splitPtrOffset)   // next field
  ))

  // update splitPtr
  when(state === s_latch_and_merge_data){
    splitPtr := splitPtrNext
  }.elsewhen(io.in.fire && !instMicroOp.valid){
    splitPtr := deqPtr // initialize splitPtr
  }

  // update stridePtr; only used by indexed instructions
  val strideOffset = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
  stridePtr       := deqPtr + strideOffset

  // update fieldIdx
  when(fieldIdx === maxNfields && state === s_latch_and_merge_data){
    fieldIdx := 0.U
  }.elsewhen(state === s_latch_and_merge_data){
    fieldIdx := fieldIdx + 1.U
  }
  // update segmentOffset
  when(fieldIdx === maxNfields && state === s_latch_and_merge_data){
    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew, stride(stridePtr.value))
  }
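  // Unit-stride segments advance by (nf + 1) element-widths per segment;
  // strided segments advance by the value in the stride register.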

  // update deqPtr
  when(io.uopwriteback.fire){
    deqPtr := deqPtr + 1.U
  }

  /*************************************************************************
   *                            dequeue logic
   *************************************************************************/
  when(stateNext === s_idle){
    instMicroOp.valid := false.B
  }
  io.uopwriteback.valid               := state === s_finish
  io.uopwriteback.bits.uop            := instMicroOp.uop
  io.uopwriteback.bits.mask.get       := instMicroOp.mask
  io.uopwriteback.bits.data           := data(deqPtr.value)
  io.uopwriteback.bits.vdIdx.get      := uopIdx(deqPtr.value)
  io.uopwriteback.bits.uop.vpu.vl     := instMicroOp.vl
  io.uopwriteback.bits.uop.vpu.vstart := instMicroOp.vstart
  io.uopwriteback.bits.debug          := DontCare
  io.uopwriteback.bits.vdIdxInField.get := DontCare

  // to RS
  io.feedback.valid                   := state === s_finish
  io.feedback.bits.hit                := true.B
  io.feedback.bits.robIdx             := instMicroOp.uop.robIdx
  io.feedback.bits.sourceType         := DontCare
  io.feedback.bits.flushState         := DontCare
  io.feedback.bits.dataInvalidSqIdx   := DontCare
  io.feedback.bits.uopIdx.get         := uopIdx(deqPtr.value)

  // exception
  io.exceptionAddr                    := DontCare // TODO: fix when exception handling is implemented
}