/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.FuType
import freechips.rocketchip.diplomacy.BufferParams
import xiangshan.cache.mmu._
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.vector.Bundles.VConfig
import xiangshan.backend.fu.vector.Utils.VecDataToMaskDataVec

class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
{
  val baseVaddr        = UInt(VAddrBits.W)
  val uop              = new DynInst
  val paddr            = UInt(PAddrBits.W)
  val mask             = UInt(VLEN.W)
  val alignedType      = UInt(alignTypeBits.W)
  val vl               = UInt(elemIdxBits.W)
  val uopFlowNum       = UInt(elemIdxBits.W)
  val uopFlowNumMask   = UInt(elemIdxBits.W)
  // for exception
  val vstart           = UInt(elemIdxBits.W)
  val exceptionvaddr   = UInt(VAddrBits.W)
  val exception_va     = Bool()
  val exception_pa     = Bool()
  val isFof            = Bool()
}

// latch each uop's VecWen, pdest, v0Wen, uopIdx
class VSegmentUop(implicit p: Parameters) extends VLSUBundle{
  val uop              = new DynInst
}

class VSegmentUnit (implicit p: Parameters) extends VLSUModule
  with HasDCacheParameters
  with MemoryOpConstants
  with SdtrigExt
  with HasLoadHelper
{
  val io               = IO(new VSegmentUnitIO)

  val maxSize          = VSegmentBufferSize

  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize){
  }

  object VSegUPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
      val ptr           = Wire(new VSegUPtr)
      ptr.flag         := f
      ptr.value        := v
      ptr
    }
  }


  /**
  ********************************************************************************************************
  *  Use an example to illustrate the working logic of a segment unit:                                   *
  *    For:                                                                                              *
  *      lmul=2 sew=32 emul=2 eew=32  vl=16                                                              *
  *    Then:                                                                                             *
  *      Access memory in the order:                                                                     *
  *        (V2,S0),(V4,S0),(V6,S0),(V8,S0),                                                              *
  *        (V2,S1),(V4,S1),(V6,S1),(V8,S1),                                                              *
  *        (V2,S2),(V4,S2),(V6,S2),(V8,S2),                                                              *
  *        (V2,S3),(V4,S3),(V6,S3),(V8,S3),                                                              *
  *        (V3,S4),(V5,S4),(V7,S4),(V9,S4),                                                              *
  *        (V3,S5),(V5,S5),(V7,S5),(V9,S5),                                                              *
  *        (V3,S6),(V5,S6),(V7,S6),(V9,S6),                                                              *
  *        (V3,S7),(V5,S7),(V7,S7),(V9,S7),                                                              *
  *                                                                                                      *
  *                                                                                                      *
  *    [[data]] saves the data generated by the access and corresponds to the register.                  *
  *    [[splitPtr]] controls the destination register written to.                                        *
  *                                                                                                      *
  *    The splitPtr offset can be seen in [[splitPtrNext]]'s assignment logic,                           *
  *    which is mainly calculated in terms of [[fieldIdx]] and [[segmentIdx]].                           *
  *    Different fields of the same segment are accessed first, then the next segment is visited.        *
  *    For the case of 'emul' greater than 1, such as the following example,                             *
  *    although 'v2' and 'v3' are different vd with the same field, they are still different segments,   *
  *    so they should be accessed sequentially, just like the 'Access memory in the order' above.        *
  *                                                                                                      *
  *                         [[segmentIdx]]                                                               *
  *                               |                                                                      *
  *                               |                                                                      *
  *                               V                                                                      *
  *                                                                                                      *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------    *
  *  [[splitPtr]]--> v2  |     field0     |      field0     |      field0     |      field0         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------    *
  *                  v3  |     field0     |      field0     |      field0     |      field0         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------    *
  *                  v4  |     field1     |      field1     |      field1     |      field1         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------    *
  *                  v5  |     field1     |      field1     |      field1     |      field1         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------    *
  *                  v6  |     field2     |      field2     |      field2     |      field2         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------    *
  *                  v7  |     field2     |      field2     |      field2     |      field2         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------    *
  *                  v8  |     field3     |      field3     |      field3     |      field3         |    *
  *                      ----------------------------------------------------------------------------    *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------    *
  *                  v9  |     field3     |      field3     |      field3     |      field3         |    *
  *                      ----------------------------------------------------------------------------    *
  *                                                                                                      *
  *                                                                                                      *
  ********************************************************************************************************
  **/
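  /**
   * Illustration only: a minimal software sketch (not used by the hardware below) of the access
   * order described above. All fields of one segment are visited before moving on to the next
   * segment; 'segNum' and 'fieldNum' are hypothetical parameters standing for vl and nf + 1.
   */
  private def segmentAccessOrderSketch(segNum: Int, fieldNum: Int): Seq[(Int, Int)] =
    for {
      segIdx   <- 0 until segNum     // outer loop: segments S0, S1, ...
      fieldIdx <- 0 until fieldNum   // inner loop: fields of the current segment
    } yield (segIdx, fieldIdx)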


  // buffer uop
  val instMicroOp       = Reg(new VSegmentBundle)
  val instMicroOpValid  = RegInit(false.B)
  val data              = Reg(Vec(maxSize, UInt(VLEN.W)))
  val uopq              = Reg(Vec(maxSize, new VSegmentUop))
  val stride            = Reg(Vec(maxSize, UInt(VLEN.W)))
  val allocated         = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
  val enqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
  val deqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
  val stridePtr         = WireInit(0.U.asTypeOf(new VSegUPtr)) // for selecting stride/index

  val segmentIdx        = RegInit(0.U(elemIdxBits.W))
  val fieldIdx          = RegInit(0.U(fieldBits.W))
  val segmentOffset     = RegInit(0.U(VAddrBits.W))
  val splitPtr          = RegInit(0.U.asTypeOf(new VSegUPtr)) // for selecting load/store data
  val splitPtrNext      = WireInit(0.U.asTypeOf(new VSegUPtr))

  val exception_va      = WireInit(false.B)
  val exception_pa      = WireInit(false.B)

  val maxSegIdx         = instMicroOp.vl - 1.U
  val maxNfields        = instMicroOp.uop.vpu.nf
  val latchVaddr        = RegInit(0.U(VAddrBits.W))

  XSError((segmentIdx > maxSegIdx) && instMicroOpValid, s"segmentIdx > vl, something is wrong!\n")
  XSError((fieldIdx > maxNfields) &&  instMicroOpValid, s"fieldIdx > nfields, something is wrong!\n")

  // MicroOp
  val baseVaddr                       = instMicroOp.baseVaddr
  val alignedType                     = instMicroOp.alignedType
  val fuType                          = instMicroOp.uop.fuType
  val mask                            = instMicroOp.mask
  val exceptionVec                    = instMicroOp.uop.exceptionVec
  val issueEew                        = instMicroOp.uop.vpu.veew
  val issueLmul                       = instMicroOp.uop.vpu.vtype.vlmul
  val issueSew                        = instMicroOp.uop.vpu.vtype.vsew
  val issueEmul                       = EewLog2(issueEew) - issueSew + issueLmul
  val elemIdxInVd                     = segmentIdx & instMicroOp.uopFlowNumMask
  val issueInstType                   = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always segment instruction
  val issueUopFlowNumLog2             = GenRealFlowLog2(issueInstType, issueEmul, issueLmul, issueEew, issueSew, true) // max element number log2 in vd
  val issueVlMax                      = instMicroOp.uopFlowNum // max elementIdx in vd
  val issueMaxIdxInIndex              = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0)) // max number of index elements in one index register
  val issueMaxIdxInIndexMask          = GenVlMaxMask(issueMaxIdxInIndex, elemIdxBits)
  val issueMaxIdxInIndexLog2          = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0))
  val issueIndexIdx                   = segmentIdx & issueMaxIdxInIndexMask
  val segmentActive                   = (mask & UIntToOH(segmentIdx)).orR

  // Segment instruction's FSM
  /*
  * s_idle: wait for a request
  * s_flush_sbuffer_req: flush sbuffer
  * s_wait_flush_sbuffer_resp: wait until sbuffer is empty
  * s_tlb_req: request tlb
  * s_wait_tlb_resp: wait for tlb resp
  * s_pm: check pmp
  * s_cache_req: request cache
  * s_cache_resp: wait for cache resp
  * s_latch_and_merge_data: latch and merge load data
  * s_send_data: send store data to sbuffer
  * s_finish: writeback uop
  * */
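  /*
  * Rough transition sketch (derived from the stateNext logic below):
  *   s_idle -> s_flush_sbuffer_req -> [s_wait_flush_sbuffer_resp] -> s_tlb_req -> s_wait_tlb_resp -> s_pm
  *   s_pm --load--> s_cache_req -> s_cache_resp -> s_latch_and_merge_data -> s_tlb_req (next element) / s_finish
  *   s_pm --store-> s_send_data -> s_tlb_req (next element) / s_finish
  *   s_pm --exception--> s_finish; s_finish -> s_idle once all uops have been written back
  * */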
  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm :: s_cache_req :: s_cache_resp :: s_latch_and_merge_data :: s_send_data :: s_finish :: Nil = Enum(11)
  val state             = RegInit(s_idle)
  val stateNext         = WireInit(s_idle)
  val sbufferEmpty      = io.flush_sbuffer.empty

  /**
   * state update
   */
  state  := stateNext

  /**
   * state transfer
   */
  when(state === s_idle){
    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
  }.elsewhen(state === s_flush_sbuffer_req){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if sbuffer is empty, go to query tlb

  }.elsewhen(state === s_wait_flush_sbuffer_resp){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)

  }.elsewhen(state === s_tlb_req){
    stateNext := Mux(segmentActive, s_wait_tlb_resp, Mux(FuType.isVLoad(instMicroOp.uop.fuType), s_latch_and_merge_data, s_send_data))

  }.elsewhen(state === s_wait_tlb_resp){
    stateNext := Mux(io.dtlb.resp.fire,
                      Mux(!io.dtlb.resp.bits.miss,
                          s_pm,
                          s_tlb_req),
                      s_wait_tlb_resp)

  }.elsewhen(state === s_pm){
    /* if it is a vector store, data is sent to the sbuffer, so there is no need to query the dcache */
    stateNext := Mux(exception_pa || exception_va,
                     s_finish,
                     Mux(FuType.isVLoad(instMicroOp.uop.fuType), s_cache_req, s_send_data))

  }.elsewhen(state === s_cache_req){
    stateNext := Mux(io.rdcache.req.fire, s_cache_resp, s_cache_req)

  }.elsewhen(state === s_cache_resp){
    when(io.rdcache.resp.fire) {
      when(io.rdcache.resp.bits.miss || io.rdcache.s2_bank_conflict) {
        stateNext := s_cache_req
      }.otherwise {
        stateNext := Mux(FuType.isVLoad(instMicroOp.uop.fuType), s_latch_and_merge_data, s_send_data)
      }
    }.otherwise{
      stateNext := s_cache_resp
    }
    /* if the segment is inactive, there is no need to access all of its fields */
  }.elsewhen(state === s_latch_and_merge_data) {
    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields) ||
      ((segmentIdx === maxSegIdx) && !segmentActive)) {

      stateNext := s_finish // segment instruction finished
    }.otherwise {
      stateNext := s_tlb_req // continue with the next element
    }
    /* if the segment is inactive, there is no need to access all of its fields */
  }.elsewhen(state === s_send_data) { // wait for the sbuffer to accept data
    when(!io.sbuffer.fire && segmentActive) {
      stateNext := s_send_data
    }.elsewhen(((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields)) ||
               ((segmentIdx === maxSegIdx) && !segmentActive)) {

      stateNext := s_finish // segment instruction finished
    }.otherwise {
      stateNext := s_tlb_req // continue with the next element
    }
  }.elsewhen(state === s_finish){ // writeback uop
    stateNext := Mux(distanceBetween(enqPtr, deqPtr) === 0.U, s_idle, s_finish)

  }.otherwise{
    stateNext := s_idle
    XSError(true.B, s"Unknown state!\n")
  }

  /*************************************************************************
   *                            enqueue logic
   *************************************************************************/
  io.in.ready                         := true.B
  val fuOpType                         = io.in.bits.uop.fuOpType
  val vtype                            = io.in.bits.uop.vpu.vtype
  val mop                              = fuOpType(6, 5)
  val instType                         = Cat(true.B, mop)
  val eew                              = io.in.bits.uop.vpu.veew
  val sew                              = vtype.vsew
  val lmul                             = vtype.vlmul
  val emul                             = EewLog2(eew) - sew + lmul
  val vl                               = instMicroOp.vl
  val vm                               = instMicroOp.uop.vpu.vm
  val vstart                           = instMicroOp.uop.vpu.vstart
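  // vm set means the instruction is unmasked, so all elements are treated as active;
  // otherwise the source mask is passed to GenFlowMask together with vstart and vl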
  val srcMask                          = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
  // when the first uop enqueues, latch the microOp of the segment instruction
  when(io.in.fire && !instMicroOpValid){
    // element number in a vd
    // TODO Rewrite it in a more elegant way.
    val uopFlowNum                    = ZeroExt(GenRealFlowNum(instType, emul, lmul, eew, sew, true), elemIdxBits)
    instMicroOp.baseVaddr             := io.in.bits.src_rs1(VAddrBits - 1, 0)
    instMicroOpValid                  := true.B // set by the first uop
    instMicroOp.alignedType           := Mux(isIndexed(instType), sew(1, 0), eew(1, 0))
    instMicroOp.uop                   := io.in.bits.uop
    instMicroOp.mask                  := srcMask
    instMicroOp.vstart                := 0.U
    instMicroOp.uopFlowNum            := uopFlowNum
    instMicroOp.uopFlowNumMask        := GenVlMaxMask(uopFlowNum, elemIdxBits) // for merging data
    instMicroOp.vl                    := io.in.bits.src_vl.asTypeOf(VConfig()).vl
    segmentOffset                     := 0.U
    instMicroOp.isFof                 := (fuOpType === VlduType.vleff) && FuType.isVLoad(fuType)
  }
  // latch data
  when(io.in.fire){
    data(enqPtr.value)                := io.in.bits.src_vs3
    stride(enqPtr.value)              := io.in.bits.src_stride
    uopq(enqPtr.value).uop            := io.in.bits.uop
  }

  // update enqPtr, only 1 enqueue port
  when(io.in.fire){
    enqPtr                            := enqPtr + 1.U
  }

  /*************************************************************************
   *                            output logic
   *************************************************************************/

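  // address generation:
  //   indexed accesses : element vaddr = baseVaddr + fieldIdx << alignedType + offset from the index register
  //   other accesses   : element vaddr = baseVaddr + fieldIdx << alignedType + segmentOffset (accumulated per segment)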
  val indexStride                     = IndexAddr( // index for indexed instruction
                                                    index = stride(stridePtr.value),
                                                    flow_inner_idx = issueIndexIdx,
                                                    eew = issueEew
                                                  )
  val realSegmentOffset               = Mux(isIndexed(issueInstType),
                                            indexStride,
                                            segmentOffset)
  val vaddr                           = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset

  // latch vaddr
  when(state === s_tlb_req){
    latchVaddr := vaddr
  }
  /**
   * tlb req and tlb resp
   */

  // query DTLB IO assignment
  io.dtlb.req                         := DontCare
  io.dtlb.resp.ready                  := true.B
  io.dtlb.req.valid                   := state === s_tlb_req && segmentActive
  io.dtlb.req.bits.cmd                := Mux(FuType.isVLoad(fuType), TlbCmd.read, TlbCmd.write)
  io.dtlb.req.bits.vaddr              := vaddr
  io.dtlb.req.bits.size               := instMicroOp.alignedType(2,0)
  io.dtlb.req.bits.memidx.is_ld       := FuType.isVLoad(fuType)
  io.dtlb.req.bits.memidx.is_st       := FuType.isVStore(fuType)
  io.dtlb.req.bits.debug.robIdx       := instMicroOp.uop.robIdx
  io.dtlb.req.bits.no_translate       := false.B
  io.dtlb.req.bits.debug.pc           := instMicroOp.uop.pc
  io.dtlb.req.bits.debug.isFirstIssue := DontCare
  io.dtlb.req_kill                    := false.B

  // tlb resp
  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
      exceptionVec(storePageFault)    := io.dtlb.resp.bits.excp(0).pf.st
      exceptionVec(loadPageFault)     := io.dtlb.resp.bits.excp(0).pf.ld
      exceptionVec(storeAccessFault)  := io.dtlb.resp.bits.excp(0).af.st
      exceptionVec(loadAccessFault)   := io.dtlb.resp.bits.excp(0).af.ld
      when(!io.dtlb.resp.bits.miss){
        instMicroOp.paddr             := io.dtlb.resp.bits.paddr(0)
      }
  }
  // pmp
  // NOTE: only load/store exceptions are handled here; other exceptions are not sent to this path
  val pmp = WireInit(io.pmpResp)
  when(state === s_pm) {
    val addr_aligned = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
      "b00".U   -> true.B,                   //b
      "b01".U   -> (vaddr(0)    === 0.U), //h
      "b10".U   -> (vaddr(1, 0) === 0.U), //w
      "b11".U   -> (vaddr(2, 0) === 0.U)  //d
    ))
    val missAligned = !addr_aligned
    exceptionVec(loadAddrMisaligned)  := !addr_aligned && FuType.isVLoad(fuType)
    exceptionVec(storeAddrMisaligned) := !addr_aligned && !FuType.isVLoad(fuType)

    exception_va := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
      exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault) || missAligned
    exception_pa := pmp.st || pmp.ld

    instMicroOp.exception_pa := exception_pa
    instMicroOp.exception_va := exception_va
    // update access fault bits with the pmp result
    exceptionVec(loadAccessFault) := exceptionVec(loadAccessFault) || pmp.ld
    exceptionVec(storeAccessFault) := exceptionVec(storeAccessFault) || pmp.st

    when(exception_va || exception_pa) {
      when(segmentIdx === 0.U || !instMicroOp.isFof) {
        instMicroOp.exceptionvaddr := vaddr
        instMicroOp.vl := segmentIdx // for exception
        instMicroOp.vstart := segmentIdx // for exception
      }.otherwise {
        instMicroOp.vl := segmentIdx
      }
    }
  }

  /**
   * flush sbuffer IO assignment
   */
  io.flush_sbuffer.valid           := !sbufferEmpty && (state === s_flush_sbuffer_req)


  /**
   * merge data for load
   */
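  // latchVaddr(3, 0) selects the starting byte within the 16-byte dcache response;
  // rdataVecHelper then picks the element of the width given by alignedType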
  val cacheData = LookupTree(latchVaddr(3,0), List(
    "b0000".U -> io.rdcache.resp.bits.data_delayed(63,    0),
    "b0001".U -> io.rdcache.resp.bits.data_delayed(63,    8),
    "b0010".U -> io.rdcache.resp.bits.data_delayed(63,   16),
    "b0011".U -> io.rdcache.resp.bits.data_delayed(63,   24),
    "b0100".U -> io.rdcache.resp.bits.data_delayed(63,   32),
    "b0101".U -> io.rdcache.resp.bits.data_delayed(63,   40),
    "b0110".U -> io.rdcache.resp.bits.data_delayed(63,   48),
    "b0111".U -> io.rdcache.resp.bits.data_delayed(63,   56),
    "b1000".U -> io.rdcache.resp.bits.data_delayed(127,  64),
    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
  ))
  val pickData  = rdataVecHelper(alignedType(1,0), cacheData)
  val mergedData = mergeDataWithElemIdx(
    oldData = data(splitPtr.value),
    newData = Seq(pickData),
    alignedType = alignedType(1,0),
    elemIdx = Seq(elemIdxInVd),
    valids = Seq(true.B)
  )
  when(state === s_latch_and_merge_data && segmentActive){
    data(splitPtr.value) := mergedData
  }
  /**
   * split data for store
   */
  val splitData = genVSData(
    data = data(splitPtr.value),
    elemIdx = elemIdxInVd,
    alignedType = alignedType
  )
  val flowData  = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
  val wmask     = genVWmask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)

  /**
   * rdcache req: write requests don't need to query the dcache, because store elements are written to the sbuffer
   */
  io.rdcache.req                    := DontCare
  io.rdcache.req.valid              := state === s_cache_req && FuType.isVLoad(fuType)
  io.rdcache.req.bits.cmd           := MemoryOpConstants.M_XRD
  io.rdcache.req.bits.vaddr         := latchVaddr
  io.rdcache.req.bits.mask          := mask
  io.rdcache.req.bits.data          := flowData
  io.rdcache.pf_source              := LOAD_SOURCE.U
  io.rdcache.req.bits.id            := DontCare
  io.rdcache.resp.ready             := true.B
  io.rdcache.s1_paddr_dup_lsu       := instMicroOp.paddr
  io.rdcache.s1_paddr_dup_dcache    := instMicroOp.paddr
  io.rdcache.s1_kill                := false.B
  io.rdcache.s2_kill                := false.B
  if (env.FPGAPlatform){
    io.rdcache.s0_pc                := DontCare
    io.rdcache.s1_pc                := DontCare
    io.rdcache.s2_pc                := DontCare
  }else{
    io.rdcache.s0_pc                := instMicroOp.uop.pc
    io.rdcache.s1_pc                := instMicroOp.uop.pc
    io.rdcache.s2_pc                := instMicroOp.uop.pc
  }
  io.rdcache.replacementUpdated     := false.B
  io.rdcache.is128Req               := false.B


  /**
   * write data to sbuffer
   */

  io.sbuffer.bits                  := DontCare
  io.sbuffer.valid                 := state === s_send_data && segmentActive
  io.sbuffer.bits.vecValid         := state === s_send_data && segmentActive
  io.sbuffer.bits.mask             := wmask
  io.sbuffer.bits.data             := flowData
  io.sbuffer.bits.vaddr            := latchVaddr
  io.sbuffer.bits.cmd              := MemoryOpConstants.M_XWR
  io.sbuffer.bits.id               := DontCare
  io.sbuffer.bits.addr             := instMicroOp.paddr

  io.vecDifftestInfo.valid         := state === s_send_data && segmentActive
  io.vecDifftestInfo.bits          := uopq(deqPtr.value).uop

  /**
   * update ptr
   */
  private val fieldActiveWirteFinish = io.sbuffer.fire && segmentActive // write data finished and the segment is active
  XSError(io.sbuffer.fire && !segmentActive, "Attempted to write an inactive segment to the sbuffer, something is wrong!\n")

  private val segmentInactiveFinish = ((state === s_latch_and_merge_data) || (state === s_send_data)) && !segmentActive

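  // splitPtrOffset: distance (in destination vregs) between two adjacent fields of the same segment,
  // i.e. LMUL registers for indexed accesses and EMUL registers otherwise, clamped to at least one
  // register when LMUL/EMUL is fractional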
  val splitPtrOffset = Mux(
    isIndexed(instType),
    Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt),
    Mux(emul.asSInt < 0.S, 1.U, (1.U << emul).asUInt)
  )
  splitPtrNext :=
    Mux(fieldIdx === maxNfields || !segmentActive, // if the segment is active, finish all of its fields first; otherwise jump to the next segment
      // segment finished: shift right by 'issueUopFlowNumLog2' so that cases with emul != 1 generate the lateral offset correctly.
     (deqPtr + ((segmentIdx +& 1.U) >> issueUopFlowNumLog2).asUInt),
      // next field.
     (splitPtr + splitPtrOffset)
    )

  dontTouch(issueUopFlowNumLog2)
  dontTouch(issueEmul)
  dontTouch(splitPtrNext)
  dontTouch(stridePtr)
  dontTouch(segmentActive)

  // update splitPtr
  when(state === s_latch_and_merge_data || (state === s_send_data && (fieldActiveWirteFinish || !segmentActive))){
    splitPtr := splitPtrNext
  }.elsewhen(io.in.fire && !instMicroOpValid){
    splitPtr := deqPtr // initial splitPtr
  }

  // update stridePtr, only used for indexed accesses
  val strideOffset = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
  stridePtr       := deqPtr + strideOffset

  // update fieldIdx
  when(io.in.fire && !instMicroOpValid){ // init
    fieldIdx := 0.U
  }.elsewhen(state === s_latch_and_merge_data && segmentActive ||
            (state === s_send_data && fieldActiveWirteFinish)){ // only if the segment is active

    /* move to the next field, wrap to 0 when all fields of this segment are done */
    fieldIdx := Mux(fieldIdx === maxNfields, 0.U, fieldIdx + 1.U)
  }.elsewhen(segmentInactiveFinish){ // segment is inactive, go to the next segment
    fieldIdx := 0.U
  }
  // update segmentIdx
  when(io.in.fire && !instMicroOpValid){
    segmentIdx := 0.U
  }.elsewhen(fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && fieldActiveWirteFinish)) &&
             segmentIdx =/= maxSegIdx){ // next segment, only if the segment is active

    segmentIdx := segmentIdx + 1.U
  }.elsewhen(segmentInactiveFinish && segmentIdx =/= maxSegIdx){ // if the segment is inactive, go to the next segment
    segmentIdx := segmentIdx + 1.U
  }

  // update segmentOffset
  /* increase segmentOffset whenever a segment (active or inactive) is finished */
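  // unit-stride: consecutive segments are (nf + 1) * elementBytes apart; strided: 'stride' bytes apart
  // (indexed accesses take their per-segment offset from the index register instead of segmentOffset)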
  when((fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && fieldActiveWirteFinish))) ||
       segmentInactiveFinish){

    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew(1, 0), stride(stridePtr.value))
  }

  // update deqPtr
  when(io.uopwriteback.fire){
    deqPtr := deqPtr + 1.U
  }

  /*************************************************************************
   *                            dequeue logic
   *************************************************************************/
  val vdIdxInField = GenUopIdxInField(Mux(isIndexed(instType), issueLmul, issueEmul), uopq(deqPtr.value).uop.vpu.vuopIdx)
  /* select mask of vd, may be removed in the future */
  val realEw        = Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
  val maskDataVec: Vec[UInt] = VecDataToMaskDataVec(instMicroOp.mask, realEw)
  val maskUsed      = maskDataVec(vdIdxInField)

  when(stateNext === s_idle){
    instMicroOpValid := false.B
  }
  io.uopwriteback.valid               := (state === s_finish) && distanceBetween(enqPtr, deqPtr) =/= 0.U
  io.uopwriteback.bits.uop            := uopq(deqPtr.value).uop
  io.uopwriteback.bits.uop.vpu        := instMicroOp.uop.vpu
  io.uopwriteback.bits.uop.exceptionVec := instMicroOp.uop.exceptionVec
  io.uopwriteback.bits.mask.get       := instMicroOp.mask
  io.uopwriteback.bits.data           := data(deqPtr.value)
  io.uopwriteback.bits.vdIdx.get      := vdIdxInField
  io.uopwriteback.bits.uop.vpu.vl     := instMicroOp.vl
  io.uopwriteback.bits.uop.vpu.vstart := instMicroOp.vstart
  io.uopwriteback.bits.uop.vpu.vmask  := maskUsed
  io.uopwriteback.bits.uop.vpu.vuopIdx  := uopq(deqPtr.value).uop.vpu.vuopIdx
  io.uopwriteback.bits.debug          := DontCare
  io.uopwriteback.bits.vdIdxInField.get := vdIdxInField
  io.uopwriteback.bits.uop.robIdx     := instMicroOp.uop.robIdx
  io.uopwriteback.bits.uop.fuOpType   := instMicroOp.uop.fuOpType

  // to RS
  io.feedback.valid                   := state === s_finish && distanceBetween(enqPtr, deqPtr) =/= 0.U
  io.feedback.bits.hit                := true.B
  io.feedback.bits.robIdx             := instMicroOp.uop.robIdx
  io.feedback.bits.sourceType         := DontCare
  io.feedback.bits.flushState         := DontCare
  io.feedback.bits.dataInvalidSqIdx   := DontCare
  io.feedback.bits.sqIdx              := uopq(deqPtr.value).uop.sqIdx
  io.feedback.bits.lqIdx              := uopq(deqPtr.value).uop.lqIdx

  // exception
  io.exceptionInfo                    := DontCare
  io.exceptionInfo.bits.robidx        := instMicroOp.uop.robIdx
  io.exceptionInfo.bits.uopidx        := uopq(deqPtr.value).uop.vpu.vuopIdx
  io.exceptionInfo.bits.vstart        := instMicroOp.vstart
  io.exceptionInfo.bits.vaddr         := instMicroOp.exceptionvaddr
  io.exceptionInfo.bits.vl            := instMicroOp.vl
  io.exceptionInfo.valid              := (state === s_finish) && instMicroOp.uop.exceptionVec.asUInt.orR && distanceBetween(enqPtr, deqPtr) =/= 0.U
}