xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VSegmentUnit.scala (revision 94aa21c6009c2f39c5c5dae9c87260c78887efcc)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend.rob.RobPtr
26import xiangshan.backend.Bundles._
27import xiangshan.mem._
28import xiangshan.backend.fu.{FuType, PMPRespBundle}
29import freechips.rocketchip.diplomacy.BufferParams
30import xiangshan.cache.mmu._
31import xiangshan.cache._
32import xiangshan.cache.wpu.ReplayCarry
33import xiangshan.backend.fu.util.SdtrigExt
34import xiangshan.ExceptionNO._
35import xiangshan.backend.fu.vector.Bundles.{VConfig, VType}
36import xiangshan.backend.datapath.NewPipelineConnect
37import xiangshan.backend.fu.NewCSR._
38import xiangshan.backend.fu.vector.Utils.VecDataToMaskDataVec
39
40class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
41{
42  val baseVaddr        = UInt(XLEN.W)
43  val uop              = new DynInst
44  val paddr            = UInt(PAddrBits.W)
45  val mask             = UInt(VLEN.W)
46  val alignedType      = UInt(alignTypeBits.W)
47  val vl               = UInt(elemIdxBits.W)
48  val uopFlowNum       = UInt(elemIdxBits.W)
49  val uopFlowNumMask   = UInt(elemIdxBits.W)
50  val isVSegLoad       = Bool()
51  val isVSegStore      = Bool()
52  // for exception
53  val vstart           = UInt(elemIdxBits.W)
54  val exceptionVaddr   = UInt(XLEN.W)
55  val exceptionGpaddr  = UInt(XLEN.W)
56  val exceptionIsForVSnonLeafPTE = Bool()
57  val exception_va     = Bool()
58  val exception_gpa    = Bool()
59  val exception_pa     = Bool()
60  val exceptionVstart  = UInt(elemIdxBits.W)
61  // valid: a fof exception occurred but cannot be raised; the vl of every written-back uop needs to be updated with exceptionVl
62  val exceptionVl      = ValidIO(UInt(elemIdxBits.W))
63  val isFof            = Bool()
64}
65
66// latch each uop's VecWen, pdest, v0Wen, uopIdx
67class VSegmentUop(implicit p: Parameters) extends VLSUBundle{
68  val uop              = new DynInst
69}
70
71class VSegmentUnit (implicit p: Parameters) extends VLSUModule
72  with HasDCacheParameters
73  with MemoryOpConstants
74  with SdtrigExt
75  with HasLoadHelper
76{
77  val io               = IO(new VSegmentUnitIO)
78
79  val maxSize          = VSegmentBufferSize
80
81  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize){
82  }
83
84  object VSegUPtr {
85    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
86      val ptr           = Wire(new VSegUPtr)
87      ptr.flag         := f
88      ptr.value        := v
89      ptr
90    }
91  }
92
93  val maxSplitNum = 2
94
95  /**
96  ********************************************************************************************************
97  *  Use an example to illustrate the working logic of a segment unit:                                   *
98  *    For:                                                                                              *
99  *      lmul=2 sew=32 emul=2 eew=32  vl=8                                                               *
100  *    Then:                                                                                             *
101  *      Access memory in the order:                                                                     *
102  *        (V2,S0),(V4,S0),(V6,S0),(V8,S0),                                                              *
103  *        (V2,S1),(V4,S1),(V6,S1),(V8,S1),                                                              *
104  *        (V2,S2),(V4,S2),(V6,S2),(V8,S2),                                                              *
105  *        (V2,S3),(V4,S3),(V6,S3),(V8,S3),                                                              *
106  *        (V3,S4),(V5,S4),(V7,S4),(V9,S4),                                                              *
107  *        (V3,S5),(V5,S5),(V7,S5),(V9,S5),                                                              *
108  *        (V3,S6),(V5,S6),(V7,S6),(V9,S6),                                                              *
109  *        (V3,S7),(V5,S7),(V7,S7),(V9,S7),                                                              *
110  *                                                                                                      *
111  *                                                                                                      *
112  *    [[data]] saves the data generated by the access and corresponds to the register.                  *
113  *    [[splitPtr]] controls the destination register written to.                                        *
114  *                                                                                                      *
115  *    The splitPtr offset can be seen in [[splitPtrNext]]'s assignment logic,                           *
116  *    which is calculated mainly in terms of [[fieldIdx]] and [[segmentIdx]].                           *
117  *    Different fields of the same segment are accessed first, then different segments.                 *
118  *    For the case of 'emul' greater than 1, as in the example below,                                   *
119  *    although 'v2' and 'v3' are different vd registers holding the same field, they belong to          *
120  *    different segments, so they are accessed sequentially, as in 'Access memory in the order' above.  *
121  *                                                                                                      *
122  *                         [[segmentIdx]]                                                               *
123  *                               |                                                                      *
124  *                               |                                                                      *
125  *                               V                                                                      *
126  *                                                                                                      *
127  *                               S0               S1                S2                 S3               *
128  *                      ----------------------------------------------------------------------------    *
129  *  [[splitPtr]]--> v2  |     field0     |      field0     |      field0     |      field0         |    *
130  *                      ----------------------------------------------------------------------------    *
131  *                               S4               S5                S6                 S7               *
132  *                      ----------------------------------------------------------------------------    *
133  *                  v3  |     field0     |      field0     |      field0     |      field0         |    *
134  *                      ----------------------------------------------------------------------------    *
135  *                               S0               S1                S2                 S3               *
136  *                      ----------------------------------------------------------------------------    *
137  *                  v4  |     field1     |      field1     |      field1     |      field1         |    *
138  *                      ----------------------------------------------------------------------------    *
139  *                               S4               S5                S6                 S7               *
140  *                      ----------------------------------------------------------------------------    *
141  *                  v5  |     field1     |      field1     |      field1     |      field1         |    *
142  *                      ----------------------------------------------------------------------------    *
143  *                               S0               S1                S2                 S3               *
144  *                      ----------------------------------------------------------------------------    *
145  *                  v6  |     field2     |      field2     |      field2     |      field2         |    *
146  *                      ----------------------------------------------------------------------------    *
147  *                               S4               S5                S6                 S7               *
148  *                      ----------------------------------------------------------------------------    *
149  *                  v7  |     field2     |      field2     |      field2     |      field2         |    *
150  *                      ----------------------------------------------------------------------------    *
151  *                               S0               S1                S2                 S3               *
152  *                      ----------------------------------------------------------------------------    *
153  *                  v8  |     field3     |      field3     |      field3     |      field3         |    *
154  *                      ----------------------------------------------------------------------------    *
155  *                               S4               S5                S6                 S7               *
156  *                      ----------------------------------------------------------------------------    *
157  *                  v9  |     field3     |      field3     |      field3     |      field3         |    *
158  *                      ----------------------------------------------------------------------------    *
159  *                                                                                                      *
160  *                                                                                                      *
161  ********************************************************************************************************
162  **/
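  /* A rough software-level model of the traversal above (illustrative only; 'nf', 'elemsPerVd',
   * 'vdBase' and 'stridePerSegment' are hypothetical names, and VLEN = 128 is assumed):
   *
   *   for (segmentIdx <- 0 until vl) {                 // outer loop: one segment at a time
   *     for (fieldIdx <- 0 to nf) {                    // inner loop: every field of that segment
   *       val elemVaddr = baseVaddr + segmentOffset + (fieldIdx << alignedType)
   *       val vdSlot    = vdBase + fieldIdx * splitPtrOffset + (segmentIdx >> log2Ceil(elemsPerVd))
   *       // load : merge the element at elemVaddr into data(vdSlot) at position segmentIdx % elemsPerVd
   *       // store: pick that element out of data(vdSlot) and send it to the sbuffer
   *     }
   *     segmentOffset += stridePerSegment              // (nf + 1) << eew for unit-stride, rs2 for strided
   *   }
   */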
163
164
165  // buffer uop
166  val instMicroOp       = Reg(new VSegmentBundle)
167  val instMicroOpValid  = RegInit(false.B)
168  val data              = Reg(Vec(maxSize, UInt(VLEN.W)))
169  val uopq              = Reg(Vec(maxSize, new VSegmentUop))
170  val stride            = Reg(Vec(maxSize, UInt(VLEN.W)))
171  val allocated         = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
172  val enqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
173  val deqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
174  val stridePtr         = WireInit(0.U.asTypeOf(new VSegUPtr)) // for select stride/index
175
176  val segmentIdx        = RegInit(0.U(elemIdxBits.W))
177  val fieldIdx          = RegInit(0.U(fieldBits.W))
178  val segmentOffset     = RegInit(0.U(XLEN.W))
179  val splitPtr          = RegInit(0.U.asTypeOf(new VSegUPtr)) // for select load/store data
180  val splitPtrNext      = WireInit(0.U.asTypeOf(new VSegUPtr))
181
182  val exception_va      = WireInit(false.B)
183  val exception_gpa     = WireInit(false.B)
184  val exception_pa      = WireInit(false.B)
185
186  val maxSegIdx         = instMicroOp.vl - 1.U
187  val maxNfields        = instMicroOp.uop.vpu.nf
188  val latchVaddr        = RegInit(0.U(VAddrBits.W))
189
190  XSError((segmentIdx > maxSegIdx) && instMicroOpValid, "segmentIdx > vl, something is wrong!\n")
191  XSError((fieldIdx > maxNfields) && instMicroOpValid, "fieldIdx > nfields, something is wrong!\n")
192
193  // MicroOp
194  val baseVaddr                       = instMicroOp.baseVaddr
195  val alignedType                     = instMicroOp.alignedType
196  val fuType                          = instMicroOp.uop.fuType
197  val isVSegLoad                      = instMicroOp.isVSegLoad
198  val isVSegStore                     = instMicroOp.isVSegStore
199  val mask                            = instMicroOp.mask
200  val exceptionVec                    = instMicroOp.uop.exceptionVec
201  val issueEew                        = instMicroOp.uop.vpu.veew
202  val issueLmul                       = instMicroOp.uop.vpu.vtype.vlmul
203  val issueSew                        = instMicroOp.uop.vpu.vtype.vsew
204  val issueEmul                       = EewLog2(issueEew) - issueSew + issueLmul
205  val elemIdxInVd                     = segmentIdx & instMicroOp.uopFlowNumMask
206  val issueInstType                   = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always segment instruction
207  val issueUopFlowNumLog2             = GenRealFlowLog2(issueInstType, issueEmul, issueLmul, issueEew, issueSew, true) // max element number log2 in vd
208  val issueVlMax                      = instMicroOp.uopFlowNum // max elementIdx in vd
209  val issueMaxIdxInIndex              = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0)) // max number of index elements in one index register
210  val issueMaxIdxInIndexMask          = GenVlMaxMask(issueMaxIdxInIndex, elemIdxBits)
211  val issueMaxIdxInIndexLog2          = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0))
212  val issueIndexIdx                   = segmentIdx & issueMaxIdxInIndexMask
213  val segmentActive                   = (mask & UIntToOH(segmentIdx)).orR
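  // Worked example for the widths above (a sketch, assuming VLEN = 128): a unit-stride segment
  // load with sew = eew = 32b and lmul = 2 gives emul = 2; each vd then holds VLEN / eew = 4
  // elements, so uopFlowNum = 4, issueUopFlowNumLog2 = 2, uopFlowNumMask = 0x3, and
  // elemIdxInVd = segmentIdx & 0x3 selects the element position inside the current vd.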
214
215  // sbuffer write interface
216  val sbufferOut                      = Wire(Decoupled(new DCacheWordReqWithVaddrAndPfFlag))
217
218
219  // segment fof instruction buffer
220  val fofBuffer                       = RegInit(0.U.asTypeOf(new DynInst))
221  val fofBufferValid                  = RegInit(false.B)
222
223
224  // Segment instruction's FSM
225  /*
226  * s_idle: wait request
227  * s_flush_sbuffer_req: flush sbuffer
228  * s_wait_flush_sbuffer_resp: wait sbuffer empty
229  * s_tlb_req: request tlb
230  * s_wait_tlb_resp: wait tlb resp
231  * s_pm: check pmp
232  * s_cache_req: request cache
233  * s_cache_resp: wait cache resp
234  * s_misalign_merge_data: merge unaligned data
235  * s_latch_and_merge_data: latch and merge load data
236  * s_send_data: send store data to the sbuffer
237  * s_wait_to_sbuffer: wait for data in the sbufferOut pipeline stage to be accepted by the sbuffer
238  * s_finish: normal uop is complete
239  * s_fof_fix_vl: Writeback the uop of the fof instruction to modify vl.
240  * */
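  /* Typical paths through the FSM (a rough sketch; misaligned accesses and exceptions take the
   * extra states documented above):
   *   load : s_idle -> s_flush_sbuffer_req [-> s_wait_flush_sbuffer_resp] ->
   *          (s_tlb_req -> s_wait_tlb_resp -> s_pm -> s_cache_req -> s_cache_resp ->
   *           s_latch_and_merge_data), repeated for each field of each segment -> s_finish
   *   store: s_idle -> s_flush_sbuffer_req [-> s_wait_flush_sbuffer_resp] ->
   *          (s_tlb_req -> s_wait_tlb_resp -> s_pm -> s_send_data), repeated for each field of
   *          each segment [-> s_wait_to_sbuffer] -> s_finish
   */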
241  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm ::s_cache_req :: s_cache_resp :: s_misalign_merge_data :: s_latch_and_merge_data :: s_send_data :: s_wait_to_sbuffer :: s_finish :: s_fof_fix_vl :: Nil = Enum(14)
242  val state             = RegInit(s_idle)
243  val stateNext         = WireInit(s_idle)
244  val sbufferEmpty      = io.flush_sbuffer.empty
245  val isEnqfof          = io.in.bits.uop.fuOpType === VlduType.vleff && io.in.valid
246  val isEnqFixVlUop     = isEnqfof && io.in.bits.uop.vpu.lastUop
247
248  // misalignment handling signals
249  val curPtr             = RegInit(false.B)
250  val canHandleMisalign  = WireInit(false.B)
251  val isMisalignReg      = RegInit(false.B)
252  val isMisalignWire     = WireInit(false.B)
253  val notCross16ByteReg  = RegInit(false.B)
254  val notCross16ByteWire = WireInit(false.B)
255  val combinedData       = RegInit(0.U(XLEN.W))
256
257  val lowPagePaddr       = RegInit(0.U(PAddrBits.W))
258  val lowPageGPaddr      = RegInit(0.U(GPAddrBits.W))
259
260  val highPagePaddr      = RegInit(0.U(PAddrBits.W))
261  val highPageGPaddr     = RegInit(0.U(GPAddrBits.W))
262
263  val isFirstSplit       = !curPtr
264  val isSecondSplit      = curPtr
265  /**
266   * state update
267   */
268  state  := stateNext
269
270  /**
271   * state transfer
272   */
273  when(state === s_idle){
274    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
275  }.elsewhen(state === s_flush_sbuffer_req){
276    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if sbuffer is empty, go to query tlb
277
278  }.elsewhen(state === s_wait_flush_sbuffer_resp){
279    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)
280
281  }.elsewhen(state === s_tlb_req){
282    stateNext := Mux(segmentActive, s_wait_tlb_resp, Mux(isVSegLoad, s_latch_and_merge_data, s_send_data))
283
284  }.elsewhen(state === s_wait_tlb_resp){
285    stateNext := Mux(io.dtlb.resp.fire,
286                      Mux(!io.dtlb.resp.bits.miss,
287                          s_pm,
288                          s_tlb_req),
289                      s_wait_tlb_resp)
290
291  }.elsewhen(state === s_pm){
292    when(exception_pa || exception_va || exception_gpa) {
293      stateNext := s_finish
294    } .otherwise {
295      when(canHandleMisalign && isMisalignWire && !notCross16ByteWire || (isMisalignReg && !notCross16ByteReg && isFirstSplit && isVSegStore)) {
296        stateNext := s_tlb_req
297      } .otherwise {
298        /* for a vector store, data is sent to the sbuffer, so there is no need to query the dcache */
299        stateNext := Mux(isVSegLoad, s_cache_req, s_send_data)
300      }
301    }
302
303  }.elsewhen(state === s_cache_req){
304    stateNext := Mux(io.rdcache.req.fire, s_cache_resp, s_cache_req)
305
306  }.elsewhen(state === s_cache_resp){
307    when(io.rdcache.resp.fire) {
308      when(io.rdcache.resp.bits.miss || io.rdcache.s2_bank_conflict) {
309        stateNext := s_cache_req
310      }.otherwise {
311
312        stateNext := Mux(isVSegLoad, Mux(isMisalignReg && !notCross16ByteReg, s_misalign_merge_data, s_latch_and_merge_data), s_send_data)
313      }
314    }.otherwise{
315      stateNext := s_cache_resp
316    }
317  }.elsewhen(state === s_misalign_merge_data) {
318    stateNext := Mux(!curPtr, s_tlb_req, s_latch_and_merge_data)
319  }.elsewhen(state === s_latch_and_merge_data) {
320    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields) ||
321      ((segmentIdx === maxSegIdx) && !segmentActive)) {
322
323      stateNext := s_finish // segment instruction finish
324    }.otherwise {
325      stateNext := s_tlb_req // need continue
326    }
327    /* if the segment is inactive, there is no need to access all of its fields */
328  }.elsewhen(state === s_send_data) { // when sbuffer accepts data
329    when(!sbufferOut.fire && segmentActive || (isMisalignReg && !notCross16ByteReg && isFirstSplit)) {
330      stateNext := s_send_data
331    }.elsewhen(segmentIdx === maxSegIdx && (fieldIdx === maxNfields && sbufferOut.fire || !segmentActive && io.sbuffer.valid && !io.sbuffer.ready)) {
332      stateNext := s_wait_to_sbuffer
333    }.elsewhen(segmentIdx === maxSegIdx && !segmentActive){
334      stateNext := s_finish // segment instruction finish
335    }.otherwise {
336      stateNext := s_tlb_req // need continue
337    }
338
339  }.elsewhen(state === s_wait_to_sbuffer){
340    stateNext := Mux(io.sbuffer.fire, s_finish, s_wait_to_sbuffer)
341
342  }.elsewhen(state === s_finish){ // writeback uop
343    stateNext := Mux(
344      distanceBetween(enqPtr, deqPtr) === 0.U,
345      Mux(fofBufferValid, s_fof_fix_vl, s_idle),
346      s_finish
347    )
348  }.elsewhen(state === s_fof_fix_vl){ // writeback uop
349    stateNext := Mux(!fofBufferValid, s_idle, s_fof_fix_vl)
350  }.otherwise{ // unknown state
351    stateNext := s_idle
352    assert(false.B)
353  }
354
355  /*************************************************************************
356   *                            enqueue logic
357   *************************************************************************/
358  io.in.ready                         := true.B
359  val fuOpType                         = io.in.bits.uop.fuOpType
360  val vtype                            = io.in.bits.uop.vpu.vtype
361  val mop                              = fuOpType(6, 5)
362  val instType                         = Cat(true.B, mop)
363  val eew                              = io.in.bits.uop.vpu.veew
364  val sew                              = vtype.vsew
365  val lmul                             = vtype.vlmul
366  val emul                             = EewLog2(eew) - sew + lmul
367  val vl                               = instMicroOp.vl
368  val vm                               = instMicroOp.uop.vpu.vm
369  val vstart                           = instMicroOp.uop.vpu.vstart
370  val srcMask                          = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
371  // when the first uop enqueues, latch the segment instruction's microOp
372  when(io.in.fire && !instMicroOpValid && !isEnqFixVlUop){
373    // element number in a vd
374    // TODO Rewrite it in a more elegant way.
375    val uopFlowNum                    = ZeroExt(GenRealFlowNum(instType, emul, lmul, eew, sew, true), elemIdxBits)
376    instMicroOp.baseVaddr             := io.in.bits.src_rs1
377    instMicroOpValid                  := true.B // if is first uop
378    instMicroOp.alignedType           := Mux(isIndexed(instType), sew(1, 0), eew)
379    instMicroOp.uop                   := io.in.bits.uop
380    instMicroOp.mask                  := srcMask
381    instMicroOp.vstart                := 0.U
382    instMicroOp.uopFlowNum            := uopFlowNum
383    instMicroOp.uopFlowNumMask        := GenVlMaxMask(uopFlowNum, elemIdxBits) // for merge data
384    instMicroOp.vl                    := io.in.bits.src_vl.asTypeOf(VConfig()).vl
385    instMicroOp.exceptionVl.valid     := false.B
386    instMicroOp.exceptionVl.bits      := io.in.bits.src_vl.asTypeOf(VConfig()).vl
387    segmentOffset                     := 0.U
388    instMicroOp.isFof                 := (fuOpType === VlduType.vleff) && FuType.isVSegLoad(io.in.bits.uop.fuType)
389    instMicroOp.isVSegLoad            := FuType.isVSegLoad(io.in.bits.uop.fuType)
390    instMicroOp.isVSegStore           := FuType.isVSegStore(io.in.bits.uop.fuType)
391    isMisalignReg                     := false.B
392    notCross16ByteReg                 := false.B
393  }
394  // latch data
395  when(io.in.fire && !isEnqFixVlUop){
396    data(enqPtr.value)                := io.in.bits.src_vs3
397    stride(enqPtr.value)              := io.in.bits.src_stride
398    uopq(enqPtr.value).uop            := io.in.bits.uop
399  }
400
401  // update enqptr, only 1 port
402  when(io.in.fire && !isEnqFixVlUop){
403    enqPtr                            := enqPtr + 1.U
404  }
405
406  /*************************************************************************
407   *                            output logic
408   *************************************************************************/
409
410  val indexStride                     = IndexAddr( // index for indexed instruction
411                                                    index = stride(stridePtr.value),
412                                                    flow_inner_idx = issueIndexIdx,
413                                                    eew = issueEew
414                                                  )
415  val realSegmentOffset               = Mux(isIndexed(issueInstType),
416                                            indexStride,
417                                            segmentOffset)
418  val vaddr                           = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset
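  // Example (a sketch): for a unit-stride segment access with nf = 3 (4 fields) and eew = 32b,
  // segment s touches baseVaddr + s * 16 + fieldIdx * 4, since realSegmentOffset advances by
  // (nf + 1) << eew(1, 0) = 16 bytes per segment (see the segmentOffset update below) and
  // (fieldIdx << alignedType) selects the field inside the segment.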
419
420  val misalignLowVaddr                = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W))
421  val misalignHighVaddr               = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W)) + 8.U
422  val notCross16ByteVaddr             = Cat(latchVaddr(latchVaddr.getWidth - 1, 4), 0.U(4.W))
423//  val misalignVaddr                   = Mux(notCross16ByteReg, notCross16ByteVaddr, Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr))
424  val misalignVaddr                   = Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr)
425  val tlbReqVaddr                     = Mux(isMisalignReg, misalignVaddr, vaddr)
426  //latch vaddr
427  when(state === s_tlb_req && !isMisalignReg){
428    latchVaddr := vaddr(VAddrBits - 1, 0)
429  }
430  /**
431   * tlb req and tlb resp
432   */
433
434  // query DTLB IO Assign
435  io.dtlb.req                         := DontCare
436  io.dtlb.resp.ready                  := true.B
437  io.dtlb.req.valid                   := state === s_tlb_req && segmentActive
438  io.dtlb.req.bits.cmd                := Mux(isVSegLoad, TlbCmd.read, TlbCmd.write)
439  io.dtlb.req.bits.vaddr              := tlbReqVaddr(VAddrBits - 1, 0)
440  io.dtlb.req.bits.fullva             := tlbReqVaddr
441  io.dtlb.req.bits.checkfullva        := true.B
442  io.dtlb.req.bits.size               := instMicroOp.alignedType(2,0)
443  io.dtlb.req.bits.memidx.is_ld       := isVSegLoad
444  io.dtlb.req.bits.memidx.is_st       := isVSegStore
445  io.dtlb.req.bits.debug.robIdx       := instMicroOp.uop.robIdx
446  io.dtlb.req.bits.no_translate       := false.B
447  io.dtlb.req.bits.debug.pc           := instMicroOp.uop.pc
448  io.dtlb.req.bits.debug.isFirstIssue := DontCare
449  io.dtlb.req_kill                    := false.B
450
451  val canTriggerException              = segmentIdx === 0.U || !instMicroOp.isFof // only elementIdx = 0 or is not fof can trigger
452
453  val segmentTrigger = Module(new VSegmentTrigger)
454  segmentTrigger.io.fromCsrTrigger.tdataVec             := io.fromCsrTrigger.tdataVec
455  segmentTrigger.io.fromCsrTrigger.tEnableVec           := io.fromCsrTrigger.tEnableVec
456  segmentTrigger.io.fromCsrTrigger.triggerCanRaiseBpExp := io.fromCsrTrigger.triggerCanRaiseBpExp
457  segmentTrigger.io.fromCsrTrigger.debugMode            := io.fromCsrTrigger.debugMode
458  segmentTrigger.io.memType                             := isVSegLoad
459  segmentTrigger.io.fromLoadStore.vaddr                 := Mux(isMisalignReg, misalignVaddr, latchVaddr)
460  segmentTrigger.io.fromLoadStore.isVectorUnitStride    := false.B
461  segmentTrigger.io.fromLoadStore.mask                  := 0.U
462
463  val triggerAction = segmentTrigger.io.toLoadStore.triggerAction
464  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
465  val triggerBreakpoint = TriggerAction.isExp(triggerAction)
466
467  // tlb resp
468  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
469      exceptionVec(storePageFault)      := io.dtlb.resp.bits.excp(0).pf.st
470      exceptionVec(loadPageFault)       := io.dtlb.resp.bits.excp(0).pf.ld
471      exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st
472      exceptionVec(loadGuestPageFault)  := io.dtlb.resp.bits.excp(0).gpf.ld
473      exceptionVec(storeAccessFault)    := io.dtlb.resp.bits.excp(0).af.st
474      exceptionVec(loadAccessFault)     := io.dtlb.resp.bits.excp(0).af.ld
475      when(!io.dtlb.resp.bits.miss){
476        instMicroOp.paddr             := io.dtlb.resp.bits.paddr(0)
477        instMicroOp.exceptionVaddr    := io.dtlb.resp.bits.fullva
478        instMicroOp.exceptionGpaddr   := io.dtlb.resp.bits.gpaddr(0)
479        instMicroOp.exceptionIsForVSnonLeafPTE  := io.dtlb.resp.bits.isForVSnonLeafPTE
480        lowPagePaddr  := Mux(isMisalignReg && !notCross16ByteReg && !curPtr, io.dtlb.resp.bits.paddr(0), lowPagePaddr)
481        lowPageGPaddr := Mux(isMisalignReg && !notCross16ByteReg && !curPtr, io.dtlb.resp.bits.gpaddr(0), lowPageGPaddr)
482
483        highPagePaddr  := Mux(isMisalignReg && !notCross16ByteReg && curPtr, io.dtlb.resp.bits.paddr(0), highPagePaddr)
484        highPageGPaddr := Mux(isMisalignReg && !notCross16ByteReg && curPtr, io.dtlb.resp.bits.gpaddr(0), highPageGPaddr)
485      }
486  }
487  // pmp
488  // NOTE: only load/store exceptions are handled here; other kinds of exceptions must not be sent here
489  val exceptionWithPf = exceptionVec(storePageFault) || exceptionVec(loadPageFault) || exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
490  val pmp = (io.pmpResp.asUInt & Fill(io.pmpResp.asUInt.getWidth, !exceptionWithPf)).asTypeOf(new PMPRespBundle())
491  when(state === s_pm) {
492    val highAddress = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
493      "b00".U -> 0.U,
494      "b01".U -> 1.U,
495      "b10".U -> 3.U,
496      "b11".U -> 7.U
497    )) + tlbReqVaddr(4, 0)
498
499    val addr_aligned = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
500      "b00".U   -> true.B,                   //b
501      "b01".U   -> (tlbReqVaddr(0)    === 0.U), //h
502      "b10".U   -> (tlbReqVaddr(1, 0) === 0.U), //w
503      "b11".U   -> (tlbReqVaddr(2, 0) === 0.U)  //d
504    ))
505
506    notCross16ByteWire   := highAddress(4) === tlbReqVaddr(4)
507    isMisalignWire       := !addr_aligned
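    // Example (a sketch): a 4-byte ("b10") element at tlbReqVaddr(4, 0) = 0x0E gives
    // highAddress = 0x0E + 3 = 0x11, so highAddress(4) differs from tlbReqVaddr(4) and the access
    // crosses a 16-byte boundary (notCross16ByteWire = false); addr_aligned is also false because
    // tlbReqVaddr(1, 0) =/= 0, so isMisalignWire is set.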
508    canHandleMisalign := !pmp.mmio && !triggerBreakpoint && !triggerDebugMode
509    exceptionVec(loadAddrMisaligned)  := isMisalignWire && isVSegLoad  && canTriggerException && !canHandleMisalign
510    exceptionVec(storeAddrMisaligned) := isMisalignWire && isVSegStore && canTriggerException && !canHandleMisalign
511
512    exception_va  := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
513                     exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault) ||
514                     triggerBreakpoint || triggerDebugMode || (isMisalignWire && !canHandleMisalign)
515    exception_gpa := exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
516    exception_pa  := pmp.st || pmp.ld || pmp.mmio
517
518    instMicroOp.exception_pa  := exception_pa
519    instMicroOp.exception_va  := exception_va
520    instMicroOp.exception_gpa := exception_gpa
521    // update storeAccessFault bit. Currently, we don't support vector MMIO
522    exceptionVec(loadAccessFault)  := (exceptionVec(loadAccessFault) || pmp.ld || pmp.mmio)   && isVSegLoad  && canTriggerException
523    exceptionVec(storeAccessFault) := (exceptionVec(storeAccessFault) || pmp.st || pmp.mmio)  && isVSegStore && canTriggerException
524    exceptionVec(breakPoint)       := triggerBreakpoint && canTriggerException
525
526    exceptionVec(storePageFault)      := exceptionVec(storePageFault)      && isVSegStore && canTriggerException
527    exceptionVec(loadPageFault)       := exceptionVec(loadPageFault)       && isVSegLoad  && canTriggerException
528    exceptionVec(storeGuestPageFault) := exceptionVec(storeGuestPageFault) && isVSegStore && canTriggerException
529    exceptionVec(loadGuestPageFault)  := exceptionVec(loadGuestPageFault)  && isVSegLoad  && canTriggerException
530
531    when(exception_va || exception_gpa || exception_pa) {
532      when(canTriggerException) {
533        instMicroOp.exceptionVstart := segmentIdx // for exception
534      }.otherwise {
535        instMicroOp.exceptionVl.valid := true.B
536        instMicroOp.exceptionVl.bits := segmentIdx
537      }
538    }
539
540    when(exceptionVec(breakPoint) || triggerDebugMode) {
541      instMicroOp.uop.trigger := triggerAction
542    }
543
544    when(isMisalignWire && canHandleMisalign && !(exception_va || exception_gpa || exception_pa)) {
545      notCross16ByteReg := notCross16ByteWire
546      isMisalignReg       := true.B
547      curPtr              := false.B
548    }
549  }
550
551  /**
552   * flush sbuffer IO Assign
553   */
554  io.flush_sbuffer.valid           := !sbufferEmpty && (state === s_flush_sbuffer_req)
555
556  /**
557  * update curPtr
558  * */
559  when(state === s_finish || state === s_latch_and_merge_data || state === s_send_data && stateNext =/= s_send_data) {
560    isMisalignReg     := false.B
561    notCross16ByteReg := false.B
562    curPtr := false.B
563  } .otherwise {
564    when(isVSegLoad) {
565      when(isMisalignReg && !notCross16ByteReg && state === s_misalign_merge_data) {
566        curPtr := true.B
567      }
568    } .otherwise {
569      when(isMisalignReg && !notCross16ByteReg && state === s_pm) {
570        curPtr := !curPtr
571      } .elsewhen(isMisalignReg && !notCross16ByteReg && state === s_pm && stateNext === s_send_data) {
572        curPtr := false.B
573      } .elsewhen(isMisalignReg && !notCross16ByteReg && state === s_send_data && stateNext === s_send_data && sbufferOut.fire) {
574        curPtr := !curPtr
575      }
576    }
577  }
578
579
580
581  /**
582   * merge data for load
583   */
584  val cacheData = LookupTree(latchVaddr(3,0), List(
585    "b0000".U -> io.rdcache.resp.bits.data_delayed(63,    0),
586    "b0001".U -> io.rdcache.resp.bits.data_delayed(63,    8),
587    "b0010".U -> io.rdcache.resp.bits.data_delayed(63,   16),
588    "b0011".U -> io.rdcache.resp.bits.data_delayed(63,   24),
589    "b0100".U -> io.rdcache.resp.bits.data_delayed(63,   32),
590    "b0101".U -> io.rdcache.resp.bits.data_delayed(63,   40),
591    "b0110".U -> io.rdcache.resp.bits.data_delayed(63,   48),
592    "b0111".U -> io.rdcache.resp.bits.data_delayed(63,   56),
593    "b1000".U -> io.rdcache.resp.bits.data_delayed(127,  64),
594    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
595    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
596    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
597    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
598    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
599    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
600    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
601  ))
602
603  val misalignLowData  = LookupTree(latchVaddr(3,0), List(
604    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
605    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
606    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
607    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
608    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
609    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
610    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
611  ))
612
613  val misalignCombinedData = LookupTree(latchVaddr(3,0), List(
614    "b1001".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(55,    0))(63, 0),
615    "b1010".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(47,    0))(63, 0),
616    "b1011".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(39,    0))(63, 0),
617    "b1100".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(31,    0))(63, 0),
618    "b1101".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(23,    0))(63, 0),
619    "b1110".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(15,    0))(63, 0),
620    "b1111".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(7,     0))(63, 0)
621  ))
622  when(state === s_misalign_merge_data && segmentActive){
623    when(!curPtr) {
624      combinedData := misalignLowData
625    } .otherwise {
626      combinedData := misalignCombinedData
627    }
628  }
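  // Example of the two-pass merge above (a sketch): for an element with latchVaddr(3, 0) = 0xD,
  // the first dcache access (curPtr = 0) latches bytes 13..15 of the line into combinedData via
  // misalignLowData, and the second access (curPtr = 1) prepends its data to combinedData(23, 0)
  // via misalignCombinedData, yielding the element's low 64 bits.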
629
630  val shiftData    = (io.rdcache.resp.bits.data_delayed >> (latchVaddr(3, 0) << 3)).asUInt(63, 0)
631  val mergemisalignData = Mux(notCross16ByteReg, shiftData, combinedData)
632  val pickData  = rdataVecHelper(alignedType(1,0), Mux(isMisalignReg, mergemisalignData, cacheData))
633  val mergedData = mergeDataWithElemIdx(
634    oldData = data(splitPtr.value),
635    newData = Seq(pickData),
636    alignedType = alignedType(1,0),
637    elemIdx = Seq(elemIdxInVd),
638    valids = Seq(true.B)
639  )
640  when(state === s_latch_and_merge_data && segmentActive){
641    data(splitPtr.value) := mergedData
642  }
643
644
645  /**
646   * split data for store
647   * */
648  val splitData = genVSData(
649    data = data(splitPtr.value),
650    elemIdx = elemIdxInVd,
651    alignedType = alignedType
652  )
653  val flowData  = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
654  val wmask     = genVWmask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
655  val bmask     = genBasemask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
656  val dcacheReqVaddr = Mux(isMisalignReg, misalignVaddr, latchVaddr)
657  val dcacheReqPaddr = Mux(isMisalignReg, Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, PageOffsetWidth), misalignVaddr(PageOffsetWidth - 1, 0)), instMicroOp.paddr)
658  /**
659   * rdcache req; write requests don't need to query the dcache, because elements are written to the sbuffer
660   */
661  io.rdcache.req                    := DontCare
662  io.rdcache.req.valid              := state === s_cache_req && isVSegLoad
663  io.rdcache.req.bits.cmd           := MemoryOpConstants.M_XRD
664  io.rdcache.req.bits.vaddr         := dcacheReqVaddr
665  io.rdcache.req.bits.mask          := mask
666  io.rdcache.req.bits.data          := flowData
667  io.rdcache.pf_source              := LOAD_SOURCE.U
668  io.rdcache.req.bits.id            := DontCare
669  io.rdcache.resp.ready             := true.B
670  io.rdcache.s1_paddr_dup_lsu       := dcacheReqPaddr
671  io.rdcache.s1_paddr_dup_dcache    := dcacheReqPaddr
672  io.rdcache.s1_kill                := false.B
673  io.rdcache.s1_kill_data_read      := false.B
674  io.rdcache.s2_kill                := false.B
675  if (env.FPGAPlatform){
676    io.rdcache.s0_pc                := DontCare
677    io.rdcache.s1_pc                := DontCare
678    io.rdcache.s2_pc                := DontCare
679  }else{
680    io.rdcache.s0_pc                := instMicroOp.uop.pc
681    io.rdcache.s1_pc                := instMicroOp.uop.pc
682    io.rdcache.s2_pc                := instMicroOp.uop.pc
683  }
684  io.rdcache.replacementUpdated     := false.B
685  io.rdcache.is128Req               := notCross16ByteReg
686
687
688  /**
689   * write data to sbuffer
690   * */
691  val sbufferAddrLow4bit = latchVaddr(3, 0)
692
693  val notCross16BytePaddr          = Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, 4), 0.U(4.W))
694  val notCross16ByteData           = flowData << (sbufferAddrLow4bit << 3)
695
696  val Cross16ByteMask = Wire(UInt(32.W))
697  val Cross16ByteData = Wire(UInt(256.W))
698  Cross16ByteMask := bmask << sbufferAddrLow4bit
699  Cross16ByteData := flowData << (sbufferAddrLow4bit << 3)
700
701  val vaddrLow  = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W))
702  val vaddrHigh = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W)) + 8.U
703
704
705  val paddrLow  = Cat(lowPagePaddr(lowPagePaddr.getWidth - 1, 3), 0.U(3.W))
706  val paddrHigh = Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, 3), 0.U(3.W))
707
708  val maskLow   = Cross16ByteMask(15, 0)
709  val maskHigh  = Cross16ByteMask(31, 16)
710
711  val dataLow   = Cross16ByteData(127, 0)
712  val dataHigh  = Cross16ByteData(255, 128)
713
714  val sbuffermisalignMask          = Mux(notCross16ByteReg, wmask, Mux(isFirstSplit, maskLow, maskHigh))
715  val sbuffermisalignData          = Mux(notCross16ByteReg, notCross16ByteData, Mux(isFirstSplit, dataLow, dataHigh))
716  val sbuffermisalignPaddr         = Mux(notCross16ByteReg, notCross16BytePaddr, Mux(isFirstSplit, paddrLow, paddrHigh))
717  val sbuffermisalignVaddr         = Mux(notCross16ByteReg, notCross16ByteVaddr, Mux(isFirstSplit, vaddrLow, vaddrHigh))
718
719  val sbufferMask                  = Mux(isMisalignReg, sbuffermisalignMask, wmask)
720  val sbufferData                  = Mux(isMisalignReg, sbuffermisalignData, flowData)
721  val sbufferVaddr                 = Mux(isMisalignReg, sbuffermisalignVaddr, latchVaddr)
722  val sbufferPaddr                 = Mux(isMisalignReg, sbuffermisalignPaddr, instMicroOp.paddr)
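  /* Example of the cross-16-byte store split above (a sketch, assuming genBasemask returns the
   * unshifted byte mask of one element): an 8-byte element with latchVaddr(3, 0) = 0xC is shifted
   * to byte offset 12 of the 32-byte Cross16ByteData/Cross16ByteMask window; the first pass
   * (curPtr = 0) writes dataLow/maskLow, covering bytes 12..15 of the first 16-byte region with
   * lowPagePaddr, and the second pass (curPtr = 1) writes dataHigh/maskHigh, covering bytes 0..3
   * of the following region with instMicroOp.paddr. */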
723
724  dontTouch(wmask)
725  dontTouch(Cross16ByteMask)
726  sbufferOut.bits                  := DontCare
727  sbufferOut.valid                 := state === s_send_data && segmentActive
728  sbufferOut.bits.vecValid         := state === s_send_data && segmentActive
729  sbufferOut.bits.mask             := sbufferMask
730  sbufferOut.bits.data             := sbufferData
731  sbufferOut.bits.vaddr            := sbufferVaddr
732  sbufferOut.bits.cmd              := MemoryOpConstants.M_XWR
733  sbufferOut.bits.id               := DontCare
734  sbufferOut.bits.addr             := sbufferPaddr
735
736  NewPipelineConnect(
737    sbufferOut, io.sbuffer, io.sbuffer.fire,
738    false.B,
739    Option(s"VSegmentUnitPipelineConnect")
740  )
741
742  io.vecDifftestInfo.valid         := io.sbuffer.valid
743  io.vecDifftestInfo.bits          := uopq(deqPtr.value).uop
744
745  /**
746   * update ptr
747   * */
748  private val fieldActiveWirteFinish = sbufferOut.fire && segmentActive // write data finished and the segment is active
749  XSError(sbufferOut.fire && !segmentActive, "Attempt to write an inactive segment to the sbuffer, something is wrong!\n")
750
751  private val segmentInactiveFinish = ((state === s_latch_and_merge_data) || (state === s_send_data && stateNext =/= s_send_data)) && !segmentActive
752
753  val splitPtrOffset = Mux(
754    isIndexed(instType),
755    Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt),
756    Mux(emul.asSInt < 0.S, 1.U, (1.U << emul).asUInt)
757  )
758  splitPtrNext :=
759    Mux(fieldIdx === maxNfields || !segmentActive, // if the segment is finished or inactive, jump to the next segment; otherwise continue with the next field
760      // segment finished: shift right by 'issueUopFlowNumLog2' so that the lateral offset is generated correctly when emul > 1.
761     (deqPtr + ((segmentIdx +& 1.U) >> issueUopFlowNumLog2).asUInt),
762      // next field.
763     (splitPtr + splitPtrOffset)
764    )
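  // Worked example (a sketch, matching the figure at the top of the file): with emul = 2, 4 fields
  // and 4 elements per vd, splitPtrOffset = 2, so within one segment splitPtr walks the buffer
  // slots of v2, v4, v6, v8; when fieldIdx reaches maxNfields, splitPtrNext = deqPtr +
  // ((segmentIdx + 1) >> 2), which stays at v2's slot for segments 0..2 and advances to v3's slot
  // after segment 3.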
765
766  if (backendParams.debugEn){
767    dontTouch(issueUopFlowNumLog2)
768    dontTouch(issueEmul)
769    dontTouch(splitPtrNext)
770    dontTouch(stridePtr)
771    dontTouch(segmentActive)
772  }
773
774  // update splitPtr
775  when(state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && (fieldActiveWirteFinish || !segmentActive))){
776    splitPtr := splitPtrNext
777  }.elsewhen(io.in.fire && !instMicroOpValid){
778    splitPtr := deqPtr // initial splitPtr
779  }
780
781  // update stridePtr, only used by indexed accesses
782  val strideOffset = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
783  stridePtr       := deqPtr + strideOffset
784
785  // update fieldIdx
786  when(io.in.fire && !instMicroOpValid){ // init
787    fieldIdx := 0.U
788  }.elsewhen(state === s_latch_and_merge_data && segmentActive ||
789            (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish)){ // only if segment is active
790
791    /* advance fieldIdx, wrapping to 0 when the segment completes */
792    fieldIdx := Mux(fieldIdx === maxNfields, 0.U, fieldIdx + 1.U)
793  }.elsewhen(segmentInactiveFinish){ // segment is inactive, go to next segment
794    fieldIdx := 0.U
795  }
796  //update segmentIdx
797  when(io.in.fire && !instMicroOpValid){
798    segmentIdx := 0.U
799  }.elsewhen(fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish)) &&
800             segmentIdx =/= maxSegIdx){ // next segment, only if segment is active
801
802    segmentIdx := segmentIdx + 1.U
803  }.elsewhen(segmentInactiveFinish && segmentIdx =/= maxSegIdx){ // if segment is inactive, go to next segment
804    segmentIdx := segmentIdx + 1.U
805  }
806
807  //update segmentOffset
808  /* increase segmentOffset whenever a segment finishes, whether it is active or inactive */
809  when((fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish))) ||
810       segmentInactiveFinish){
811
812    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew(1, 0), stride(stridePtr.value))
813  }
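  // e.g. for the unit-stride case the increment is (nf + 1) element widths (16 bytes for 4 fields
  // of 32b); for a strided segment access it is the rs2 stride, and for indexed accesses
  // segmentOffset is ignored because realSegmentOffset takes indexStride instead.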
814
815  //update deqPtr
816  when((state === s_finish) && !isEmpty(enqPtr, deqPtr)){
817    deqPtr := deqPtr + 1.U
818  }
819
820
821  /*************************************************************************
822   *                            fof logic
823   *************************************************************************/
824
825  //Enq
826  when(isEnqFixVlUop && !fofBufferValid) { fofBuffer := io.in.bits.uop }
827  when(isEnqFixVlUop && !fofBufferValid) { fofBufferValid := true.B }
828
829  //Deq
830  val fofFixVlValid                    = state === s_fof_fix_vl && fofBufferValid
831
832  when(fofFixVlValid) { fofBuffer      := 0.U.asTypeOf(new DynInst) }
833  when(fofFixVlValid) { fofBufferValid := false.B }
834
835
836  /*************************************************************************
837   *                            dequeue logic
838   *************************************************************************/
839  val vdIdxInField = GenUopIdxInField(Mux(isIndexed(instType), issueLmul, issueEmul), uopq(deqPtr.value).uop.vpu.vuopIdx)
840  /* select mask of vd, may be removed in the future */
841  val realEw        = Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
842  val maskDataVec: Vec[UInt] = VecDataToMaskDataVec(instMicroOp.mask, realEw)
843  val maskUsed      = maskDataVec(vdIdxInField)
844
845  when(stateNext === s_idle){
846    instMicroOpValid := false.B
847  }
848  // writeback to backend
849  val writebackOut                     = WireInit(io.uopwriteback.bits)
850  val writebackValid                   = (state === s_finish) && !isEmpty(enqPtr, deqPtr) || fofFixVlValid
851
852  when(fofFixVlValid) {
853    writebackOut.uop                    := fofBuffer
854    writebackOut.uop.vpu.vl             := instMicroOp.exceptionVl.bits
855    writebackOut.data                   := instMicroOp.exceptionVl.bits
856    writebackOut.mask.get               := Fill(VLEN, 1.U)
857    writebackOut.uop.vpu.vmask          := Fill(VLEN, 1.U)
858  }.otherwise{
859    writebackOut.uop                    := uopq(deqPtr.value).uop
860    writebackOut.uop.vpu                := instMicroOp.uop.vpu
861    writebackOut.uop.trigger            := instMicroOp.uop.trigger
862    writebackOut.uop.exceptionVec       := instMicroOp.uop.exceptionVec
863    writebackOut.mask.get               := instMicroOp.mask
864    writebackOut.data                   := data(deqPtr.value)
865    writebackOut.vdIdx.get              := vdIdxInField
866    writebackOut.uop.vpu.vl             := Mux(instMicroOp.exceptionVl.valid, instMicroOp.exceptionVl.bits, instMicroOp.vl)
867    writebackOut.uop.vpu.vstart         := Mux(instMicroOp.uop.exceptionVec.asUInt.orR || TriggerAction.isDmode(instMicroOp.uop.trigger), instMicroOp.exceptionVstart, instMicroOp.vstart)
868    writebackOut.uop.vpu.vmask          := maskUsed
869    writebackOut.uop.vpu.vuopIdx        := uopq(deqPtr.value).uop.vpu.vuopIdx
870    // when exception updates vl, should use vtu strategy.
871    writebackOut.uop.vpu.vta            := Mux(instMicroOp.exceptionVl.valid, VType.tu, instMicroOp.uop.vpu.vta)
872    writebackOut.debug                  := DontCare
873    writebackOut.vdIdxInField.get       := vdIdxInField
874    writebackOut.uop.robIdx             := instMicroOp.uop.robIdx
875    writebackOut.uop.fuOpType           := instMicroOp.uop.fuOpType
876  }
877
878  io.uopwriteback.valid               := RegNext(writebackValid)
879  io.uopwriteback.bits                := RegEnable(writebackOut, writebackValid)
880
881  dontTouch(writebackValid)
882
883  //to RS
884  val feedbackOut                      = WireInit(0.U.asTypeOf(io.feedback.bits))
885  val feedbackValid                    = state === s_finish && !isEmpty(enqPtr, deqPtr)
886  feedbackOut.hit                     := true.B
887  feedbackOut.robIdx                  := instMicroOp.uop.robIdx
888  feedbackOut.sourceType              := DontCare
889  feedbackOut.flushState              := DontCare
890  feedbackOut.dataInvalidSqIdx        := DontCare
891  feedbackOut.sqIdx                   := uopq(deqPtr.value).uop.sqIdx
892  feedbackOut.lqIdx                   := uopq(deqPtr.value).uop.lqIdx
893
894  io.feedback.valid                   := RegNext(feedbackValid)
895  io.feedback.bits                    := RegEnable(feedbackOut, feedbackValid)
896
897  dontTouch(feedbackValid)
898
899  // exception
900  io.exceptionInfo                    := DontCare
901  io.exceptionInfo.bits.robidx        := instMicroOp.uop.robIdx
902  io.exceptionInfo.bits.uopidx        := uopq(deqPtr.value).uop.vpu.vuopIdx
903  io.exceptionInfo.bits.vstart        := instMicroOp.exceptionVstart
904  io.exceptionInfo.bits.vaddr         := instMicroOp.exceptionVaddr
905  io.exceptionInfo.bits.gpaddr        := instMicroOp.exceptionGpaddr
906  io.exceptionInfo.bits.isForVSnonLeafPTE := instMicroOp.exceptionIsForVSnonLeafPTE
907  io.exceptionInfo.bits.vl            := instMicroOp.exceptionVl.bits
908  io.exceptionInfo.valid              := (state === s_finish) && instMicroOp.uop.exceptionVec.asUInt.orR && !isEmpty(enqPtr, deqPtr)
909}
910
911