xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VSegmentUnit.scala (revision fa5e530d3c795f57d3c220da20132424a0f614b4)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend.rob.RobPtr
26import xiangshan.backend.Bundles._
27import xiangshan.mem._
28import xiangshan.backend.fu.{FuType, PMPRespBundle}
29import freechips.rocketchip.diplomacy.BufferParams
30import xiangshan.cache.mmu._
31import xiangshan.cache._
32import xiangshan.cache.wpu.ReplayCarry
33import xiangshan.backend.fu.util.SdtrigExt
34import xiangshan.ExceptionNO._
35import xiangshan.backend.fu.vector.Bundles.{VConfig, VType}
36import xiangshan.backend.datapath.NewPipelineConnect
37import xiangshan.backend.fu.NewCSR._
38import xiangshan.backend.fu.vector.Utils.VecDataToMaskDataVec
39
40class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
41{
42  val baseVaddr        = UInt(XLEN.W)
43  val uop              = new DynInst
44  val paddr            = UInt(PAddrBits.W)
45  val mask             = UInt(VLEN.W)
46  val alignedType      = UInt(alignTypeBits.W)
47  val vl               = UInt(elemIdxBits.W)
48  val uopFlowNum       = UInt(elemIdxBits.W)
49  val uopFlowNumMask   = UInt(elemIdxBits.W)
50  val isVSegLoad       = Bool()
51  val isVSegStore      = Bool()
52  // for exception
53  val vstart           = UInt(elemIdxBits.W)
54  val exceptionVaddr   = UInt(XLEN.W)
55  val exceptionGpaddr  = UInt(XLEN.W)
56  val exceptionIsForVSnonLeafPTE = Bool()
57  val exception_va     = Bool()
58  val exception_gpa    = Bool()
59  val exception_pa     = Bool()
60  val exceptionVstart  = UInt(elemIdxBits.W)
61  // valid: a fof exception occurred but could not be raised; the vl of all written-back uops needs to be updated with exceptionVl
62  val exceptionVl      = ValidIO(UInt(elemIdxBits.W))
63  val isFof            = Bool()
64}
65
66// latch each uop's VecWen, pdest, v0Wen, uopIdx
67class VSegmentUop(implicit p: Parameters) extends VLSUBundle{
68  val uop              = new DynInst
69}
70
71class VSegmentUnit (implicit p: Parameters) extends VLSUModule
72  with HasDCacheParameters
73  with MemoryOpConstants
74  with SdtrigExt
75  with HasLoadHelper
76{
77  val io               = IO(new VSegmentUnitIO)
78
79  val maxSize          = VSegmentBufferSize
80
81  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize){
82  }
83
84  object VSegUPtr {
85    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
86      val ptr           = Wire(new VSegUPtr)
87      ptr.flag         := f
88      ptr.value        := v
89      ptr
90    }
91  }
92
93  val maxSplitNum = 2
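  // (assumption) maxSplitNum presumably bounds the number of split accesses for a misaligned element:
  // an element that crosses a 16-byte boundary is handled as at most two accesses (see the misalign
  // handling below).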
94
95  /**
96  ********************************************************************************************************
97  *  Use an example to illustrate the working logic of a segment unit:                                   *
98  *    For:                                                                                              *
99  *      lmul=2 sew=32 emul=2 eew=32  vl=16                                                              *
100  *    Then:                                                                                             *
101  *      Access memory in the order:                                                                     *
102  *        (V2,S0),(V4,S0),(V6,S0),(V8,S0),                                                              *
103  *        (V2,S1),(V4,S1),(V6,S1),(V8,S1),                                                              *
104  *        (V2,S2),(V4,S2),(V6,S2),(V8,S2),                                                              *
105  *        (V2,S3),(V4,S3),(V6,S3),(V8,S3),                                                              *
106  *        (V3,S4),(V5,S4),(V7,S4),(V9,S4),                                                              *
107  *        (V3,S5),(V5,S5),(V7,S5),(V9,S5),                                                              *
108  *        (V3,S6),(V5,S6),(V7,S6),(V9,S6),                                                              *
109  *        (V3,S7),(V5,S7),(V7,S7),(V9,S7),                                                              *
110  *                                                                                                      *
111  *                                                                                                      *
112  *    [[data]] saves the data generated by the access and corresponds to the register.                  *
113  *    [[splitPtr]] controls the destination register written to.                                        *
114  *                                                                                                      *
115  *    The splitPtr offset can be seen in the assignment logic of [[splitPtrNext]],                      *
116  *    which is mainly calculated in terms of [[fieldIdx]] and [[segmentIdx]].                           *
117  *    Different fields of the same segment are accessed first, then different segments are visited.     *
118  *    For the case of 'emul' greater than 1, as in the example below,                                   *
119  *    although 'v2' and 'v3' are different vd registers holding the same field, they are still          *
120  *    different segments, so they are accessed sequentially, just like 'Access memory in the order'.    *
121  *                                                                                                      *
122  *                         [[segmentIdx]]                                                               *
123  *                               |                                                                      *
124  *                               |                                                                      *
125  *                               V                                                                      *
126  *                                                                                                      *
127  *                               S0               S1                S2                 S3               *
128  *                      ----------------------------------------------------------------------------    *
129  *  [[splitPtr]]--> v2  |     field0     |      field0     |      field0     |      field0         |    *
130  *                      ----------------------------------------------------------------------------    *
131  *                               S4               S5                S6                 S7               *
132  *                      ----------------------------------------------------------------------------    *
133  *                  v3  |     field0     |      field0     |      field0     |      field0         |    *
134  *                      ----------------------------------------------------------------------------    *
135  *                               S0               S1                S2                 S3               *
136  *                      ----------------------------------------------------------------------------    *
137  *                  v4  |     field1     |      field1     |      field1     |      field1         |    *
138  *                      ----------------------------------------------------------------------------    *
139  *                               S4               S5                S6                 S7               *
140  *                      ----------------------------------------------------------------------------    *
141  *                  v5  |     field1     |      field1     |      field1     |      field1         |    *
142  *                      ----------------------------------------------------------------------------    *
143  *                               S0               S1                S2                 S3               *
144  *                      ----------------------------------------------------------------------------    *
145  *                  v6  |     field2     |      field2     |      field2     |      field2         |    *
146  *                      ----------------------------------------------------------------------------    *
147  *                               S4               S5                S6                 S7               *
148  *                      ----------------------------------------------------------------------------    *
149  *                  v7  |     field2     |      field2     |      field2     |      field2         |    *
150  *                      ----------------------------------------------------------------------------    *
151  *                               S0               S1                S2                 S3               *
152  *                      ----------------------------------------------------------------------------    *
153  *                  v8  |     field3     |      field3     |      field3     |      field3         |    *
154  *                      ----------------------------------------------------------------------------    *
155  *                               S4               S5                S6                 S7               *
156  *                      ----------------------------------------------------------------------------    *
157  *                  v9  |     field3     |      field3     |      field3     |      field3         |    *
158  *                      ----------------------------------------------------------------------------    *
159  *                                                                                                      *
160  *                                                                                                      *
161  ********************************************************************************************************
162  **/
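  /*
   * Roughly, each (fieldIdx, segmentIdx) pair above corresponds to one element access whose virtual
   * address is baseVaddr + (fieldIdx << alignedType) + segmentOffset (or an index-register offset for
   * indexed accesses); see the [[vaddr]] computation in the output logic below.
   */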
163
164
165  // buffer uop
166  val instMicroOp       = Reg(new VSegmentBundle)
167  val instMicroOpValid  = RegInit(false.B)
168  val data              = Reg(Vec(maxSize, UInt(VLEN.W)))
169  val uopq              = Reg(Vec(maxSize, new VSegmentUop))
170  val stride            = Reg(Vec(maxSize, UInt(VLEN.W)))
171  val allocated         = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
172  val enqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
173  val deqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
174  val stridePtr         = WireInit(0.U.asTypeOf(new VSegUPtr)) // for select stride/index
175
176  val segmentIdx        = RegInit(0.U(elemIdxBits.W))
177  val fieldIdx          = RegInit(0.U(fieldBits.W))
178  val segmentOffset     = RegInit(0.U(XLEN.W))
179  val splitPtr          = RegInit(0.U.asTypeOf(new VSegUPtr)) // for select load/store data
180  val splitPtrNext      = WireInit(0.U.asTypeOf(new VSegUPtr))
181
182  val exception_va      = WireInit(false.B)
183  val exception_gpa     = WireInit(false.B)
184  val exception_pa      = WireInit(false.B)
185
186  val maxSegIdx         = instMicroOp.vl - 1.U
187  val maxNfields        = instMicroOp.uop.vpu.nf
188  val latchVaddr        = RegInit(0.U(VAddrBits.W))
189  val latchVaddrDup     = RegInit(0.U(VAddrBits.W))
190
191  XSError((segmentIdx > maxSegIdx) && instMicroOpValid, s"segmentIdx > vl, something is wrong!\n")
192  XSError((fieldIdx > maxNfields) &&  instMicroOpValid, s"fieldIdx > nfields, something is wrong!\n")
193
194  // MicroOp
195  val baseVaddr                       = instMicroOp.baseVaddr
196  val alignedType                     = instMicroOp.alignedType
197  val fuType                          = instMicroOp.uop.fuType
198  val isVSegLoad                      = instMicroOp.isVSegLoad
199  val isVSegStore                     = instMicroOp.isVSegStore
200  val mask                            = instMicroOp.mask
201  val exceptionVec                    = instMicroOp.uop.exceptionVec
202  val issueEew                        = instMicroOp.uop.vpu.veew
203  val issueLmul                       = instMicroOp.uop.vpu.vtype.vlmul
204  val issueSew                        = instMicroOp.uop.vpu.vtype.vsew
205  val issueEmul                       = EewLog2(issueEew) - issueSew + issueLmul
206  val elemIdxInVd                     = segmentIdx & instMicroOp.uopFlowNumMask
207  val issueInstType                   = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always segment instruction
208  val issueUopFlowNumLog2             = GenRealFlowLog2(issueInstType, issueEmul, issueLmul, issueEew, issueSew, true) // max element number log2 in vd
209  val issueVlMax                      = instMicroOp.uopFlowNum // max elementIdx in vd
210  val issueMaxIdxInIndex              = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0)) // max number of index elements in one index register
211  val issueMaxIdxInIndexMask          = GenVlMaxMask(issueMaxIdxInIndex, elemIdxBits)
212  val issueMaxIdxInIndexLog2          = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0))
213  val issueIndexIdx                   = segmentIdx & issueMaxIdxInIndexMask
214  val segmentActive                   = (mask & UIntToOH(segmentIdx)).orR
215
216  // sbuffer write interface
217  val sbufferOut                      = Wire(Decoupled(new DCacheWordReqWithVaddrAndPfFlag))
218
219
220  // segment fof instruction buffer
221  val fofBuffer                       = RegInit(0.U.asTypeOf(new DynInst))
222  val fofBufferValid                  = RegInit(false.B)
223
224
225  // Segment instruction's FSM
226  /*
227  * s_idle: wait for a request
228  * s_flush_sbuffer_req: flush the sbuffer
229  * s_wait_flush_sbuffer_resp: wait until the sbuffer is empty
230  * s_tlb_req: request the tlb
231  * s_wait_tlb_resp: wait for the tlb resp
232  * s_pm: check pmp
233  * s_cache_req: request the cache
234  * s_cache_resp: wait for the cache resp
235  * s_misalign_merge_data: merge unaligned data
236  * s_latch_and_merge_data: latch and merge read data
237  * s_send_data: send write data
238  * s_wait_to_sbuffer: wait for data from the sbufferOut pipeline stage to be sent to the sbuffer
239  * s_finish: normal uop is complete
240  * s_fof_fix_vl: write back the uop of the fof instruction to modify vl
241  * */
242  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm ::s_cache_req :: s_cache_resp :: s_misalign_merge_data :: s_latch_and_merge_data :: s_send_data :: s_wait_to_sbuffer :: s_finish :: s_fof_fix_vl :: Nil = Enum(14)
243  val state             = RegInit(s_idle)
244  val stateNext         = WireInit(s_idle)
245  val sbufferEmpty      = io.flush_sbuffer.empty
246  val isEnqfof          = io.in.bits.uop.fuOpType === VlduType.vleff && io.in.valid
247  val isEnqFixVlUop     = isEnqfof && io.in.bits.uop.vpu.lastUop
248
249  // signals for handling misaligned accesses
250  val curPtr             = RegInit(false.B)
251  val canHandleMisalign  = WireInit(false.B)
252  val isMisalignReg      = RegInit(false.B)
253  val isMisalignWire     = WireInit(false.B)
254  val notCross16ByteReg  = RegInit(false.B)
255  val notCross16ByteWire = WireInit(false.B)
256  val combinedData       = RegInit(0.U(XLEN.W))
257
258  val lowPagePaddr       = RegInit(0.U(PAddrBits.W))
259  val lowPageGPaddr      = RegInit(0.U(GPAddrBits.W))
260
261  val highPagePaddr      = RegInit(0.U(PAddrBits.W))
262  val highPageGPaddr     = RegInit(0.U(GPAddrBits.W))
263
264  val isFirstSplit       = !curPtr
265  val isSecondSplit      = curPtr
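  // For a misaligned element: notCross16Byte* means the element stays within one 16-byte chunk and can
  // be handled by a single 128-bit access; otherwise it is split into a low and a high part, and curPtr
  // selects which split is currently being processed (false: first/low split, true: second/high split).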
266  /**
267   * state update
268   */
269  state  := stateNext
270
271  /**
272   * state transfer
273   */
274  when(state === s_idle){
275    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
276  }.elsewhen(state === s_flush_sbuffer_req){
277    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if sbuffer is empty, go to query tlb
278
279  }.elsewhen(state === s_wait_flush_sbuffer_resp){
280    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)
281
282  }.elsewhen(state === s_tlb_req){
283    stateNext := Mux(segmentActive, s_wait_tlb_resp, Mux(isVSegLoad, s_latch_and_merge_data, s_send_data))
284
285  }.elsewhen(state === s_wait_tlb_resp){
286    stateNext := Mux(io.dtlb.resp.fire,
287                      Mux(!io.dtlb.resp.bits.miss,
288                          s_pm,
289                          s_tlb_req),
290                      s_wait_tlb_resp)
291
292  }.elsewhen(state === s_pm){
293    when(exception_pa || exception_va || exception_gpa) {
294      stateNext := s_finish
295    } .otherwise {
296      when(canHandleMisalign && isMisalignWire && !notCross16ByteWire || (isMisalignReg && !notCross16ByteReg && isFirstSplit && isVSegStore)) {
297        stateNext := s_tlb_req
298      } .otherwise {
299        /* if this is a vector store, data goes to the sbuffer, so there is no need to query the dcache */
300        stateNext := Mux(isVSegLoad, s_cache_req, s_send_data)
301      }
302    }
303
304  }.elsewhen(state === s_cache_req){
305    stateNext := Mux(io.rdcache.req.fire, s_cache_resp, s_cache_req)
306
307  }.elsewhen(state === s_cache_resp){
308    when(io.rdcache.resp.fire) {
309      when(io.rdcache.resp.bits.miss || io.rdcache.s2_bank_conflict) {
310        stateNext := s_cache_req
311      }.otherwise {
312
313        stateNext := Mux(isVSegLoad, Mux(isMisalignReg && !notCross16ByteReg, s_misalign_merge_data, s_latch_and_merge_data), s_send_data)
314      }
315    }.otherwise{
316      stateNext := s_cache_resp
317    }
318  }.elsewhen(state === s_misalign_merge_data) {
319    stateNext := Mux(!curPtr, s_tlb_req, s_latch_and_merge_data)
320  }.elsewhen(state === s_latch_and_merge_data) {
321    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields) ||
322      ((segmentIdx === maxSegIdx) && !segmentActive)) {
323
324      stateNext := s_finish // segment instruction finish
325    }.otherwise {
326      stateNext := s_tlb_req // need continue
327    }
328    /* if the segment is inactive, there is no need to wait to access all of the fields */
329  }.elsewhen(state === s_send_data) { // when the sbuffer accepts data
330    when(!sbufferOut.fire && segmentActive || (isMisalignReg && !notCross16ByteReg && isFirstSplit)) {
331      stateNext := s_send_data
332    }.elsewhen(segmentIdx === maxSegIdx && (fieldIdx === maxNfields && sbufferOut.fire || !segmentActive && io.sbuffer.valid && !io.sbuffer.ready)) {
333      stateNext := s_wait_to_sbuffer
334    }.elsewhen(segmentIdx === maxSegIdx && !segmentActive){
335      stateNext := s_finish // segment instruction finish
336    }.otherwise {
337      stateNext := s_tlb_req // need continue
338    }
339
340  }.elsewhen(state === s_wait_to_sbuffer){
341    stateNext := Mux(io.sbuffer.fire, s_finish, s_wait_to_sbuffer)
342
343  }.elsewhen(state === s_finish){ // writeback uop
344    stateNext := Mux(
345      distanceBetween(enqPtr, deqPtr) === 0.U,
346      Mux(fofBufferValid, s_fof_fix_vl, s_idle),
347      s_finish
348    )
349  }.elsewhen(state === s_fof_fix_vl){ // writeback uop
350    stateNext := Mux(!fofBufferValid, s_idle, s_fof_fix_vl)
351  }.otherwise{ // unknown state
352    stateNext := s_idle
353    assert(false.B)
354  }
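  // A rough summary of the flow above: s_idle -> flush the sbuffer -> s_tlb_req / s_wait_tlb_resp ->
  // s_pm (permission and alignment checks) -> s_cache_req / s_cache_resp (plus s_misalign_merge_data if
  // needed) and s_latch_and_merge_data for loads, or s_send_data (plus s_wait_to_sbuffer) for stores,
  // looping back to s_tlb_req per element until s_finish (and s_fof_fix_vl for fof instructions).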
355
356  /*************************************************************************
357   *                            enqueue logic
358   *************************************************************************/
359  io.in.ready                         := true.B
360  val fuOpType                         = io.in.bits.uop.fuOpType
361  val vtype                            = io.in.bits.uop.vpu.vtype
362  val mop                              = fuOpType(6, 5)
363  val instType                         = Cat(true.B, mop)
364  val eew                              = io.in.bits.uop.vpu.veew
365  val sew                              = vtype.vsew
366  val lmul                             = vtype.vlmul
367  val emul                             = EewLog2(eew) - sew + lmul
368  val vl                               = instMicroOp.vl
369  val vm                               = instMicroOp.uop.vpu.vm
370  val vstart                           = instMicroOp.uop.vpu.vstart
371  val srcMask                          = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
372  // on the first uop enqueue, we need to latch the microOp of the segment instruction
373  when(io.in.fire && !instMicroOpValid && !isEnqFixVlUop){
374    // element number in a vd
375    // TODO Rewrite it in a more elegant way.
376    val uopFlowNum                    = ZeroExt(GenRealFlowNum(instType, emul, lmul, eew, sew, true), elemIdxBits)
377    instMicroOp.baseVaddr             := io.in.bits.src_rs1
378    instMicroOpValid                  := true.B // if is first uop
379    instMicroOp.alignedType           := Mux(isIndexed(instType), sew(1, 0), eew)
380    instMicroOp.uop                   := io.in.bits.uop
381    instMicroOp.mask                  := srcMask
382    instMicroOp.vstart                := 0.U
383    instMicroOp.uopFlowNum            := uopFlowNum
384    instMicroOp.uopFlowNumMask        := GenVlMaxMask(uopFlowNum, elemIdxBits) // for merge data
385    instMicroOp.vl                    := io.in.bits.src_vl.asTypeOf(VConfig()).vl
386    instMicroOp.exceptionVl.valid     := false.B
387    instMicroOp.exceptionVl.bits      := io.in.bits.src_vl.asTypeOf(VConfig()).vl
388    segmentOffset                     := 0.U
389    instMicroOp.isFof                 := (fuOpType === VlduType.vleff) && FuType.isVSegLoad(io.in.bits.uop.fuType)
390    instMicroOp.isVSegLoad            := FuType.isVSegLoad(io.in.bits.uop.fuType)
391    instMicroOp.isVSegStore           := FuType.isVSegStore(io.in.bits.uop.fuType)
392    isMisalignReg                     := false.B
393    notCross16ByteReg                 := false.B
394  }
395  // latch data
396  when(io.in.fire && !isEnqFixVlUop){
397    data(enqPtr.value)                := io.in.bits.src_vs3
398    stride(enqPtr.value)              := io.in.bits.src_stride
399    uopq(enqPtr.value).uop            := io.in.bits.uop
400  }
401
402  // update enqptr, only 1 port
403  when(io.in.fire && !isEnqFixVlUop){
404    enqPtr                            := enqPtr + 1.U
405  }
406
407  /*************************************************************************
408   *                            output logic
409   *************************************************************************/
410
411  val indexStride                     = IndexAddr( // index for indexed instruction
412                                                    index = stride(stridePtr.value),
413                                                    flow_inner_idx = issueIndexIdx,
414                                                    eew = issueEew
415                                                  )
416  val realSegmentOffset               = Mux(isIndexed(issueInstType),
417                                            indexStride,
418                                            segmentOffset)
419  val vaddr                           = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset
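  // Illustrative example (assuming a unit-stride segment access with eew = 32, i.e. alignedType = 2):
  // field 1 of segment s is accessed at baseVaddr + 4 + s * (nf + 1) * 4, because segmentOffset grows
  // by (maxNfields + 1) << eew bytes per segment (see the segmentOffset update below).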
420
421  val misalignLowVaddr                = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W))
422  val misalignLowVaddrDup             = Cat(latchVaddrDup(latchVaddrDup.getWidth - 1, 3), 0.U(3.W))
423  val misalignHighVaddr               = Cat(latchVaddr(latchVaddr.getWidth - 1, 3) + 1.U, 0.U(3.W))
424  val misalignHighVaddrDup            = Cat(latchVaddrDup(latchVaddrDup.getWidth - 1, 3) + 1.U, 0.U(3.W))
425  val notCross16ByteVaddr             = Cat(latchVaddr(latchVaddr.getWidth - 1, 4), 0.U(4.W))
426  val notCross16ByteVaddrDup          = Cat(latchVaddrDup(latchVaddrDup.getWidth - 1, 4), 0.U(4.W))
427 //  val misalignVaddr                   = Mux(notCross16ByteReg, notCross16ByteVaddr, Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr))
428  val misalignVaddr                   = Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr)
429  val misalignVaddrDup                = Mux(isFirstSplit, misalignLowVaddrDup, misalignHighVaddrDup)
430  val tlbReqVaddr                     = Mux(isMisalignReg, misalignVaddr, vaddr)
431  //latch vaddr
432  when(state === s_tlb_req && !isMisalignReg){
433    latchVaddr := vaddr(VAddrBits - 1, 0)
434    latchVaddrDup := vaddr(VAddrBits - 1, 0)
435  }
436  /**
437   * tlb req and tlb resp
438   */
439
440  // query DTLB IO Assign
441  io.dtlb.req                         := DontCare
442  io.dtlb.resp.ready                  := true.B
443  io.dtlb.req.valid                   := state === s_tlb_req && segmentActive
444  io.dtlb.req.bits.cmd                := Mux(isVSegLoad, TlbCmd.read, TlbCmd.write)
445  io.dtlb.req.bits.vaddr              := tlbReqVaddr(VAddrBits - 1, 0)
446  io.dtlb.req.bits.fullva             := tlbReqVaddr
447  io.dtlb.req.bits.checkfullva        := true.B
448  io.dtlb.req.bits.size               := instMicroOp.alignedType(2,0)
449  io.dtlb.req.bits.memidx.is_ld       := isVSegLoad
450  io.dtlb.req.bits.memidx.is_st       := isVSegStore
451  io.dtlb.req.bits.debug.robIdx       := instMicroOp.uop.robIdx
452  io.dtlb.req.bits.no_translate       := false.B
453  io.dtlb.req.bits.debug.pc           := instMicroOp.uop.pc
454  io.dtlb.req.bits.debug.isFirstIssue := DontCare
455  io.dtlb.req_kill                    := false.B
456
457  val canTriggerException              = segmentIdx === 0.U || !instMicroOp.isFof // only elementIdx == 0, or a non-fof instruction, can trigger an exception
458
459  val segmentTrigger = Module(new VSegmentTrigger)
460  segmentTrigger.io.fromCsrTrigger.tdataVec             := io.fromCsrTrigger.tdataVec
461  segmentTrigger.io.fromCsrTrigger.tEnableVec           := io.fromCsrTrigger.tEnableVec
462  segmentTrigger.io.fromCsrTrigger.triggerCanRaiseBpExp := io.fromCsrTrigger.triggerCanRaiseBpExp
463  segmentTrigger.io.fromCsrTrigger.debugMode            := io.fromCsrTrigger.debugMode
464  segmentTrigger.io.memType                             := isVSegLoad
465  segmentTrigger.io.fromLoadStore.vaddr                 := Mux(isMisalignReg, misalignVaddr, latchVaddr)
466  segmentTrigger.io.fromLoadStore.isVectorUnitStride    := false.B
467  segmentTrigger.io.fromLoadStore.mask                  := 0.U
468
469  val triggerAction = segmentTrigger.io.toLoadStore.triggerAction
470  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
471  val triggerBreakpoint = TriggerAction.isExp(triggerAction)
472
473  // tlb resp
474  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
475      exceptionVec(storePageFault)      := io.dtlb.resp.bits.excp(0).pf.st
476      exceptionVec(loadPageFault)       := io.dtlb.resp.bits.excp(0).pf.ld
477      exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st
478      exceptionVec(loadGuestPageFault)  := io.dtlb.resp.bits.excp(0).gpf.ld
479      exceptionVec(storeAccessFault)    := io.dtlb.resp.bits.excp(0).af.st
480      exceptionVec(loadAccessFault)     := io.dtlb.resp.bits.excp(0).af.ld
481      when(!io.dtlb.resp.bits.miss){
482        instMicroOp.paddr             := io.dtlb.resp.bits.paddr(0)
483        instMicroOp.exceptionVaddr    := io.dtlb.resp.bits.fullva
484        instMicroOp.exceptionGpaddr   := io.dtlb.resp.bits.gpaddr(0)
485        instMicroOp.exceptionIsForVSnonLeafPTE  := io.dtlb.resp.bits.isForVSnonLeafPTE
486        lowPagePaddr  := Mux(isMisalignReg && !notCross16ByteReg && !curPtr, io.dtlb.resp.bits.paddr(0), lowPagePaddr)
487        lowPageGPaddr := Mux(isMisalignReg && !notCross16ByteReg && !curPtr, io.dtlb.resp.bits.gpaddr(0), lowPageGPaddr)
488
489        highPagePaddr  := Mux(isMisalignReg && !notCross16ByteReg && curPtr, io.dtlb.resp.bits.paddr(0), highPagePaddr)
490        highPageGPaddr := Mux(isMisalignReg && !notCross16ByteReg && curPtr, io.dtlb.resp.bits.gpaddr(0), highPageGPaddr)
491      }
492  }
493  // pmp
494  // NOTE: only load/store exceptions are handled here; if another exception happens, it is not sent here
495  val exceptionWithPf = exceptionVec(storePageFault) || exceptionVec(loadPageFault) || exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
496  val pmp = (io.pmpResp.asUInt & Fill(io.pmpResp.asUInt.getWidth, !exceptionWithPf)).asTypeOf(new PMPRespBundle())
497  when(state === s_pm) {
498    val highAddress = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
499      "b00".U -> 0.U,
500      "b01".U -> 1.U,
501      "b10".U -> 3.U,
502      "b11".U -> 7.U
503    )) + tlbReqVaddr(4, 0)
504
505    val addr_aligned = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
506      "b00".U   -> true.B,                   //b
507      "b01".U   -> (tlbReqVaddr(0)    === 0.U), //h
508      "b10".U   -> (tlbReqVaddr(1, 0) === 0.U), //w
509      "b11".U   -> (tlbReqVaddr(2, 0) === 0.U)  //d
510    ))
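    // highAddress points at the element's last byte: if its bit 4 matches bit 4 of the start address,
    // the element does not cross a 16-byte boundary. addr_aligned checks natural alignment for the
    // element width (byte/half/word/double).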
511
512    notCross16ByteWire   := highAddress(4) === tlbReqVaddr(4)
513    isMisalignWire       := !addr_aligned
514    canHandleMisalign := !pmp.mmio && !triggerBreakpoint && !triggerDebugMode
515    exceptionVec(loadAddrMisaligned)  := isMisalignWire && isVSegLoad  && canTriggerException && !canHandleMisalign
516    exceptionVec(storeAddrMisaligned) := isMisalignWire && isVSegStore && canTriggerException && !canHandleMisalign
517
518    exception_va  := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
519                     exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault) ||
520                     triggerBreakpoint || triggerDebugMode || (isMisalignWire && !canHandleMisalign)
521    exception_gpa := exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
522    exception_pa  := pmp.st || pmp.ld || pmp.mmio
523
524    instMicroOp.exception_pa  := exception_pa
525    instMicroOp.exception_va  := exception_va
526    instMicroOp.exception_gpa := exception_gpa
527    // update access fault bits. Currently, we don't support vector MMIO
528    exceptionVec(loadAccessFault)  := (exceptionVec(loadAccessFault) || pmp.ld || pmp.mmio)   && isVSegLoad  && canTriggerException
529    exceptionVec(storeAccessFault) := (exceptionVec(storeAccessFault) || pmp.st || pmp.mmio)  && isVSegStore && canTriggerException
530    exceptionVec(breakPoint)       := triggerBreakpoint && canTriggerException
531
532    exceptionVec(storePageFault)      := exceptionVec(storePageFault)      && isVSegStore && canTriggerException
533    exceptionVec(loadPageFault)       := exceptionVec(loadPageFault)       && isVSegLoad  && canTriggerException
534    exceptionVec(storeGuestPageFault) := exceptionVec(storeGuestPageFault) && isVSegStore && canTriggerException
535    exceptionVec(loadGuestPageFault)  := exceptionVec(loadGuestPageFault)  && isVSegLoad  && canTriggerException
536
537    when(exception_va || exception_gpa || exception_pa) {
538      when(canTriggerException) {
539        instMicroOp.exceptionVstart := segmentIdx // for exception
540      }.otherwise {
541        instMicroOp.exceptionVl.valid := true.B
542        instMicroOp.exceptionVl.bits := segmentIdx
543      }
544    }
545
546    when(exceptionVec(breakPoint) || triggerDebugMode) {
547      instMicroOp.uop.trigger := triggerAction
548    }
549
550    when(isMisalignWire && canHandleMisalign && !(exception_va || exception_gpa || exception_pa)) {
551      notCross16ByteReg := notCross16ByteWire
552      isMisalignReg       := true.B
553      curPtr              := false.B
554    }
555  }
556
557  /**
558   * flush sbuffer IO Assign
559   */
560  io.flush_sbuffer.valid           := !sbufferEmpty && (state === s_flush_sbuffer_req)
561
562  /**
563  * update curPtr
564  * */
565  when(state === s_finish || state === s_latch_and_merge_data || state === s_send_data && stateNext =/= s_send_data) {
566    isMisalignReg     := false.B
567    notCross16ByteReg := false.B
568    curPtr := false.B
569  } .otherwise {
570    when(isVSegLoad) {
571      when(isMisalignReg && !notCross16ByteReg && state === s_misalign_merge_data) {
572        curPtr := true.B
573      }
574    } .otherwise {
575      when(isMisalignReg && !notCross16ByteReg && state === s_pm) {
576        curPtr := !curPtr
577      } .elsewhen(isMisalignReg && !notCross16ByteReg && state === s_pm && stateNext === s_send_data) {
578        curPtr := false.B
579      } .elsewhen(isMisalignReg && !notCross16ByteReg && state === s_send_data && stateNext === s_send_data && sbufferOut.fire) {
580        curPtr := !curPtr
581      }
582    }
583  }
584
585
586
587  /**
588   * merge data for load
589   */
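  // cacheData selects the element's bytes out of the 128-bit dcache response using the low address bits;
  // misalignLowData / misalignCombinedData assemble an element whose two halves come from two separate
  // split accesses (first the low part, then the high part, merged in s_misalign_merge_data).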
590  val cacheData = LookupTree(latchVaddr(3,0), List(
591    "b0000".U -> io.rdcache.resp.bits.data_delayed(63,    0),
592    "b0001".U -> io.rdcache.resp.bits.data_delayed(63,    8),
593    "b0010".U -> io.rdcache.resp.bits.data_delayed(63,   16),
594    "b0011".U -> io.rdcache.resp.bits.data_delayed(63,   24),
595    "b0100".U -> io.rdcache.resp.bits.data_delayed(63,   32),
596    "b0101".U -> io.rdcache.resp.bits.data_delayed(63,   40),
597    "b0110".U -> io.rdcache.resp.bits.data_delayed(63,   48),
598    "b0111".U -> io.rdcache.resp.bits.data_delayed(63,   56),
599    "b1000".U -> io.rdcache.resp.bits.data_delayed(127,  64),
600    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
601    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
602    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
603    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
604    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
605    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
606    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
607  ))
608
609  val misalignLowData  = LookupTree(latchVaddr(3,0), List(
610    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
611    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
612    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
613    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
614    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
615    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
616    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
617  ))
618
619  val misalignCombinedData = LookupTree(latchVaddr(3,0), List(
620    "b1001".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(55,    0))(63, 0),
621    "b1010".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(47,    0))(63, 0),
622    "b1011".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(39,    0))(63, 0),
623    "b1100".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(31,    0))(63, 0),
624    "b1101".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(23,    0))(63, 0),
625    "b1110".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(15,    0))(63, 0),
626    "b1111".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(7,     0))(63, 0)
627  ))
628  when(state === s_misalign_merge_data && segmentActive){
629    when(!curPtr) {
630      combinedData := misalignLowData
631    } .otherwise {
632      combinedData := misalignCombinedData
633    }
634  }
635
636  val shiftData    = (io.rdcache.resp.bits.data_delayed >> (latchVaddr(3, 0) << 3)).asUInt(63, 0)
637  val mergemisalignData = Mux(notCross16ByteReg, shiftData, combinedData)
638  val pickData  = rdataVecHelper(alignedType(1,0), Mux(isMisalignReg, mergemisalignData, cacheData))
639  val mergedData = mergeDataWithElemIdx(
640    oldData = data(splitPtr.value),
641    newData = Seq(pickData),
642    alignedType = alignedType(1,0),
643    elemIdx = Seq(elemIdxInVd),
644    valids = Seq(true.B)
645  )
646  when(state === s_latch_and_merge_data && segmentActive){
647    data(splitPtr.value) := mergedData
648  }
649
650
651  /**
652   * split data for store
653   * */
654  val splitData = genVSData(
655    data = data(splitPtr.value),
656    elemIdx = elemIdxInVd,
657    alignedType = alignedType
658  )
659  val flowData  = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
660  val wmask     = genVWmask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
661  val bmask     = genBasemask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
662  val dcacheReqVaddr = Mux(isMisalignReg, misalignVaddr, latchVaddr)
663  val dcacheReqVaddrDup = Mux(isMisalignReg, misalignVaddrDup, latchVaddrDup)
664  val dcacheReqPaddr = Mux(isMisalignReg, Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, PageOffsetWidth), misalignVaddr(PageOffsetWidth - 1, 0)), instMicroOp.paddr)
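  // For a misaligned split access, the physical address is rebuilt from the translated page frame in
  // instMicroOp.paddr and the page offset of the current split's virtual address.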
665  /**
666   * rdcache req; write requests don't need to query the dcache, because we write elements to the sbuffer
667   */
668  io.rdcache.req                    := DontCare
669  io.rdcache.req.valid              := state === s_cache_req && isVSegLoad
670  io.rdcache.req.bits.cmd           := MemoryOpConstants.M_XRD
671  io.rdcache.req.bits.vaddr         := dcacheReqVaddr
672  io.rdcache.req.bits.vaddr_dup     := dcacheReqVaddrDup
673  io.rdcache.req.bits.mask          := mask
674  io.rdcache.req.bits.data          := flowData
675  io.rdcache.pf_source              := LOAD_SOURCE.U
676  io.rdcache.req.bits.id            := DontCare
677  io.rdcache.resp.ready             := true.B
678  io.rdcache.s1_paddr_dup_lsu       := dcacheReqPaddr
679  io.rdcache.s1_paddr_dup_dcache    := dcacheReqPaddr
680  io.rdcache.s1_kill                := false.B
681  io.rdcache.s1_kill_data_read      := false.B
682  io.rdcache.s2_kill                := false.B
683  if (env.FPGAPlatform){
684    io.rdcache.s0_pc                := DontCare
685    io.rdcache.s1_pc                := DontCare
686    io.rdcache.s2_pc                := DontCare
687  }else{
688    io.rdcache.s0_pc                := instMicroOp.uop.pc
689    io.rdcache.s1_pc                := instMicroOp.uop.pc
690    io.rdcache.s2_pc                := instMicroOp.uop.pc
691  }
692  io.rdcache.replacementUpdated     := false.B
693  io.rdcache.is128Req               := notCross16ByteReg
694
695
696  /**
697   * write data to sbuffer
698   * */
699  val sbufferAddrLow4bit = latchVaddr(3, 0)
700
701  val notCross16BytePaddr          = Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, 4), 0.U(4.W))
702  val notCross16ByteData           = flowData << (sbufferAddrLow4bit << 3)
703
704  val Cross16ByteMask = Wire(UInt(32.W))
705  val Cross16ByteData = Wire(UInt(256.W))
706  Cross16ByteMask := bmask << sbufferAddrLow4bit
707  Cross16ByteData := flowData << (sbufferAddrLow4bit << 3)
708
709  val vaddrLow  = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W))
710  val vaddrHigh = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W)) + 8.U
711
712
713  val paddrLow  = Cat(lowPagePaddr(lowPagePaddr.getWidth - 1, 3), 0.U(3.W))
714  val paddrHigh = Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, 3), 0.U(3.W))
715
716  val maskLow   = Cross16ByteMask(15, 0)
717  val maskHigh  = Cross16ByteMask(31, 16)
718
719  val dataLow   = Cross16ByteData(127, 0)
720  val dataHigh  = Cross16ByteData(255, 128)
721
722  val sbuffermisalignMask          = Mux(notCross16ByteReg, wmask, Mux(isFirstSplit, maskLow, maskHigh))
723  val sbuffermisalignData          = Mux(notCross16ByteReg, notCross16ByteData, Mux(isFirstSplit, dataLow, dataHigh))
724  val sbuffermisalignPaddr         = Mux(notCross16ByteReg, notCross16BytePaddr, Mux(isFirstSplit, paddrLow, paddrHigh))
725  val sbuffermisalignVaddr         = Mux(notCross16ByteReg, notCross16ByteVaddr, Mux(isFirstSplit, vaddrLow, vaddrHigh))
726
727  val sbufferMask                  = Mux(isMisalignReg, sbuffermisalignMask, wmask)
728  val sbufferData                  = Mux(isMisalignReg, sbuffermisalignData, flowData)
729  val sbufferVaddr                 = Mux(isMisalignReg, sbuffermisalignVaddr, latchVaddr)
730  val sbufferPaddr                 = Mux(isMisalignReg, sbuffermisalignPaddr, instMicroOp.paddr)
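  // For misaligned stores, the element's data and mask are shifted into position by the low address
  // bits: a not-cross-16-byte element is sent as a single request, while a cross-16-byte element is
  // sent as two requests (the low half first, then the high half, selected by curPtr/isFirstSplit).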
731
732  dontTouch(wmask)
733  dontTouch(Cross16ByteMask)
734  sbufferOut.bits                  := DontCare
735  sbufferOut.valid                 := state === s_send_data && segmentActive
736  sbufferOut.bits.vecValid         := state === s_send_data && segmentActive
737  sbufferOut.bits.mask             := sbufferMask
738  sbufferOut.bits.data             := sbufferData
739  sbufferOut.bits.vaddr            := sbufferVaddr
740  sbufferOut.bits.cmd              := MemoryOpConstants.M_XWR
741  sbufferOut.bits.id               := DontCare
742  sbufferOut.bits.addr             := sbufferPaddr
743
744  NewPipelineConnect(
745    sbufferOut, io.sbuffer, io.sbuffer.fire,
746    false.B,
747    Option(s"VSegmentUnitPipelineConnect")
748  )
749
750  io.vecDifftestInfo.valid         := io.sbuffer.valid
751  io.vecDifftestInfo.bits          := uopq(deqPtr.value).uop
752
753  /**
754   * update ptr
755   * */
756  private val fieldActiveWirteFinish = sbufferOut.fire && segmentActive // write data finished and this is an active segment
757  XSError(sbufferOut.fire && !segmentActive, "Attempted to write an inactive segment to the sbuffer, something is wrong!\n")
758
759  private val segmentInactiveFinish = ((state === s_latch_and_merge_data) || (state === s_send_data && stateNext =/= s_send_data)) && !segmentActive
760
761  val splitPtrOffset = Mux(
762    isIndexed(instType),
763    Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt),
764    Mux(emul.asSInt < 0.S, 1.U, (1.U << emul).asUInt)
765  )
766  splitPtrNext :=
767    Mux(fieldIdx === maxNfields || !segmentActive, // if the segment is active it must be completed; otherwise jump to the next segment
768      // segment finished; shift right by 'issueUopFlowNumLog2' so that emul != 1 still generates the correct lateral offset.
769     (deqPtr + ((segmentIdx +& 1.U) >> issueUopFlowNumLog2).asUInt),
770      // next field.
771     (splitPtr + splitPtrOffset)
772    )
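  // For the example in the header comment (lmul = emul = 2), splitPtrOffset is 2, so consecutive fields
  // of one segment land in v2 -> v4 -> v6 -> v8; once a segment finishes, the (segmentIdx + 1) >>
  // issueUopFlowNumLog2 term advances the base to the next register of each field group (v3/v5/v7/v9).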
773
774  if (backendParams.debugEn){
775    dontTouch(issueUopFlowNumLog2)
776    dontTouch(issueEmul)
777    dontTouch(splitPtrNext)
778    dontTouch(stridePtr)
779    dontTouch(segmentActive)
780  }
781
782  // update splitPtr
783  when(state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && (fieldActiveWirteFinish || !segmentActive))){
784    splitPtr := splitPtrNext
785  }.elsewhen(io.in.fire && !instMicroOpValid){
786    splitPtr := deqPtr // initial splitPtr
787  }
788
789  // update stridePtr, only used for indexed accesses
790  val strideOffset = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
791  stridePtr       := deqPtr + strideOffset
792
793  // update fieldIdx
794  when(io.in.fire && !instMicroOpValid){ // init
795    fieldIdx := 0.U
796  }.elsewhen(state === s_latch_and_merge_data && segmentActive ||
797            (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish)){ // only if segment is active
798
799    /* next field; wrap to 0 when the last field of this segment completes */
800    fieldIdx := Mux(fieldIdx === maxNfields, 0.U, fieldIdx + 1.U)
801  }.elsewhen(segmentInactiveFinish){ // segment is inactive, go to next segment
802    fieldIdx := 0.U
803  }
804  //update segmentIdx
805  when(io.in.fire && !instMicroOpValid){
806    segmentIdx := 0.U
807  }.elsewhen(fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish)) &&
808             segmentIdx =/= maxSegIdx){ // next segment, only if segment is active
809
810    segmentIdx := segmentIdx + 1.U
811  }.elsewhen(segmentInactiveFinish && segmentIdx =/= maxSegIdx){ // if segment is inactive, go to next segment
812    segmentIdx := segmentIdx + 1.U
813  }
814
815  //update segmentOffset
816  /* increase segmentOffset whether the segment is active or inactive */
817  when((fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish))) ||
818       segmentInactiveFinish){
819
820    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew(1, 0), stride(stridePtr.value))
821  }
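  // e.g. for a unit-stride segment access with nf = 3 (4 fields) and eew = 32, segmentOffset grows by
  // 16 bytes per segment; for strided accesses the stride register is added instead (indexed accesses
  // take their per-segment offset from the index register via indexStride above).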
822
823  //update deqPtr
824  when((state === s_finish) && !isEmpty(enqPtr, deqPtr)){
825    deqPtr := deqPtr + 1.U
826  }
827
828
829  /*************************************************************************
830   *                            fof logic
831   *************************************************************************/
832
833  //Enq
834  when(isEnqFixVlUop && !fofBufferValid) { fofBuffer := io.in.bits.uop }
835  when(isEnqFixVlUop && !fofBufferValid) { fofBufferValid := true.B }
836
837  //Deq
838  val fofFixVlValid                    = state === s_fof_fix_vl && fofBufferValid
839
840  when(fofFixVlValid) { fofBuffer      := 0.U.asTypeOf(new DynInst) }
841  when(fofFixVlValid) { fofBufferValid := false.B }
842
843
844  /*************************************************************************
845   *                            dequeue logic
846   *************************************************************************/
847  val vdIdxInField = GenUopIdxInField(Mux(isIndexed(instType), issueLmul, issueEmul), uopq(deqPtr.value).uop.vpu.vuopIdx)
848  /* select the mask of vd; may be removed in the future */
849  val realEw        = Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
850  val maskDataVec: Vec[UInt] = VecDataToMaskDataVec(instMicroOp.mask, realEw)
851  val maskUsed      = maskDataVec(vdIdxInField)
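  // vdIdxInField selects, roughly, the slice of the source mask that belongs to the vd register being
  // written back, so each uop's writeback carries only its own portion of instMicroOp.mask.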
852
853  when(stateNext === s_idle){
854    instMicroOpValid := false.B
855  }
856  // writeback to backend
857  val writebackOut                     = WireInit(io.uopwriteback.bits)
858  val writebackValid                   = (state === s_finish) && !isEmpty(enqPtr, deqPtr) || fofFixVlValid
859
860  when(fofFixVlValid) {
861    writebackOut.uop                    := fofBuffer
862    writebackOut.uop.vpu.vl             := instMicroOp.exceptionVl.bits
863    writebackOut.data                   := instMicroOp.exceptionVl.bits
864    writebackOut.mask.get               := Fill(VLEN, 1.U)
865    writebackOut.uop.vpu.vmask          := Fill(VLEN, 1.U)
866  }.otherwise{
867    writebackOut.uop                    := uopq(deqPtr.value).uop
868    writebackOut.uop.vpu                := instMicroOp.uop.vpu
869    writebackOut.uop.trigger            := instMicroOp.uop.trigger
870    writebackOut.uop.exceptionVec       := instMicroOp.uop.exceptionVec
871    writebackOut.mask.get               := instMicroOp.mask
872    writebackOut.data                   := data(deqPtr.value)
873    writebackOut.vdIdx.get              := vdIdxInField
874    writebackOut.uop.vpu.vl             := Mux(instMicroOp.exceptionVl.valid, instMicroOp.exceptionVl.bits, instMicroOp.vl)
875    writebackOut.uop.vpu.vstart         := Mux(instMicroOp.uop.exceptionVec.asUInt.orR || TriggerAction.isDmode(instMicroOp.uop.trigger), instMicroOp.exceptionVstart, instMicroOp.vstart)
876    writebackOut.uop.vpu.vmask          := maskUsed
877    writebackOut.uop.vpu.vuopIdx        := uopq(deqPtr.value).uop.vpu.vuopIdx
878    // when an exception updates vl, the tail-undisturbed (vta = tu) policy should be used.
879    writebackOut.uop.vpu.vta            := Mux(instMicroOp.exceptionVl.valid, VType.tu, instMicroOp.uop.vpu.vta)
880    writebackOut.debug                  := DontCare
881    writebackOut.vdIdxInField.get       := vdIdxInField
882    writebackOut.uop.robIdx             := instMicroOp.uop.robIdx
883    writebackOut.uop.fuOpType           := instMicroOp.uop.fuOpType
884  }
885
886  io.uopwriteback.valid               := RegNext(writebackValid)
887  io.uopwriteback.bits                := RegEnable(writebackOut, writebackValid)
888
889  dontTouch(writebackValid)
890
891  //to RS
892  val feedbackOut                      = WireInit(0.U.asTypeOf(io.feedback.bits))
893  val feedbackValid                    = state === s_finish && !isEmpty(enqPtr, deqPtr)
894  feedbackOut.hit                     := true.B
895  feedbackOut.robIdx                  := instMicroOp.uop.robIdx
896  feedbackOut.sourceType              := DontCare
897  feedbackOut.flushState              := DontCare
898  feedbackOut.dataInvalidSqIdx        := DontCare
899  feedbackOut.sqIdx                   := uopq(deqPtr.value).uop.sqIdx
900  feedbackOut.lqIdx                   := uopq(deqPtr.value).uop.lqIdx
901
902  io.feedback.valid                   := RegNext(feedbackValid)
903  io.feedback.bits                    := RegEnable(feedbackOut, feedbackValid)
904
905  dontTouch(feedbackValid)
906
907  // exception
908  io.exceptionInfo                    := DontCare
909  io.exceptionInfo.bits.robidx        := instMicroOp.uop.robIdx
910  io.exceptionInfo.bits.uopidx        := uopq(deqPtr.value).uop.vpu.vuopIdx
911  io.exceptionInfo.bits.vstart        := instMicroOp.exceptionVstart
912  io.exceptionInfo.bits.vaddr         := instMicroOp.exceptionVaddr
913  io.exceptionInfo.bits.gpaddr        := instMicroOp.exceptionGpaddr
914  io.exceptionInfo.bits.isForVSnonLeafPTE := instMicroOp.exceptionIsForVSnonLeafPTE
915  io.exceptionInfo.bits.vl            := instMicroOp.exceptionVl.bits
916  io.exceptionInfo.valid              := (state === s_finish) && instMicroOp.uop.exceptionVec.asUInt.orR && !isEmpty(enqPtr, deqPtr)
917}
918
919