xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VSegmentUnit.scala (revision 16c2d8bb27e9a24ed5ef5e4885693e6a30b536df)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend.rob.RobPtr
26import xiangshan.backend.Bundles._
27import xiangshan.mem._
28import xiangshan.backend.fu.{FuType, PMPRespBundle}
29import freechips.rocketchip.diplomacy.BufferParams
30import xiangshan.cache.mmu._
31import xiangshan.cache._
32import xiangshan.cache.wpu.ReplayCarry
33import xiangshan.backend.fu.util.SdtrigExt
34import xiangshan.ExceptionNO._
35import xiangshan.backend.fu.vector.Bundles.{VConfig, VType}
36import xiangshan.backend.datapath.NewPipelineConnect
37import xiangshan.backend.fu.NewCSR._
38import xiangshan.backend.fu.vector.Utils.VecDataToMaskDataVec
39
40class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
41{
42  val baseVaddr        = UInt(XLEN.W)
43  val uop              = new DynInst
44  val paddr            = UInt(PAddrBits.W)
45  val mask             = UInt(VLEN.W)
46  val alignedType      = UInt(alignTypeBits.W)
47  val vl               = UInt(elemIdxBits.W)
48  val uopFlowNum       = UInt(elemIdxBits.W)
49  val uopFlowNumMask   = UInt(elemIdxBits.W)
50  val isVSegLoad       = Bool()
51  val isVSegStore      = Bool()
52  // for exception
53  val vstart           = UInt(elemIdxBits.W)
54  val exceptionVaddr   = UInt(XLEN.W)
55  val exceptionGpaddr  = UInt(XLEN.W)
56  val exceptionIsForVSnonLeafPTE = Bool()
57  val exception_va     = Bool()
58  val exception_gpa    = Bool()
59  val exception_pa     = Bool()
60  val exceptionVstart  = UInt(elemIdxBits.W)
61  // valid: a fof exception occurred but cannot be raised; the vl of every written-back uop must be updated with exceptionVl
62  val exceptionVl      = ValidIO(UInt(elemIdxBits.W))
63  val isFof            = Bool()
64}
65
66// latch each uop's VecWen, pdest, v0Wen, uopIdx
67class VSegmentUop(implicit p: Parameters) extends VLSUBundle{
68  val uop              = new DynInst
69}
70
71class VSegmentUnit (implicit p: Parameters) extends VLSUModule
72  with HasDCacheParameters
73  with MemoryOpConstants
74  with SdtrigExt
75  with HasLoadHelper
76{
77  val io               = IO(new VSegmentUnitIO)
78
79  val maxSize          = VSegmentBufferSize
80
81  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize){
82  }
83
84  object VSegUPtr {
85    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
86      val ptr           = Wire(new VSegUPtr)
87      ptr.flag         := f
88      ptr.value        := v
89      ptr
90    }
91  }
92
93  val maxSplitNum = 2
94
95  /**
96  ********************************************************************************************************
97  *  An example to illustrate the working logic of the segment unit:                                     *
98  *    For:                                                                                              *
99  *      lmul=2 sew=32 emul=2 eew=32  vl=16                                                              *
100  *    Then:                                                                                             *
101  *      Access memory in the order:                                                                     *
102  *        (V2,S0),(V4,S0),(V6,S0),(V8,S0),                                                              *
103  *        (V2,S1),(V4,S1),(V6,S1),(V8,S1),                                                              *
104  *        (V2,S2),(V4,S2),(V6,S2),(V8,S2),                                                              *
105  *        (V2,S3),(V4,S3),(V6,S3),(V8,S3),                                                              *
106  *        (V3,S4),(V5,S4),(V7,S4),(V9,S4),                                                              *
107  *        (V3,S5),(V5,S5),(V7,S5),(V9,S5),                                                              *
108  *        (V3,S6),(V5,S6),(V7,S6),(V9,S6),                                                              *
109  *        (V3,S7),(V5,S7),(V7,S7),(V9,S7),                                                              *
110  *                                                                                                      *
111  *                                                                                                      *
112  *    [[data]] saves the data generated by the access and corresponds to the register.                  *
113  *    [[splitPtr]] controls the destination register written to.                                        *
114  *                                                                                                      *
115  *    The splitPtr offset can be seen in the assignment logic of [[splitPtrNext]],                      *
116  *    which is mainly calculated from [[fieldIdx]] and [[segmentIdx]].                                  *
117  *    Different fields of the same segment are accessed first, and then different segments are visited. *
118  *    For the case of 'emul' greater than 1, as in the following example,                               *
119  *    although 'v2' and 'v3' are different vd registers holding the same field, they belong to          *
120  *    different segments, so they are accessed sequentially, as in 'Access memory in the order' above.  *
121  *                                                                                                      *
122  *                         [[segmentIdx]]                                                               *
123  *                               |                                                                      *
124  *                               |                                                                      *
125  *                               V                                                                      *
126  *                                                                                                      *
127  *                               S0               S1                S2                 S3               *
128  *                      ----------------------------------------------------------------------------    *
129  *  [[splitPtr]]--> v2  |     field0     |      field0     |      field0     |      field0         |    *
130  *                      ----------------------------------------------------------------------------    *
131  *                               S4               S5                S6                 S7               *
132  *                      ----------------------------------------------------------------------------    *
133  *                  v3  |     field0     |      field0     |      field0     |      field0         |    *
134  *                      ----------------------------------------------------------------------------    *
135  *                               S0               S1                S2                 S3               *
136  *                      ----------------------------------------------------------------------------    *
137  *                  v4  |     field1     |      field1     |      field1     |      field1         |    *
138  *                      ----------------------------------------------------------------------------    *
139  *                               S4               S5                S6                 S7               *
140  *                      ----------------------------------------------------------------------------    *
141  *                  v5  |     field1     |      field1     |      field1     |      field1         |    *
142  *                      ----------------------------------------------------------------------------    *
143  *                               S0               S1                S2                 S3               *
144  *                      ----------------------------------------------------------------------------    *
145  *                  v6  |     field2     |      field2     |      field2     |      field2         |    *
146  *                      ----------------------------------------------------------------------------    *
147  *                               S4               S5                S6                 S7               *
148  *                      ----------------------------------------------------------------------------    *
149  *                  v7  |     field2     |      field2     |      field2     |      field2         |    *
150  *                      ----------------------------------------------------------------------------    *
151  *                               S0               S1                S2                 S3               *
152  *                      ----------------------------------------------------------------------------    *
153  *                  v8  |     field3     |      field3     |      field3     |      field3         |    *
154  *                      ----------------------------------------------------------------------------    *
155  *                               S4               S5                S6                 S7               *
156  *                      ----------------------------------------------------------------------------    *
157  *                  v9  |     field3     |      field3     |      field3     |      field3         |    *
158  *                      ----------------------------------------------------------------------------    *                                                                                    *
159  *                                                                                                      *                                                                                    *
160  *                                                                                                      *                                                                                    *
161  ********************************************************************************************************
162  **/
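  /*
   * Illustrative walk-through of the example above (a sketch assuming VLEN = 128, so with
   * emul = 2 each vd register holds 4 elements of 32 bits, and assuming the eight uops are
   * enqueued in destination-register order v2, v3, ..., v9):
   *   - splitPtrOffset = EMUL = 2 entries, so within one segment splitPtr steps through the
   *     entries of v2 -> v4 -> v6 -> v8 (one entry per field).
   *   - After the last field of a segment, splitPtr is recomputed as
   *     deqPtr + ((segmentIdx + 1) >> issueUopFlowNumLog2); with 4 elements per register this
   *     yields the entry of v2 again for S1..S3 and the entry of v3 once S4..S7 are reached.
   */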
163
164
165  // buffer uop
166  val instMicroOp       = Reg(new VSegmentBundle)
167  val instMicroOpValid  = RegInit(false.B)
168  val data              = Reg(Vec(maxSize, UInt(VLEN.W)))
169  val uopq              = Reg(Vec(maxSize, new VSegmentUop))
170  val stride            = Reg(Vec(maxSize, UInt(VLEN.W)))
171  val allocated         = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
172  val enqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
173  val deqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
174  val stridePtr         = WireInit(0.U.asTypeOf(new VSegUPtr)) // for select stride/index
175  val stridePtrReg      = RegInit(0.U.asTypeOf(new VSegUPtr)) // for select stride/index
176
177  val segmentIdx        = RegInit(0.U(elemIdxBits.W))
178  val fieldIdx          = RegInit(0.U(fieldBits.W))
179  val segmentOffset     = RegInit(0.U(XLEN.W))
180  val splitPtr          = RegInit(0.U.asTypeOf(new VSegUPtr)) // for select load/store data
181  val splitPtrNext      = WireInit(0.U.asTypeOf(new VSegUPtr))
182
183  val exception_va      = WireInit(false.B)
184  val exception_gpa     = WireInit(false.B)
185  val exception_pa      = WireInit(false.B)
186
187  val maxSegIdx         = instMicroOp.vl - 1.U
188  val maxNfields        = instMicroOp.uop.vpu.nf
189  val latchVaddr        = RegInit(0.U(VAddrBits.W))
190  val latchVaddrDup     = RegInit(0.U(VAddrBits.W))
191
192  XSError((segmentIdx > maxSegIdx) && instMicroOpValid, s"segmentIdx > vl, something is wrong!\n")
193  XSError((fieldIdx > maxNfields) &&  instMicroOpValid, s"fieldIdx > nfields, something is wrong!\n")
194
195  // MicroOp
196  val baseVaddr                       = instMicroOp.baseVaddr
197  val alignedType                     = instMicroOp.alignedType
198  val fuType                          = instMicroOp.uop.fuType
199  val isVSegLoad                      = instMicroOp.isVSegLoad
200  val isVSegStore                     = instMicroOp.isVSegStore
201  val mask                            = instMicroOp.mask
202  val exceptionVec                    = instMicroOp.uop.exceptionVec
203  val issueEew                        = instMicroOp.uop.vpu.veew
204  val issueLmul                       = instMicroOp.uop.vpu.vtype.vlmul
205  val issueSew                        = instMicroOp.uop.vpu.vtype.vsew
206  val issueEmul                       = EewLog2(issueEew) - issueSew + issueLmul
207  val elemIdxInVd                     = segmentIdx & instMicroOp.uopFlowNumMask
208  val issueInstType                   = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always segment instruction
209  val issueUopFlowNumLog2             = GenRealFlowLog2(issueInstType, issueEmul, issueLmul, issueEew, issueSew, true) // max element number log2 in vd
210  val issueVlMax                      = instMicroOp.uopFlowNum // max elementIdx in vd
211  val issueMaxIdxInIndex              = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0)) // max number of index elements held in one index register
212  val issueMaxIdxInIndexMask          = GenVlMaxMask(issueMaxIdxInIndex, elemIdxBits)
213  val issueMaxIdxInIndexLog2          = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0))
214  val issueIndexIdx                   = segmentIdx & issueMaxIdxInIndexMask
215  val segmentActive                   = (mask & UIntToOH(segmentIdx)).orR
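  // A segment takes part in the access only if its bit in the source mask is set
  // (segmentActive); inactive segments still advance the pointers below, but they neither
  // access memory nor modify the data buffer.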
216
217  // sbuffer write interface
218  val sbufferOut                      = Wire(Decoupled(new DCacheWordReqWithVaddrAndPfFlag))
219
220
221  // segment fof instruction buffer
222  val fofBuffer                       = RegInit(0.U.asTypeOf(new DynInst))
223  val fofBufferValid                  = RegInit(false.B)
224
225
226  // Segment instruction's FSM
227  /*
228  * s_idle: wait request
229  * s_flush_sbuffer_req: flush sbuffer
230  * s_wait_flush_sbuffer_resp: wait sbuffer empty
231  * s_tlb_req: request tlb
232  * s_wait_tlb_resp: wait tlb resp
233  * s_pm: check pmp
234  * s_cache_req: request cache
235  * s_cache_resp: wait cache resp
236  * s_misalign_merge_data: merge the data of a misaligned load that crosses a 16-byte boundary
237  * s_latch_and_merge_data: latch and merge load data
238  * s_send_data: send store data to the sbuffer
239  * s_wait_to_sbuffer: wait for data in the sbufferOut pipeline stage to be sent to the sbuffer
240  * s_finish: the normal uop is complete
241  * s_fof_fix_vl: write back the extra uop of a fof instruction to modify vl.
242  * */
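  /*
   * Typical paths through the FSM (a sketch derived from the transitions below):
   *   load : s_idle -> s_flush_sbuffer_req [-> s_wait_flush_sbuffer_resp] -> s_tlb_req -> s_wait_tlb_resp
   *          -> s_pm -> s_cache_req -> s_cache_resp [-> s_misalign_merge_data] -> s_latch_and_merge_data
   *          -> back to s_tlb_req for the next element ... -> s_finish [-> s_fof_fix_vl] -> s_idle
   *   store: same front end up to s_pm, then -> s_send_data -> back to s_tlb_req for the next element ...
   *          [-> s_wait_to_sbuffer] -> s_finish -> s_idle
   */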
243  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm ::s_cache_req :: s_cache_resp :: s_misalign_merge_data :: s_latch_and_merge_data :: s_send_data :: s_wait_to_sbuffer :: s_finish :: s_fof_fix_vl :: Nil = Enum(14)
244  val state             = RegInit(s_idle)
245  val stateNext         = WireInit(s_idle)
246  val sbufferEmpty      = io.flush_sbuffer.empty
247  val isEnqfof          = io.in.bits.uop.fuOpType === VlduType.vleff && io.in.valid
248  val isEnqFixVlUop     = isEnqfof && io.in.bits.uop.vpu.lastUop
249  val nextBaseVaddr     = Wire(UInt(XLEN.W))
250
251  // signals for handling misaligned accesses
252  val curPtr             = RegInit(false.B)
253  val canHandleMisalign  = WireInit(false.B)
254  val isMisalignReg      = RegInit(false.B)
255  val isMisalignWire     = WireInit(false.B)
256  val notCross16ByteReg  = RegInit(false.B)
257  val notCross16ByteWire = WireInit(false.B)
258  val combinedData       = RegInit(0.U(XLEN.W))
259
260  val lowPagePaddr       = RegInit(0.U(PAddrBits.W))
261  val lowPageGPaddr      = RegInit(0.U(GPAddrBits.W))
262
263  val highPagePaddr      = RegInit(0.U(PAddrBits.W))
264  val highPageGPaddr     = RegInit(0.U(GPAddrBits.W))
265
266  val isFirstSplit       = !curPtr
267  val isSecondSplit      = curPtr
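  /*
   * Misaligned element handling (summary of the logic below): an element that is not naturally
   * aligned but stays inside one 16-byte region (notCross16Byte*) is serviced by a single
   * 128-bit access; an element that crosses a 16-byte boundary is split into two 8-byte-aligned
   * beats handled one after the other, with curPtr selecting the beat. Loads merge the two
   * beats' data in s_misalign_merge_data, stores issue two sbuffer writes in s_send_data.
   */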
268  /**
269   * state update
270   */
271  state  := stateNext
272
273  /**
274   * state transfer
275   */
276  when(state === s_idle){
277    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
278  }.elsewhen(state === s_flush_sbuffer_req){
279    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if sbuffer is empty, go to query tlb
280
281  }.elsewhen(state === s_wait_flush_sbuffer_resp){
282    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)
283
284  }.elsewhen(state === s_tlb_req){
285    stateNext := Mux(segmentActive, s_wait_tlb_resp, Mux(isVSegLoad, s_latch_and_merge_data, s_send_data))
286
287  }.elsewhen(state === s_wait_tlb_resp){
288    stateNext := Mux(io.dtlb.resp.fire,
289                      Mux(!io.dtlb.resp.bits.miss,
290                          s_pm,
291                          s_tlb_req),
292                      s_wait_tlb_resp)
293
294  }.elsewhen(state === s_pm){
295    when(exception_pa || exception_va || exception_gpa) {
296      stateNext := s_finish
297    } .otherwise {
298      when(canHandleMisalign && isMisalignWire && !notCross16ByteWire || (isMisalignReg && !notCross16ByteReg && isFirstSplit && isVSegStore)) {
299        stateNext := s_tlb_req
300      } .otherwise {
301        /* a vector store sends its data to the sbuffer, so the dcache does not need to be queried */
302        stateNext := Mux(isVSegLoad, s_cache_req, s_send_data)
303      }
304    }
305
306  }.elsewhen(state === s_cache_req){
307    stateNext := Mux(io.rdcache.req.fire, s_cache_resp, s_cache_req)
308
309  }.elsewhen(state === s_cache_resp){
310    when(io.rdcache.resp.fire) {
311      when(io.rdcache.resp.bits.miss || io.rdcache.s2_bank_conflict) {
312        stateNext := s_cache_req
313      }.otherwise {
314
315        stateNext := Mux(isVSegLoad, Mux(isMisalignReg && !notCross16ByteReg, s_misalign_merge_data, s_latch_and_merge_data), s_send_data)
316      }
317    }.otherwise{
318      stateNext := s_cache_resp
319    }
320  }.elsewhen(state === s_misalign_merge_data) {
321    stateNext := Mux(!curPtr, s_tlb_req, s_latch_and_merge_data)
322  }.elsewhen(state === s_latch_and_merge_data) {
323    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields) ||
324      ((segmentIdx === maxSegIdx) && !segmentActive)) {
325
326      stateNext := s_finish // segment instruction finish
327    }.otherwise {
328      stateNext := s_tlb_req // need continue
329    }
330    /* if the segment is inactive, there is no need to access all of its fields */
331  }.elsewhen(state === s_send_data) { // when sbuffer accept data
332    when(!sbufferOut.fire && segmentActive || (isMisalignReg && !notCross16ByteReg && isFirstSplit)) {
333      stateNext := s_send_data
334    }.elsewhen(segmentIdx === maxSegIdx && (fieldIdx === maxNfields && sbufferOut.fire || !segmentActive && io.sbuffer.valid && !io.sbuffer.ready)) {
335      stateNext := s_wait_to_sbuffer
336    }.elsewhen(segmentIdx === maxSegIdx && !segmentActive){
337      stateNext := s_finish // segment instruction finish
338    }.otherwise {
339      stateNext := s_tlb_req // need continue
340    }
341
342  }.elsewhen(state === s_wait_to_sbuffer){
343    stateNext := Mux(io.sbuffer.fire, s_finish, s_wait_to_sbuffer)
344
345  }.elsewhen(state === s_finish){ // writeback uop
346    stateNext := Mux(
347      distanceBetween(enqPtr, deqPtr) === 0.U,
348      Mux(fofBufferValid, s_fof_fix_vl, s_idle),
349      s_finish
350    )
351  }.elsewhen(state === s_fof_fix_vl){ // writeback uop
352    stateNext := Mux(!fofBufferValid, s_idle, s_fof_fix_vl)
353  }.otherwise{ // unknown state
354    stateNext := s_idle
355    assert(false.B)
356  }
357
358  /*************************************************************************
359   *                            enqueue logic
360   *************************************************************************/
361  io.in.ready                         := true.B
362  val fuOpType                         = io.in.bits.uop.fuOpType
363  val vtype                            = io.in.bits.uop.vpu.vtype
364  val mop                              = fuOpType(6, 5)
365  val instType                         = Cat(true.B, mop)
366  val eew                              = io.in.bits.uop.vpu.veew
367  val sew                              = vtype.vsew
368  val lmul                             = vtype.vlmul
369  val emul                             = EewLog2(eew) - sew + lmul
370  val vl                               = instMicroOp.vl
371  val vm                               = instMicroOp.uop.vpu.vm
372  val vstart                           = instMicroOp.uop.vpu.vstart
373  val srcMask                          = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
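  // srcMask is the per-element body mask of the whole instruction: all ones when vm = 1
  // (unmasked), otherwise taken from the mask operand, restricted by vstart and vl.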
374  // on the first uop's enqueue, latch the microOp of the segment instruction
375  when(io.in.fire && !instMicroOpValid && !isEnqFixVlUop){
376    // element number in a vd
377    // TODO Rewrite it in a more elegant way.
378    val uopFlowNum                    = ZeroExt(GenRealFlowNum(instType, emul, lmul, eew, sew, true), elemIdxBits)
379    instMicroOp.baseVaddr             := io.in.bits.src_rs1
380    instMicroOpValid                  := true.B // if is first uop
381    instMicroOp.alignedType           := Mux(isIndexed(instType), sew(1, 0), eew)
382    instMicroOp.uop                   := io.in.bits.uop
383    instMicroOp.mask                  := srcMask
384    instMicroOp.vstart                := 0.U
385    instMicroOp.uopFlowNum            := uopFlowNum
386    instMicroOp.uopFlowNumMask        := GenVlMaxMask(uopFlowNum, elemIdxBits) // for merge data
387    instMicroOp.vl                    := io.in.bits.src_vl.asTypeOf(VConfig()).vl
388    instMicroOp.exceptionVl.valid     := false.B
389    instMicroOp.exceptionVl.bits      := io.in.bits.src_vl.asTypeOf(VConfig()).vl
390    segmentOffset                     := 0.U
391    instMicroOp.isFof                 := (fuOpType === VlduType.vleff) && FuType.isVSegLoad(io.in.bits.uop.fuType)
392    instMicroOp.isVSegLoad            := FuType.isVSegLoad(io.in.bits.uop.fuType)
393    instMicroOp.isVSegStore           := FuType.isVSegStore(io.in.bits.uop.fuType)
394    isMisalignReg                     := false.B
395    notCross16ByteReg                 := false.B
396  }
397  // latch data
398  when(io.in.fire && !isEnqFixVlUop){
399    data(enqPtr.value)                := io.in.bits.src_vs3
400    stride(enqPtr.value)              := io.in.bits.src_stride
401    uopq(enqPtr.value).uop            := io.in.bits.uop
402  }
403
404  // update enqptr, only 1 port
405  when(io.in.fire && !isEnqFixVlUop){
406    enqPtr                            := enqPtr + 1.U
407  }
408
409  /*************************************************************************
410   *                            output logic
411   *************************************************************************/
412
413  val indexStride                     = IndexAddr( // index for indexed instruction
414                                                    index = stride(stridePtrReg.value),
415                                                    flow_inner_idx = issueIndexIdx,
416                                                    eew = issueEew
417                                                  )
418  val realSegmentOffset               = Mux(isIndexed(issueInstType),
419                                            indexStride,
420                                            segmentOffset)
421
422  val vaddr                           = nextBaseVaddr + realSegmentOffset
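  // Address generation: nextBaseVaddr already contains rs1 plus the per-field offset
  // (fieldIdx << alignedType); unit-stride and strided accesses then add the accumulated
  // segmentOffset, while indexed accesses add the index element selected from the index
  // register instead.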
423
424  val misalignLowVaddr                = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W))
425  val misalignLowVaddrDup             = Cat(latchVaddrDup(latchVaddrDup.getWidth - 1, 3), 0.U(3.W))
426  val misalignHighVaddr               = Cat(latchVaddr(latchVaddr.getWidth - 1, 3) + 1.U, 0.U(3.W))
427  val misalignHighVaddrDup            = Cat(latchVaddrDup(latchVaddrDup.getWidth - 1, 3) + 1.U, 0.U(3.W))
428  val notCross16ByteVaddr             = Cat(latchVaddr(latchVaddr.getWidth - 1, 4), 0.U(4.W))
429  val notCross16ByteVaddrDup          = Cat(latchVaddrDup(latchVaddrDup.getWidth - 1, 4), 0.U(4.W))
430 //  val misalignVaddr                   = Mux(notCross16ByteReg, notCross16ByteVaddr, Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr))
431  val misalignVaddr                   = Mux(isFirstSplit, misalignLowVaddr, misalignHighVaddr)
432  val misalignVaddrDup                = Mux(isFirstSplit, misalignLowVaddrDup, misalignHighVaddrDup)
433  val tlbReqVaddr                     = Mux(isMisalignReg, misalignVaddr, vaddr)
434  //latch vaddr
435  when(state === s_tlb_req && !isMisalignReg){
436    latchVaddr := vaddr(VAddrBits - 1, 0)
437    latchVaddrDup := vaddr(VAddrBits - 1, 0)
438  }
439  /**
440   * tlb req and tlb resp
441   */
442
443  // query DTLB IO Assign
444  io.dtlb.req                         := DontCare
445  io.dtlb.resp.ready                  := true.B
446  io.dtlb.req.valid                   := state === s_tlb_req && segmentActive
447  io.dtlb.req.bits.cmd                := Mux(isVSegLoad, TlbCmd.read, TlbCmd.write)
448  io.dtlb.req.bits.vaddr              := tlbReqVaddr(VAddrBits - 1, 0)
449  io.dtlb.req.bits.fullva             := tlbReqVaddr
450  io.dtlb.req.bits.checkfullva        := true.B
451  io.dtlb.req.bits.size               := instMicroOp.alignedType(2,0)
452  io.dtlb.req.bits.memidx.is_ld       := isVSegLoad
453  io.dtlb.req.bits.memidx.is_st       := isVSegStore
454  io.dtlb.req.bits.debug.robIdx       := instMicroOp.uop.robIdx
455  io.dtlb.req.bits.no_translate       := false.B
456  io.dtlb.req.bits.debug.pc           := instMicroOp.uop.pc
457  io.dtlb.req.bits.debug.isFirstIssue := DontCare
458  io.dtlb.req_kill                    := false.B
459
460  val canTriggerException              = segmentIdx === 0.U || !instMicroOp.isFof // only elementIdx = 0, or a non-fof instruction, can trigger an exception
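  // Fault-only-first semantics: for a fof load only element 0 may raise an exception; a fault
  // on a later element is not raised, instead exceptionVl records that element index so that
  // vl can be shrunk when the uops are written back.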
461
462  val segmentTrigger = Module(new VSegmentTrigger)
463  segmentTrigger.io.fromCsrTrigger.tdataVec             := io.fromCsrTrigger.tdataVec
464  segmentTrigger.io.fromCsrTrigger.tEnableVec           := io.fromCsrTrigger.tEnableVec
465  segmentTrigger.io.fromCsrTrigger.triggerCanRaiseBpExp := io.fromCsrTrigger.triggerCanRaiseBpExp
466  segmentTrigger.io.fromCsrTrigger.debugMode            := io.fromCsrTrigger.debugMode
467  segmentTrigger.io.memType                             := isVSegLoad
468  segmentTrigger.io.fromLoadStore.vaddr                 := Mux(isMisalignReg, misalignVaddr, latchVaddr)
469  segmentTrigger.io.fromLoadStore.isVectorUnitStride    := false.B
470  segmentTrigger.io.fromLoadStore.mask                  := 0.U
471
472  val triggerAction = segmentTrigger.io.toLoadStore.triggerAction
473  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
474  val triggerBreakpoint = TriggerAction.isExp(triggerAction)
475
476  // tlb resp
477  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
478      exceptionVec(storePageFault)      := io.dtlb.resp.bits.excp(0).pf.st
479      exceptionVec(loadPageFault)       := io.dtlb.resp.bits.excp(0).pf.ld
480      exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st
481      exceptionVec(loadGuestPageFault)  := io.dtlb.resp.bits.excp(0).gpf.ld
482      exceptionVec(storeAccessFault)    := io.dtlb.resp.bits.excp(0).af.st
483      exceptionVec(loadAccessFault)     := io.dtlb.resp.bits.excp(0).af.ld
484      when(!io.dtlb.resp.bits.miss){
485        instMicroOp.paddr             := io.dtlb.resp.bits.paddr(0)
486        instMicroOp.exceptionVaddr    := io.dtlb.resp.bits.fullva
487        instMicroOp.exceptionGpaddr   := io.dtlb.resp.bits.gpaddr(0)
488        instMicroOp.exceptionIsForVSnonLeafPTE  := io.dtlb.resp.bits.isForVSnonLeafPTE
489        lowPagePaddr  := Mux(isMisalignReg && !notCross16ByteReg && !curPtr, io.dtlb.resp.bits.paddr(0), lowPagePaddr)
490        lowPageGPaddr := Mux(isMisalignReg && !notCross16ByteReg && !curPtr, io.dtlb.resp.bits.gpaddr(0), lowPageGPaddr)
491
492        highPagePaddr  := Mux(isMisalignReg && !notCross16ByteReg && curPtr, io.dtlb.resp.bits.paddr(0), highPagePaddr)
493        highPageGPaddr := Mux(isMisalignReg && !notCross16ByteReg && curPtr, io.dtlb.resp.bits.gpaddr(0), highPageGPaddr)
494      }
495  }
496  // pmp
497  // NOTE: only load/store exceptions are handled here; if another kind of exception occurs, it must not be sent here
498  val exceptionWithPf = exceptionVec(storePageFault) || exceptionVec(loadPageFault) || exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
499  val pmp = (io.pmpResp.asUInt & Fill(io.pmpResp.asUInt.getWidth, !exceptionWithPf)).asTypeOf(new PMPRespBundle())
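  // If the TLB already reported a (guest-)page fault for this access, the PMP response is
  // masked to zero so that the page fault takes priority over any PMP/MMIO indication.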
500  when(state === s_pm) {
501    val highAddress = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
502      "b00".U -> 0.U,
503      "b01".U -> 1.U,
504      "b10".U -> 3.U,
505      "b11".U -> 7.U
506    )) + vaddr(4, 0)
507
508    val addr_aligned = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
509      "b00".U   -> true.B,                   //b
510      "b01".U   -> (vaddr(0)    === 0.U), //h
511      "b10".U   -> (vaddr(1, 0) === 0.U), //w
512      "b11".U   -> (vaddr(2, 0) === 0.U)  //d
513    ))
514
515    notCross16ByteWire   := highAddress(4) === vaddr(4)
516    isMisalignWire       := !addr_aligned && !isMisalignReg
517    canHandleMisalign    := !pmp.mmio && !triggerBreakpoint && !triggerDebugMode
518    exceptionVec(loadAddrMisaligned)  := isMisalignWire && isVSegLoad  && canTriggerException && pmp.mmio
519    exceptionVec(storeAddrMisaligned) := isMisalignWire && isVSegStore && canTriggerException && pmp.mmio
520
521    exception_va  := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
522                     exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault) ||
523                     triggerBreakpoint || triggerDebugMode || pmp.mmio
524    exception_gpa := exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
525    exception_pa  := pmp.st || pmp.ld || pmp.mmio
526
527    instMicroOp.exception_pa  := exception_pa
528    instMicroOp.exception_va  := exception_va
529    instMicroOp.exception_gpa := exception_gpa
530    // update storeAccessFault bit. Currently, we don't support vector MMIO
531    exceptionVec(loadAccessFault)  := (exceptionVec(loadAccessFault) || pmp.ld || pmp.mmio)   && isVSegLoad  && canTriggerException
532    exceptionVec(storeAccessFault) := (exceptionVec(storeAccessFault) || pmp.st || pmp.mmio)  && isVSegStore && canTriggerException
533    exceptionVec(breakPoint)       := triggerBreakpoint && canTriggerException
534
535    exceptionVec(storePageFault)      := exceptionVec(storePageFault)      && isVSegStore && canTriggerException
536    exceptionVec(loadPageFault)       := exceptionVec(loadPageFault)       && isVSegLoad  && canTriggerException
537    exceptionVec(storeGuestPageFault) := exceptionVec(storeGuestPageFault) && isVSegStore && canTriggerException
538    exceptionVec(loadGuestPageFault)  := exceptionVec(loadGuestPageFault)  && isVSegLoad  && canTriggerException
539
540    when(exception_va || exception_gpa || exception_pa) {
541      when(canTriggerException) {
542        instMicroOp.exceptionVstart := segmentIdx // for exception
543      }.otherwise {
544        instMicroOp.exceptionVl.valid := true.B
545        instMicroOp.exceptionVl.bits := segmentIdx
546      }
547    }
548
549    when(exceptionVec(breakPoint) || triggerDebugMode) {
550      instMicroOp.uop.trigger := triggerAction
551    }
552
553    when(isMisalignWire && !(exception_va || exception_gpa || exception_pa)) {
554      notCross16ByteReg := notCross16ByteWire
555      isMisalignReg       := true.B
556    }
557  }
558
559  /**
560   * flush sbuffer IO Assign
561   */
562  io.flush_sbuffer.valid           := !sbufferEmpty && (state === s_flush_sbuffer_req)
563
564  /**
565  * update curPtr
566  * */
567  when(state === s_finish || state === s_latch_and_merge_data || state === s_send_data && stateNext =/= s_send_data) {
568    isMisalignReg     := false.B
569    notCross16ByteReg := false.B
570    curPtr := false.B
571  } .otherwise {
572    when(isVSegLoad) {
573      when(isMisalignReg && !notCross16ByteReg && state === s_misalign_merge_data) {
574        curPtr := true.B
575      }
576    } .otherwise {
577      when(isMisalignReg && !notCross16ByteReg && state === s_pm) {
578        curPtr := !curPtr
579      } .elsewhen(isMisalignReg && !notCross16ByteReg && state === s_pm && stateNext === s_send_data) {
580        curPtr := false.B
581      } .elsewhen(isMisalignReg && !notCross16ByteReg && state === s_send_data && stateNext === s_send_data && sbufferOut.fire) {
582        curPtr := !curPtr
583      }
584    }
585  }
586
587
588
589  /**
590   * merge data for load
591   */
592  val cacheData = LookupTree(latchVaddr(3,0), List(
593    "b0000".U -> io.rdcache.resp.bits.data_delayed(63,    0),
594    "b0001".U -> io.rdcache.resp.bits.data_delayed(63,    8),
595    "b0010".U -> io.rdcache.resp.bits.data_delayed(63,   16),
596    "b0011".U -> io.rdcache.resp.bits.data_delayed(63,   24),
597    "b0100".U -> io.rdcache.resp.bits.data_delayed(63,   32),
598    "b0101".U -> io.rdcache.resp.bits.data_delayed(63,   40),
599    "b0110".U -> io.rdcache.resp.bits.data_delayed(63,   48),
600    "b0111".U -> io.rdcache.resp.bits.data_delayed(63,   56),
601    "b1000".U -> io.rdcache.resp.bits.data_delayed(127,  64),
602    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
603    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
604    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
605    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
606    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
607    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
608    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
609  ))
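  // cacheData picks the accessed element out of the 128-bit dcache response for the aligned,
  // non-crossing case: latchVaddr(3, 0) is the byte offset inside the 16-byte line, and the
  // element is taken from the lower or upper 64-bit half accordingly.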
610
611  val misalignLowData  = LookupTree(latchVaddr(3,0), List(
612    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
613    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
614    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
615    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
616    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
617    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
618    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
619  ))
620
621  val misalignCombinedData = LookupTree(latchVaddr(3,0), List(
622    "b1001".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(55,    0))(63, 0),
623    "b1010".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(47,    0))(63, 0),
624    "b1011".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(39,    0))(63, 0),
625    "b1100".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(31,    0))(63, 0),
626    "b1101".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(23,    0))(63, 0),
627    "b1110".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(15,    0))(63, 0),
628    "b1111".U -> Cat(io.rdcache.resp.bits.data_delayed, combinedData(7,     0))(63, 0)
629  ))
630  when(state === s_misalign_merge_data && segmentActive){
631    when(!curPtr) {
632      combinedData := misalignLowData
633    } .otherwise {
634      combinedData := misalignCombinedData
635    }
636  }
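  // Two-beat merge for a load that crosses a 16-byte boundary: the first (low) beat latches the
  // element's leading bytes (the top of the first line) into combinedData, the second (high)
  // beat concatenates the low-order bytes of the following line on top of them, completing the
  // at most 8-byte element.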
637
638  val shiftData    = (io.rdcache.resp.bits.data_delayed >> (latchVaddr(3, 0) << 3)).asUInt(63, 0)
639  val mergemisalignData = Mux(notCross16ByteReg, shiftData, combinedData)
640  val pickData  = rdataVecHelper(alignedType(1,0), Mux(isMisalignReg, mergemisalignData, cacheData))
641  val mergedData = mergeDataWithElemIdx(
642    oldData = data(splitPtr.value),
643    newData = Seq(pickData),
644    alignedType = alignedType(1,0),
645    elemIdx = Seq(elemIdxInVd),
646    valids = Seq(true.B)
647  )
648  when(state === s_latch_and_merge_data && segmentActive){
649    data(splitPtr.value) := mergedData
650  }
651
652
653  /**
654   * split data for store
655   * */
656  val splitData = genVSData(
657    data = data(splitPtr.value),
658    elemIdx = elemIdxInVd,
659    alignedType = alignedType
660  )
661  val flowData  = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
662  val wmask     = genVWmask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
663  val bmask     = genBasemask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
664  val dcacheReqVaddr = Mux(isMisalignReg, misalignVaddr, latchVaddr)
665  val dcacheReqVaddrDup = Mux(isMisalignReg, misalignVaddrDup, latchVaddrDup)
666  val dcacheReqPaddr = Mux(isMisalignReg, Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, PageOffsetWidth), misalignVaddr(PageOffsetWidth - 1, 0)), instMicroOp.paddr)
667  /**
668   * read dcache request; a store does not need to query the dcache, because each element is written to the sbuffer
669   */
670  io.rdcache.req                    := DontCare
671  io.rdcache.req.valid              := state === s_cache_req && isVSegLoad
672  io.rdcache.req.bits.cmd           := MemoryOpConstants.M_XRD
673  io.rdcache.req.bits.vaddr         := dcacheReqVaddr
674  io.rdcache.req.bits.vaddr_dup     := dcacheReqVaddrDup
675  io.rdcache.req.bits.mask          := mask
676  io.rdcache.req.bits.data          := flowData
677  io.rdcache.pf_source              := LOAD_SOURCE.U
678  io.rdcache.req.bits.id            := DontCare
679  io.rdcache.resp.ready             := true.B
680  io.rdcache.s1_paddr_dup_lsu       := dcacheReqPaddr
681  io.rdcache.s1_paddr_dup_dcache    := dcacheReqPaddr
682  io.rdcache.s1_kill                := false.B
683  io.rdcache.s1_kill_data_read      := false.B
684  io.rdcache.s2_kill                := false.B
685  if (env.FPGAPlatform){
686    io.rdcache.s0_pc                := DontCare
687    io.rdcache.s1_pc                := DontCare
688    io.rdcache.s2_pc                := DontCare
689  }else{
690    io.rdcache.s0_pc                := instMicroOp.uop.pc
691    io.rdcache.s1_pc                := instMicroOp.uop.pc
692    io.rdcache.s2_pc                := instMicroOp.uop.pc
693  }
694  io.rdcache.replacementUpdated     := false.B
695  io.rdcache.is128Req               := notCross16ByteReg
696
697
698  /**
699   * write data to sbuffer
700   * */
701  val sbufferAddrLow4bit = latchVaddr(3, 0)
702
703  val notCross16BytePaddr          = Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, 4), 0.U(4.W))
704  val notCross16ByteData           = flowData << (sbufferAddrLow4bit << 3)
705
706  val Cross16ByteMask = Wire(UInt(32.W))
707  val Cross16ByteData = Wire(UInt(256.W))
708  Cross16ByteMask := bmask << sbufferAddrLow4bit
709  Cross16ByteData := flowData << (sbufferAddrLow4bit << 3)
710
711  val vaddrLow  = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W))
712  val vaddrHigh = Cat(latchVaddr(latchVaddr.getWidth - 1, 3), 0.U(3.W)) + 8.U
713
714
715  val paddrLow  = Cat(lowPagePaddr(lowPagePaddr.getWidth - 1, 3), 0.U(3.W))
716  val paddrHigh = Cat(instMicroOp.paddr(instMicroOp.paddr.getWidth - 1, 3), 0.U(3.W))
717
718  val maskLow   = Cross16ByteMask(15, 0)
719  val maskHigh  = Cross16ByteMask(31, 16)
720
721  val dataLow   = Cross16ByteData(127, 0)
722  val dataHigh  = Cross16ByteData(255, 128)
723
724  val sbuffermisalignMask          = Mux(notCross16ByteReg, wmask, Mux(isFirstSplit, maskLow, maskHigh))
725  val sbuffermisalignData          = Mux(notCross16ByteReg, notCross16ByteData, Mux(isFirstSplit, dataLow, dataHigh))
726  val sbuffermisalignPaddr         = Mux(notCross16ByteReg, notCross16BytePaddr, Mux(isFirstSplit, paddrLow, paddrHigh))
727  val sbuffermisalignVaddr         = Mux(notCross16ByteReg, notCross16ByteVaddr, Mux(isFirstSplit, vaddrLow, vaddrHigh))
728
729  val sbufferMask                  = Mux(isMisalignReg, sbuffermisalignMask, wmask)
730  val sbufferData                  = Mux(isMisalignReg, sbuffermisalignData, flowData)
731  val sbufferVaddr                 = Mux(isMisalignReg, sbuffermisalignVaddr, latchVaddr)
732  val sbufferPaddr                 = Mux(isMisalignReg, sbuffermisalignPaddr, instMicroOp.paddr)
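  // Store path for misaligned elements: if the element stays inside one 16-byte region, a single
  // 128-bit sbuffer write is issued at the 16-byte-aligned address; if it crosses the boundary,
  // the shifted 256-bit data/mask are split into a low and a high 16-byte write, sent in two
  // s_send_data passes selected by curPtr.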
733
734  dontTouch(wmask)
735  dontTouch(Cross16ByteMask)
736  sbufferOut.bits                  := DontCare
737  sbufferOut.valid                 := state === s_send_data && segmentActive
738  sbufferOut.bits.vecValid         := state === s_send_data && segmentActive
739  sbufferOut.bits.mask             := sbufferMask
740  sbufferOut.bits.data             := sbufferData
741  sbufferOut.bits.vaddr            := sbufferVaddr
742  sbufferOut.bits.cmd              := MemoryOpConstants.M_XWR
743  sbufferOut.bits.id               := DontCare
744  sbufferOut.bits.addr             := sbufferPaddr
745
746  NewPipelineConnect(
747    sbufferOut, io.sbuffer, io.sbuffer.fire,
748    false.B,
749    Option(s"VSegmentUnitPipelineConnect")
750  )
751
752  io.vecDifftestInfo.valid         := io.sbuffer.valid
753  io.vecDifftestInfo.bits          := uopq(deqPtr.value).uop
754
755  /**
756   * update ptr
757   * */
758  private val fieldActiveWirteFinish = sbufferOut.fire && segmentActive // write data finished and the segment is active
759  XSError(sbufferOut.fire && !segmentActive, "Attempted to write an inactive segment to the sbuffer, something is wrong!\n")
760
761  private val segmentInactiveFinish = ((state === s_latch_and_merge_data) || (state === s_send_data && stateNext =/= s_send_data)) && !segmentActive
762
763  val splitPtrOffset = Mux(
764    isIndexed(instType),
765    Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt),
766    Mux(emul.asSInt < 0.S, 1.U, (1.U << emul).asUInt)
767  )
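  // splitPtrOffset is the number of uop-queue entries (destination registers) occupied by one
  // field: emul registers for unit-stride/strided accesses, lmul registers for indexed ones,
  // and never less than one entry when the multiplier is fractional.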
768  splitPtrNext :=
769    Mux(fieldIdx === maxNfields || !segmentActive, // all fields done or segment inactive: jump to the next segment's entry, otherwise advance to the next field
770      // segment finished: shift right by 'issueUopFlowNumLog2' so that emul != 1 also generates the correct lateral offset.
771     (deqPtr + ((segmentIdx +& 1.U) >> issueUopFlowNumLog2).asUInt),
772      // next field.
773     (splitPtr + splitPtrOffset)
774    )
775
776  if (backendParams.debugEn){
777    dontTouch(issueUopFlowNumLog2)
778    dontTouch(issueEmul)
779    dontTouch(splitPtrNext)
780    dontTouch(stridePtr)
781    dontTouch(segmentActive)
782  }
783
784  // update splitPtr
785  when(state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && (fieldActiveWirteFinish || !segmentActive))){
786    splitPtr := splitPtrNext
787  }.elsewhen(io.in.fire && !instMicroOpValid){
788    splitPtr := deqPtr // initial splitPtr
789  }
790
791
792  val fieldIdxWire      = WireInit(fieldIdx)
793  val segmentIdxWire    = WireInit(segmentIdx)
794  val nextBaseVaddrWire = (baseVaddr + (fieldIdxWire << alignedType).asUInt)
795
796  nextBaseVaddr  := RegEnable(nextBaseVaddrWire, 0.U, stateNext === s_tlb_req)
797
798  // update stridePtr, only used by indexed accesses
799  val strideOffset     = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
800  val strideOffsetWire = Mux(isIndexed(issueInstType), segmentIdxWire >> issueMaxIdxInIndexLog2, 0.U)
801  stridePtr       := deqPtr + strideOffset
802  stridePtrReg    := deqPtr + strideOffsetWire
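  // stridePtr selects the stride/index source entry: for indexed accesses it advances to the
  // next index register every issueMaxIdxInIndex segments, for strided accesses it stays at
  // deqPtr (the stride operand of the instruction).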
803
804  // update fieldIdx
805  when(io.in.fire && !instMicroOpValid){ // init
806    fieldIdxWire := 0.U
807    fieldIdx := fieldIdxWire
808  }.elsewhen(state === s_latch_and_merge_data && segmentActive ||
809            (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish)){ // only if segment is active
810
811    /* advance to the next field, wrapping to 0 when the segment is complete */
812    fieldIdxWire := Mux(fieldIdx === maxNfields, 0.U, fieldIdx + 1.U)
813    fieldIdx := fieldIdxWire
814  }.elsewhen(segmentInactiveFinish){ // segment is inactive, go to next segment
815    fieldIdxWire := 0.U
816    fieldIdx := fieldIdxWire
817  }
818
819
820  //update segmentIdx
821  when(io.in.fire && !instMicroOpValid){
822    segmentIdxWire := 0.U
823    segmentIdx := segmentIdxWire
824  }.elsewhen(fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish)) &&
825             segmentIdx =/= maxSegIdx){ // next segment, only if segment is active
826
827    segmentIdxWire := segmentIdx + 1.U
828    segmentIdx := segmentIdxWire
829  }.elsewhen(segmentInactiveFinish && segmentIdx =/= maxSegIdx){ // if segment is inactive, go to next segment
830    segmentIdxWire := segmentIdx + 1.U
831    segmentIdx := segmentIdxWire
832  }
833
834
835  //update segmentOffset
836  /* advance segmentOffset when the last field of an active segment finishes, or when an inactive segment finishes */
837  when((fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && stateNext =/= s_send_data && fieldActiveWirteFinish))) ||
838       segmentInactiveFinish){
839
840    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew(1, 0), stride(stridePtr.value))
841  }
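  // Unit-stride segments are packed back to back, so segmentOffset advances by the number of
  // fields times the element size; strided accesses advance by the stride operand. Indexed
  // accesses do not use segmentOffset (they take their offset from the index register).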
842
843
844  //update deqPtr
845  when((state === s_finish) && !isEmpty(enqPtr, deqPtr)){
846    deqPtr := deqPtr + 1.U
847  }
848
849
850  /*************************************************************************
851   *                            fof logic
852   *************************************************************************/
853
854  //Enq
855  when(isEnqFixVlUop && !fofBufferValid) { fofBuffer := io.in.bits.uop }
856  when(isEnqFixVlUop && !fofBufferValid) { fofBufferValid := true.B }
857
858  //Deq
859  val fofFixVlValid                    = state === s_fof_fix_vl && fofBufferValid
860
861  when(fofFixVlValid) { fofBuffer      := 0.U.asTypeOf(new DynInst) }
862  when(fofFixVlValid) { fofBufferValid := false.B }
863
864
865  /*************************************************************************
866   *                            dequeue logic
867   *************************************************************************/
868  val vdIdxInField = GenUopIdxInField(Mux(isIndexed(instType), issueLmul, issueEmul), uopq(deqPtr.value).uop.vpu.vuopIdx)
869  /* select the mask of vd; may be removed in the future */
870  val realEw        = Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
871  val maskDataVec: Vec[UInt] = VecDataToMaskDataVec(instMicroOp.mask, realEw)
872  val maskUsed      = maskDataVec(vdIdxInField)
873
874  when(stateNext === s_idle){
875    instMicroOpValid := false.B
876  }
877  // writeback to backend
878  val writebackOut                     = WireInit(io.uopwriteback.bits)
879  val writebackValid                   = (state === s_finish) && !isEmpty(enqPtr, deqPtr) || fofFixVlValid
880
881  when(fofFixVlValid) {
882    writebackOut.uop                    := fofBuffer
883    writebackOut.uop.vpu.vl             := instMicroOp.exceptionVl.bits
884    writebackOut.data                   := instMicroOp.exceptionVl.bits
885    writebackOut.mask.get               := Fill(VLEN, 1.U)
886    writebackOut.uop.vpu.vmask          := Fill(VLEN, 1.U)
887  }.otherwise{
888    writebackOut.uop                    := uopq(deqPtr.value).uop
889    writebackOut.uop.vpu                := instMicroOp.uop.vpu
890    writebackOut.uop.trigger            := instMicroOp.uop.trigger
891    writebackOut.uop.exceptionVec       := instMicroOp.uop.exceptionVec
892    writebackOut.mask.get               := instMicroOp.mask
893    writebackOut.data                   := data(deqPtr.value)
894    writebackOut.vdIdx.get              := vdIdxInField
895    writebackOut.uop.vpu.vl             := Mux(instMicroOp.exceptionVl.valid, instMicroOp.exceptionVl.bits, instMicroOp.vl)
896    writebackOut.uop.vpu.vstart         := Mux(instMicroOp.uop.exceptionVec.asUInt.orR || TriggerAction.isDmode(instMicroOp.uop.trigger), instMicroOp.exceptionVstart, instMicroOp.vstart)
897    writebackOut.uop.vpu.vmask          := maskUsed
898    writebackOut.uop.vpu.vuopIdx        := uopq(deqPtr.value).uop.vpu.vuopIdx
899    // when an exception updates vl, the tail-undisturbed (vtu) strategy should be used.
900    writebackOut.uop.vpu.vta            := Mux(instMicroOp.exceptionVl.valid, VType.tu, instMicroOp.uop.vpu.vta)
901    writebackOut.debug                  := DontCare
902    writebackOut.vdIdxInField.get       := vdIdxInField
903    writebackOut.uop.robIdx             := instMicroOp.uop.robIdx
904    writebackOut.uop.fuOpType           := instMicroOp.uop.fuOpType
905  }
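  // Two writeback sources share io.uopwriteback: the regular per-vd uops drained from uopq while
  // in s_finish, and, for fof instructions, one extra vl-fixing uop taken from fofBuffer in
  // s_fof_fix_vl, whose data and vl fields carry the shrunk exceptionVl.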
906
907  io.uopwriteback.valid               := RegNext(writebackValid)
908  io.uopwriteback.bits                := RegEnable(writebackOut, writebackValid)
909
910  dontTouch(writebackValid)
911
912  //to RS
913  val feedbackOut                      = WireInit(0.U.asTypeOf(io.feedback.bits))
914  val feedbackValid                    = state === s_finish && !isEmpty(enqPtr, deqPtr)
915  feedbackOut.hit                     := true.B
916  feedbackOut.robIdx                  := instMicroOp.uop.robIdx
917  feedbackOut.sourceType              := DontCare
918  feedbackOut.flushState              := DontCare
919  feedbackOut.dataInvalidSqIdx        := DontCare
920  feedbackOut.sqIdx                   := uopq(deqPtr.value).uop.sqIdx
921  feedbackOut.lqIdx                   := uopq(deqPtr.value).uop.lqIdx
922
923  io.feedback.valid                   := RegNext(feedbackValid)
924  io.feedback.bits                    := RegEnable(feedbackOut, feedbackValid)
925
926  dontTouch(feedbackValid)
927
928  // exception
929  io.exceptionInfo                    := DontCare
930  io.exceptionInfo.bits.robidx        := instMicroOp.uop.robIdx
931  io.exceptionInfo.bits.uopidx        := uopq(deqPtr.value).uop.vpu.vuopIdx
932  io.exceptionInfo.bits.vstart        := instMicroOp.exceptionVstart
933  io.exceptionInfo.bits.vaddr         := instMicroOp.exceptionVaddr
934  io.exceptionInfo.bits.gpaddr        := instMicroOp.exceptionGpaddr
935  io.exceptionInfo.bits.isForVSnonLeafPTE := instMicroOp.exceptionIsForVSnonLeafPTE
936  io.exceptionInfo.bits.vl            := instMicroOp.exceptionVl.bits
937  io.exceptionInfo.valid              := (state === s_finish) && instMicroOp.uop.exceptionVec.asUInt.orR && !isEmpty(enqPtr, deqPtr)
938}
939
940