xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VSegmentUnit.scala (revision 211d620b07edb797ba35b635d24fef4e7294bae2)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend.rob.RobPtr
26import xiangshan.backend.Bundles._
27import xiangshan.mem._
28import xiangshan.backend.fu.{FuType, PMPRespBundle}
29import freechips.rocketchip.diplomacy.BufferParams
30import xiangshan.cache.mmu._
31import xiangshan.cache._
32import xiangshan.cache.wpu.ReplayCarry
33import xiangshan.backend.fu.util.SdtrigExt
34import xiangshan.ExceptionNO._
35import xiangshan.backend.fu.vector.Bundles.{VConfig, VType}
36import xiangshan.backend.datapath.NewPipelineConnect
37import xiangshan.backend.fu.NewCSR._
38import xiangshan.backend.fu.vector.Utils.VecDataToMaskDataVec
39
40class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
41{
42  val baseVaddr        = UInt(XLEN.W)
43  val uop              = new DynInst
44  val paddr            = UInt(PAddrBits.W)
45  val mask             = UInt(VLEN.W)
46  val alignedType      = UInt(alignTypeBits.W)
47  val vl               = UInt(elemIdxBits.W)
48  val uopFlowNum       = UInt(elemIdxBits.W)
49  val uopFlowNumMask   = UInt(elemIdxBits.W)
50  // for exception
51  val vstart           = UInt(elemIdxBits.W)
52  val exceptionVaddr   = UInt(XLEN.W)
53  val exceptionGpaddr  = UInt(XLEN.W)
54  val exceptionIsForVSnonLeafPTE = Bool()
55  val exception_va     = Bool()
56  val exception_gpa    = Bool()
57  val exception_pa     = Bool()
58  val exceptionVstart  = UInt(elemIdxBits.W)
59  // valid: a fof exception occurred but could not be triggered; the vl of all written-back uops must be updated with exceptionVl
60  val exceptionVl      = ValidIO(UInt(elemIdxBits.W))
61  val isFof            = Bool()
62}
63
64// latch each uop's VecWen, pdest, v0Wen, uopIdx
65class VSegmentUop(implicit p: Parameters) extends VLSUBundle{
66  val uop              = new DynInst
67}
68
69class VSegmentUnit (implicit p: Parameters) extends VLSUModule
70  with HasDCacheParameters
71  with MemoryOpConstants
72  with SdtrigExt
73  with HasLoadHelper
74{
75  val io               = IO(new VSegmentUnitIO)
76
77  val maxSize          = VSegmentBufferSize
78
79  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize){
80  }
81
82  object VSegUPtr {
83    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
84      val ptr           = Wire(new VSegUPtr)
85      ptr.flag         := f
86      ptr.value        := v
87      ptr
88    }
89  }
90
91
92  /**
93  ********************************************************************************************************
94  *  Use an example to illustrate the working logic of a segment unit:                                   *
95  *    For:                                                                                              *
96  *      lmul=2 sew=32 emul=2 eew=32  vl=8                                                               *
97  *    Then:                                                                                             *
98  *      Access memory in the order:                                                                     *
99  *        (V2,S0),(V4,S0),(V6,S0),(V8,S0),                                                              *
100  *        (V2,S1),(V4,S1),(V6,S1),(V8,S1),                                                              *
101  *        (V2,S2),(V4,S2),(V6,S2),(V8,S2),                                                              *
102  *        (V2,S3),(V4,S3),(V6,S3),(V8,S3),                                                              *
103  *        (V3,S4),(V5,S4),(V7,S4),(V9,S4),                                                              *
104  *        (V3,S5),(V5,S5),(V7,S5),(V9,S5),                                                              *
105  *        (V3,S6),(V5,S6),(V7,S6),(V9,S6),                                                              *
106  *        (V3,S7),(V5,S7),(V7,S7),(V9,S7),                                                              *
107  *                                                                                                      *
108  *                                                                                                      *
109  *    [[data]] saves the data generated by the access and corresponds to the register.                  *
110  *    [[splitPtr]] controls the destination register written to.                                        *
111  *                                                                                                      *
112  *    The splitPtr offset can be seen in the assignment logic of [[splitPtrNext]],                      *
113  *    which is mainly calculated from [[fieldIdx]] and [[segmentIdx]].                                  *
114  *    We first access the different fields of one segment, and then move on to the next segment.        *
115  *    For the case of 'emul' greater than 1, as in the following example,                               *
116  *    although 'v2' and 'v3' are different vd registers holding the same field, they still belong       *
117  *    to different segments, so they are accessed sequentially, just as in the access order above.      *
118  *                                                                                                      *
119  *                         [[segmentIdx]]                                                               *
120  *                               |                                                                      *
121  *                               |                                                                      *
122  *                               V                                                                      *
123  *                                                                                                      *
124  *                               S0               S1                S2                 S3               *
125  *                      ----------------------------------------------------------------------------    *
126  *  [[splitPtr]]--> v2  |     field0     |      field0     |      field0     |      field0         |    *
127  *                      ----------------------------------------------------------------------------    *
128  *                               S4               S5                S6                 S7               *
129  *                      ----------------------------------------------------------------------------    *
130  *                  v3  |     field0     |      field0     |      field0     |      field0         |    *
131  *                      ----------------------------------------------------------------------------    *
132  *                               S0               S1                S2                 S3               *
133  *                      ----------------------------------------------------------------------------    *
134  *                  v4  |     field1     |      field1     |      field1     |      field1         |    *
135  *                      ----------------------------------------------------------------------------    *
136  *                               S4               S5                S6                 S7               *
137  *                      ----------------------------------------------------------------------------    *
138  *                  v5  |     field1     |      field1     |      field1     |      field1         |    *
139  *                      ----------------------------------------------------------------------------    *
140  *                               S0               S1                S2                 S3               *
141  *                      ----------------------------------------------------------------------------    *
142  *                  v6  |     field2     |      field2     |      field2     |      field2         |    *
143  *                      ----------------------------------------------------------------------------    *
144  *                               S4               S5                S6                 S7               *
145  *                      ----------------------------------------------------------------------------    *
146  *                  v7  |     field2     |      field2     |      field2     |      field2         |    *
147  *                      ----------------------------------------------------------------------------    *
148  *                               S0               S1                S2                 S3               *
149  *                      ----------------------------------------------------------------------------    *
150  *                  v8  |     field3     |      field3     |      field3     |      field3         |    *
151  *                      ----------------------------------------------------------------------------    *
152  *                               S4               S5                S6                 S7               *
153  *                      ----------------------------------------------------------------------------    *
154  *                  v9  |     field3     |      field3     |      field3     |      field3         |    *
155  *                      ----------------------------------------------------------------------------    *
156  *                                                                                                      *
157  *                                                                                                      *
158  ********************************************************************************************************
159  **/
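  // Illustrative numbers for the example above (a sketch, assuming VLEN = 128):
  // each vector register holds 128/32 = 4 elements, so with emul = 2 every field spans two registers,
  // segments S0-S3 land in the even register and S4-S7 in the odd one, and the eight destination
  // registers v2..v9 occupy eight consecutive entries of [[uopq]]/[[data]].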
160
161
162  // buffer uop
163  val instMicroOp       = Reg(new VSegmentBundle)
164  val instMicroOpValid  = RegInit(false.B)
165  val data              = Reg(Vec(maxSize, UInt(VLEN.W)))
166  val uopq              = Reg(Vec(maxSize, new VSegmentUop))
167  val stride            = Reg(Vec(maxSize, UInt(VLEN.W)))
168  val allocated         = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
169  val enqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
170  val deqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
171  val stridePtr         = WireInit(0.U.asTypeOf(new VSegUPtr)) // for select stride/index
172
173  val segmentIdx        = RegInit(0.U(elemIdxBits.W))
174  val fieldIdx          = RegInit(0.U(fieldBits.W))
175  val segmentOffset     = RegInit(0.U(XLEN.W))
176  val splitPtr          = RegInit(0.U.asTypeOf(new VSegUPtr)) // for select load/store data
177  val splitPtrNext      = WireInit(0.U.asTypeOf(new VSegUPtr))
178
179  val exception_va      = WireInit(false.B)
180  val exception_gpa     = WireInit(false.B)
181  val exception_pa      = WireInit(false.B)
182
183  val maxSegIdx         = instMicroOp.vl - 1.U
184  val maxNfields        = instMicroOp.uop.vpu.nf
185  val latchVaddr        = RegInit(0.U(VAddrBits.W))
186
187  XSError((segmentIdx > maxSegIdx) && instMicroOpValid, s"segmentIdx > vl, something is wrong!\n")
188  XSError((fieldIdx > maxNfields) &&  instMicroOpValid, s"fieldIdx > nfields, something is wrong!\n")
189
190  // MicroOp
191  val baseVaddr                       = instMicroOp.baseVaddr
192  val alignedType                     = instMicroOp.alignedType
193  val fuType                          = instMicroOp.uop.fuType
194  val mask                            = instMicroOp.mask
195  val exceptionVec                    = instMicroOp.uop.exceptionVec
196  val issueEew                        = instMicroOp.uop.vpu.veew
197  val issueLmul                       = instMicroOp.uop.vpu.vtype.vlmul
198  val issueSew                        = instMicroOp.uop.vpu.vtype.vsew
199  val issueEmul                       = EewLog2(issueEew) - issueSew + issueLmul
200  val elemIdxInVd                     = segmentIdx & instMicroOp.uopFlowNumMask
201  val issueInstType                   = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always segment instruction
202  val issueUopFlowNumLog2             = GenRealFlowLog2(issueInstType, issueEmul, issueLmul, issueEew, issueSew, true) // max element number log2 in vd
203  val issueVlMax                      = instMicroOp.uopFlowNum // max elementIdx in vd
204  val issueMaxIdxInIndex              = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0)) // max number of index elements in one index register
205  val issueMaxIdxInIndexMask          = GenVlMaxMask(issueMaxIdxInIndex, elemIdxBits)
206  val issueMaxIdxInIndexLog2          = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0))
207  val issueIndexIdx                   = segmentIdx & issueMaxIdxInIndexMask
208  val segmentActive                   = (mask & UIntToOH(segmentIdx)).orR
209
210  // sbuffer write interface
211  val sbufferOut                      = Wire(Decoupled(new DCacheWordReqWithVaddrAndPfFlag))
212
213
214  // segment fof instruction buffer
215  val fofBuffer                       = RegInit(0.U.asTypeOf(new DynInst))
216  val fofBufferValid                  = RegInit(false.B)
217
218
219  // Segment instruction's FSM
220  /*
221  * s_idle: wait request
222  * s_flush_sbuffer_req: flush sbuffer
223  * s_wait_flush_sbuffer_resp: wait sbuffer empty
224  * s_tlb_req: request tlb
225  * s_wait_tlb_resp: wait tlb resp
226  * s_pm: check pmp
227  * s_cache_req: request cache
228  * s_cache_resp: wait cache resp
229  * s_latch_and_merge_data: latch and merge load data
230  * s_send_data: send store data
231  * s_wait_to_sbuffer: wait for data in the sbufferOut pipeline stage to be sent to the sbuffer
232  * s_finish: write back uops
233  * s_fof_fix_vl: write back the last uop of a fof instruction to fix vl
234  * */
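  // Typical sequences through the FSM above (derived from the transfer logic below):
  //   load : s_idle -> s_flush_sbuffer_req [-> s_wait_flush_sbuffer_resp] -> s_tlb_req -> s_wait_tlb_resp
  //          -> s_pm -> s_cache_req -> s_cache_resp -> s_latch_and_merge_data -> (s_tlb_req ... per field/segment) -> s_finish
  //   store: s_idle -> s_flush_sbuffer_req [-> s_wait_flush_sbuffer_resp] -> s_tlb_req -> s_wait_tlb_resp
  //          -> s_pm -> s_send_data -> (s_tlb_req ... per field/segment) -> s_wait_to_sbuffer -> s_finish
  // An exception detected in s_pm goes straight to s_finish; a pending fof fix-vl uop appends s_fof_fix_vl after s_finish.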
235  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm :: s_cache_req :: s_cache_resp :: s_latch_and_merge_data :: s_send_data :: s_wait_to_sbuffer :: s_finish :: s_fof_fix_vl :: Nil = Enum(13)
236  val state             = RegInit(s_idle)
237  val stateNext         = WireInit(s_idle)
238  val sbufferEmpty      = io.flush_sbuffer.empty
239  val isVSegLoad        = FuType.isVSegLoad(instMicroOp.uop.fuType)
240  val isEnqfof          = io.in.bits.uop.fuOpType === VlduType.vleff && io.in.valid
241  val isEnqFixVlUop     = isEnqfof && io.in.bits.uop.vpu.lastUop
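  // fof (vleff) handling: the last uop of a fault-only-first load is not enqueued into uopq;
  // it is latched into fofBuffer (see the fof logic below) and written back in s_fof_fix_vl,
  // carrying the possibly truncated exceptionVl, after all other uops have been written back.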
242
243  /**
244   * state update
245   */
246  state  := stateNext
247
248  /**
249   * state transfer
250   */
251  when(state === s_idle){
252    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
253  }.elsewhen(state === s_flush_sbuffer_req){
254    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if sbuffer is empty, go to query tlb
255
256  }.elsewhen(state === s_wait_flush_sbuffer_resp){
257    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)
258
259  }.elsewhen(state === s_tlb_req){
260    stateNext := Mux(segmentActive, s_wait_tlb_resp, Mux(isVSegLoad, s_latch_and_merge_data, s_send_data))
261
262  }.elsewhen(state === s_wait_tlb_resp){
263    stateNext := Mux(io.dtlb.resp.fire,
264                      Mux(!io.dtlb.resp.bits.miss,
265                          s_pm,
266                          s_tlb_req),
267                      s_wait_tlb_resp)
268
269  }.elsewhen(state === s_pm){
270    /* if it is a vector store, the data goes to the sbuffer, so there is no need to query the dcache */
271    stateNext := Mux(exception_pa || exception_va || exception_gpa,
272                     s_finish,
273                     Mux(isVSegLoad, s_cache_req, s_send_data))
274
275  }.elsewhen(state === s_cache_req){
276    stateNext := Mux(io.rdcache.req.fire, s_cache_resp, s_cache_req)
277
278  }.elsewhen(state === s_cache_resp){
279    when(io.rdcache.resp.fire) {
280      when(io.rdcache.resp.bits.miss || io.rdcache.s2_bank_conflict) {
281        stateNext := s_cache_req
282      }.otherwise {
283        stateNext := Mux(isVSegLoad, s_latch_and_merge_data, s_send_data)
284      }
285    }.otherwise{
286      stateNext := s_cache_resp
287    }
288    /* if the segment is inactive, there is no need to access all of its fields */
289  }.elsewhen(state === s_latch_and_merge_data) {
290    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields) ||
291      ((segmentIdx === maxSegIdx) && !segmentActive)) {
292
293      stateNext := s_finish // segment instruction finish
294    }.otherwise {
295      stateNext := s_tlb_req // need continue
296    }
297    /* if the segment is inactive, there is no need to access all of its fields */
298  }.elsewhen(state === s_send_data) { // wait until the sbuffer accepts the data
299    when(!sbufferOut.fire && segmentActive) {
300      stateNext := s_send_data
301    }.elsewhen(segmentIdx === maxSegIdx && (fieldIdx === maxNfields && sbufferOut.fire || !segmentActive && io.sbuffer.valid && !io.sbuffer.ready)) {
302      stateNext := s_wait_to_sbuffer
303    }.elsewhen(segmentIdx === maxSegIdx && !segmentActive){
304      stateNext := s_finish // segment instruction finish
305    }.otherwise {
306      stateNext := s_tlb_req // need continue
307    }
308
309  }.elsewhen(state === s_wait_to_sbuffer){
310    stateNext := Mux(io.sbuffer.fire, s_finish, s_wait_to_sbuffer)
311
312  }.elsewhen(state === s_finish){ // writeback uop
313    stateNext := Mux(
314      distanceBetween(enqPtr, deqPtr) === 0.U,
315      Mux(fofBufferValid, s_fof_fix_vl, s_idle),
316      s_finish
317    )
318  }.elsewhen(state === s_fof_fix_vl){ // writeback uop
319    stateNext := Mux(!fofBufferValid, s_idle, s_fof_fix_vl)
320
321  }.otherwise{
322    stateNext := s_idle
323    XSError(true.B, s"Unknown state!\n")
324  }
325
326  /*************************************************************************
327   *                            enqueue logic
328   *************************************************************************/
329  io.in.ready                         := true.B
330  val fuOpType                         = io.in.bits.uop.fuOpType
331  val vtype                            = io.in.bits.uop.vpu.vtype
332  val mop                              = fuOpType(6, 5)
333  val instType                         = Cat(true.B, mop)
334  val eew                              = io.in.bits.uop.vpu.veew
335  val sew                              = vtype.vsew
336  val lmul                             = vtype.vlmul
337  val emul                             = EewLog2(eew) - sew + lmul
338  val vl                               = instMicroOp.vl
339  val vm                               = instMicroOp.uop.vpu.vm
340  val vstart                           = instMicroOp.uop.vpu.vstart
341  val srcMask                          = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
342  // when the first uop enqueues, latch the microOp of the segment instruction
343  when(io.in.fire && !instMicroOpValid && !isEnqFixVlUop){
344    // element number in a vd
345    // TODO Rewrite it in a more elegant way.
346    val uopFlowNum                    = ZeroExt(GenRealFlowNum(instType, emul, lmul, eew, sew, true), elemIdxBits)
347    instMicroOp.baseVaddr             := io.in.bits.src_rs1
348    instMicroOpValid                  := true.B // this is the first uop
349    instMicroOp.alignedType           := Mux(isIndexed(instType), sew(1, 0), eew)
350    instMicroOp.uop                   := io.in.bits.uop
351    instMicroOp.mask                  := srcMask
352    instMicroOp.vstart                := 0.U
353    instMicroOp.uopFlowNum            := uopFlowNum
354    instMicroOp.uopFlowNumMask        := GenVlMaxMask(uopFlowNum, elemIdxBits) // for merge data
355    instMicroOp.vl                    := io.in.bits.src_vl.asTypeOf(VConfig()).vl
356    instMicroOp.exceptionVl.valid     := false.B
357    instMicroOp.exceptionVl.bits      := io.in.bits.src_vl.asTypeOf(VConfig()).vl
358    segmentOffset                     := 0.U
359    instMicroOp.isFof                 := (fuOpType === VlduType.vleff) && FuType.isVLoad(fuType)
360  }
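  // Worked example of the flow-count fields (a sketch, assuming VLEN = 128 and a non-indexed access
  // with eew = 32): GenRealFlowNum gives uopFlowNum = 4 elements per vd, so uopFlowNumMask = 3 and
  // elemIdxInVd = segmentIdx & 3 selects the element slot inside the current destination register.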
361  // latch data
362  when(io.in.fire && !isEnqFixVlUop){
363    data(enqPtr.value)                := io.in.bits.src_vs3
364    stride(enqPtr.value)              := io.in.bits.src_stride
365    uopq(enqPtr.value).uop            := io.in.bits.uop
366  }
367
368  // update enqPtr, only one enqueue port
369  when(io.in.fire && !isEnqFixVlUop){
370    enqPtr                            := enqPtr + 1.U
371  }
372
373  /*************************************************************************
374   *                            output logic
375   *************************************************************************/
376
377  val indexStride                     = IndexAddr( // index for indexed instruction
378                                                    index = stride(stridePtr.value),
379                                                    flow_inner_idx = issueIndexIdx,
380                                                    eew = issueEew
381                                                  )
382  val realSegmentOffset               = Mux(isIndexed(issueInstType),
383                                            indexStride,
384                                            segmentOffset)
385  val vaddr                           = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset
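  // Address generation sketch: vaddr = baseVaddr + (fieldIdx << alignedType) + realSegmentOffset.
  // For a unit-stride segment access, segmentOffset advances by (nf + 1) << eew bytes per segment
  // (see the segmentOffset update below), so with nf = 3 and 32-bit elements, field 2 of segment 5
  // sits at baseVaddr + (2 << 2) + 5 * 16. Strided accesses advance by the rs2 stride instead, and
  // indexed accesses take the per-segment offset from the index register via IndexAddr.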
386
387  //latch vaddr
388  when(state === s_tlb_req){
389    latchVaddr := vaddr(VAddrBits - 1, 0)
390  }
391  /**
392   * tlb req and tlb resp
393   */
394
395  // query DTLB IO Assign
396  io.dtlb.req                         := DontCare
397  io.dtlb.resp.ready                  := true.B
398  io.dtlb.req.valid                   := state === s_tlb_req && segmentActive
399  io.dtlb.req.bits.cmd                := Mux(FuType.isVLoad(fuType), TlbCmd.read, TlbCmd.write)
400  io.dtlb.req.bits.vaddr              := vaddr(VAddrBits - 1, 0)
401  io.dtlb.req.bits.fullva             := vaddr
402  io.dtlb.req.bits.checkfullva        := true.B
403  io.dtlb.req.bits.size               := instMicroOp.alignedType(2,0)
404  io.dtlb.req.bits.memidx.is_ld       := FuType.isVLoad(fuType)
405  io.dtlb.req.bits.memidx.is_st       := FuType.isVStore(fuType)
406  io.dtlb.req.bits.debug.robIdx       := instMicroOp.uop.robIdx
407  io.dtlb.req.bits.no_translate       := false.B
408  io.dtlb.req.bits.debug.pc           := instMicroOp.uop.pc
409  io.dtlb.req.bits.debug.isFirstIssue := DontCare
410  io.dtlb.req_kill                    := false.B
411
412  val canTriggerException              = segmentIdx === 0.U || !instMicroOp.isFof // only element 0, or a non-fof access, can trigger an exception
413
414  val segmentTrigger = Module(new VSegmentTrigger)
415  segmentTrigger.io.fromCsrTrigger.tdataVec             := io.fromCsrTrigger.tdataVec
416  segmentTrigger.io.fromCsrTrigger.tEnableVec           := io.fromCsrTrigger.tEnableVec
417  segmentTrigger.io.fromCsrTrigger.triggerCanRaiseBpExp := io.fromCsrTrigger.triggerCanRaiseBpExp
418  segmentTrigger.io.fromCsrTrigger.debugMode            := io.fromCsrTrigger.debugMode
419  segmentTrigger.io.memType                             := isVSegLoad
420  segmentTrigger.io.fromLoadStore.vaddr                 := latchVaddr
421  segmentTrigger.io.fromLoadStore.isVectorUnitStride    := false.B
422  segmentTrigger.io.fromLoadStore.mask                  := 0.U
423
424  val triggerAction = segmentTrigger.io.toLoadStore.triggerAction
425  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
426  val triggerBreakpoint = TriggerAction.isExp(triggerAction)
427
428  // tlb resp
429  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
430      exceptionVec(storePageFault)      := io.dtlb.resp.bits.excp(0).pf.st
431      exceptionVec(loadPageFault)       := io.dtlb.resp.bits.excp(0).pf.ld
432      exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st
433      exceptionVec(loadGuestPageFault)  := io.dtlb.resp.bits.excp(0).gpf.ld
434      exceptionVec(storeAccessFault)    := io.dtlb.resp.bits.excp(0).af.st
435      exceptionVec(loadAccessFault)     := io.dtlb.resp.bits.excp(0).af.ld
436      when(!io.dtlb.resp.bits.miss){
437        instMicroOp.paddr             := io.dtlb.resp.bits.paddr(0)
438        instMicroOp.exceptionGpaddr   := io.dtlb.resp.bits.gpaddr(0)
439        instMicroOp.exceptionIsForVSnonLeafPTE  := io.dtlb.resp.bits.isForVSnonLeafPTE
440      }
441  }
442  // pmp
443  // NOTE: only load/store exceptions are handled here; other exceptions are not sent through this path
444  val exceptionWithPf = exceptionVec(storePageFault) || exceptionVec(loadPageFault) || exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
445  val pmp = (io.pmpResp.asUInt & Fill(io.pmpResp.asUInt.getWidth, !exceptionWithPf)).asTypeOf(new PMPRespBundle())
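  // The PMP response is zeroed out when the dtlb already reported a (guest-)page fault, so a PMP
  // access fault can never override the higher-priority page fault recorded above.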
446  when(state === s_pm) {
447    val addr_aligned = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
448      "b00".U   -> true.B,                   //b
449      "b01".U   -> (vaddr(0)    === 0.U), //h
450      "b10".U   -> (vaddr(1, 0) === 0.U), //w
451      "b11".U   -> (vaddr(2, 0) === 0.U)  //d
452    ))
453    val missAligned = !addr_aligned
454    exceptionVec(loadAddrMisaligned)  := missAligned && FuType.isVSegLoad(fuType)  && canTriggerException
455    exceptionVec(storeAddrMisaligned) := missAligned && FuType.isVSegStore(fuType) && canTriggerException
456
457    exception_va  := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
458                     exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault) ||
459                     triggerBreakpoint || triggerDebugMode || missAligned
460    exception_gpa := exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
461    exception_pa  := pmp.st || pmp.ld || pmp.mmio
462
463    instMicroOp.exception_pa  := exception_pa
464    instMicroOp.exception_va  := exception_va
465    instMicroOp.exception_gpa := exception_gpa
466    // update the access fault bits; currently, vector MMIO is not supported
467    exceptionVec(loadAccessFault)  := (exceptionVec(loadAccessFault) || pmp.ld || pmp.mmio)   && FuType.isVSegLoad(fuType)  && canTriggerException
468    exceptionVec(storeAccessFault) := (exceptionVec(storeAccessFault) || pmp.st || pmp.mmio)  && FuType.isVSegStore(fuType) && canTriggerException
469    exceptionVec(breakPoint)       := triggerBreakpoint && canTriggerException
470
471    exceptionVec(storePageFault)      := exceptionVec(storePageFault)      && FuType.isVSegStore(fuType) && canTriggerException
472    exceptionVec(loadPageFault)       := exceptionVec(loadPageFault)       && FuType.isVSegLoad(fuType)  && canTriggerException
473    exceptionVec(storeGuestPageFault) := exceptionVec(storeGuestPageFault) && FuType.isVSegStore(fuType) && canTriggerException
474    exceptionVec(loadGuestPageFault)  := exceptionVec(loadGuestPageFault)  && FuType.isVSegLoad(fuType)  && canTriggerException
475
476    when(exception_va || exception_gpa || exception_pa) {
477      when(canTriggerException) {
478        instMicroOp.exceptionVaddr  := vaddr
479        instMicroOp.exceptionVstart := segmentIdx // for exception
480      }.otherwise {
481        instMicroOp.exceptionVl.valid := true.B
482        instMicroOp.exceptionVl.bits := segmentIdx
483      }
484    }
485
486    when(exceptionVec(breakPoint) || triggerDebugMode) {
487      instMicroOp.uop.trigger := triggerAction
488    }
489  }
490
491  /**
492   * flush sbuffer IO Assign
493   */
494  io.flush_sbuffer.valid           := !sbufferEmpty && (state === s_flush_sbuffer_req)
495
496
497  /**
498   * merge data for load
499   */
500  val cacheData = LookupTree(latchVaddr(3,0), List(
501    "b0000".U -> io.rdcache.resp.bits.data_delayed(63,    0),
502    "b0001".U -> io.rdcache.resp.bits.data_delayed(63,    8),
503    "b0010".U -> io.rdcache.resp.bits.data_delayed(63,   16),
504    "b0011".U -> io.rdcache.resp.bits.data_delayed(63,   24),
505    "b0100".U -> io.rdcache.resp.bits.data_delayed(63,   32),
506    "b0101".U -> io.rdcache.resp.bits.data_delayed(63,   40),
507    "b0110".U -> io.rdcache.resp.bits.data_delayed(63,   48),
508    "b0111".U -> io.rdcache.resp.bits.data_delayed(63,   56),
509    "b1000".U -> io.rdcache.resp.bits.data_delayed(127,  64),
510    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
511    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
512    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
513    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
514    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
515    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
516    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
517  ))
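  // latchVaddr(3,0) is the byte offset inside the 16-byte dcache data beat: the LookupTree above
  // shifts the response so that the accessed element starts at bit 0 (e.g. offset b0101, byte 5,
  // selects data_delayed(63, 40)), and rdataVecHelper below then picks the element-sized slice
  // according to alignedType.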
518  val pickData  = rdataVecHelper(alignedType(1,0), cacheData)
519  val mergedData = mergeDataWithElemIdx(
520    oldData = data(splitPtr.value),
521    newData = Seq(pickData),
522    alignedType = alignedType(1,0),
523    elemIdx = Seq(elemIdxInVd),
524    valids = Seq(true.B)
525  )
526  when(state === s_latch_and_merge_data && segmentActive){
527    data(splitPtr.value) := mergedData
528  }
529  /**
530   * split data for store
531   * */
532  val splitData = genVSData(
533    data = data(splitPtr.value),
534    elemIdx = elemIdxInVd,
535    alignedType = alignedType
536  )
537  val flowData  = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
538  val wmask     = genVWmask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
539
540  /**
541   * rdcache req; write requests don't need to query the dcache, because store elements are written to the sbuffer
542   */
543  io.rdcache.req                    := DontCare
544  io.rdcache.req.valid              := state === s_cache_req && FuType.isVLoad(fuType)
545  io.rdcache.req.bits.cmd           := MemoryOpConstants.M_XRD
546  io.rdcache.req.bits.vaddr         := latchVaddr
547  io.rdcache.req.bits.mask          := mask
548  io.rdcache.req.bits.data          := flowData
549  io.rdcache.pf_source              := LOAD_SOURCE.U
550  io.rdcache.req.bits.id            := DontCare
551  io.rdcache.resp.ready             := true.B
552  io.rdcache.s1_paddr_dup_lsu       := instMicroOp.paddr
553  io.rdcache.s1_paddr_dup_dcache    := instMicroOp.paddr
554  io.rdcache.s1_kill                := false.B
555  io.rdcache.s1_kill_data_read      := false.B
556  io.rdcache.s2_kill                := false.B
557  if (env.FPGAPlatform){
558    io.rdcache.s0_pc                := DontCare
559    io.rdcache.s1_pc                := DontCare
560    io.rdcache.s2_pc                := DontCare
561  }else{
562    io.rdcache.s0_pc                := instMicroOp.uop.pc
563    io.rdcache.s1_pc                := instMicroOp.uop.pc
564    io.rdcache.s2_pc                := instMicroOp.uop.pc
565  }
566  io.rdcache.replacementUpdated     := false.B
567  io.rdcache.is128Req               := false.B
568
569
570  /**
571   * write data to sbuffer
572   * */
573  sbufferOut.bits                  := DontCare
574  sbufferOut.valid                 := state === s_send_data && segmentActive
575  sbufferOut.bits.vecValid         := state === s_send_data && segmentActive
576  sbufferOut.bits.mask             := wmask
577  sbufferOut.bits.data             := flowData
578  sbufferOut.bits.vaddr            := latchVaddr
579  sbufferOut.bits.cmd              := MemoryOpConstants.M_XWR
580  sbufferOut.bits.id               := DontCare
581  sbufferOut.bits.addr             := instMicroOp.paddr
582
583  NewPipelineConnect(
584    sbufferOut, io.sbuffer, io.sbuffer.fire,
585    false.B,
586    Option(s"VSegmentUnitPipelineConnect")
587  )
588
589  io.vecDifftestInfo.valid         := io.sbuffer.valid
590  io.vecDifftestInfo.bits          := uopq(deqPtr.value).uop
591
592  /**
593   * update ptr
594   * */
595  private val fieldActiveWirteFinish = sbufferOut.fire && segmentActive // write data finished and the segment is active
596  XSError(sbufferOut.fire && !segmentActive, "Attempt to write an inactive segment to the sbuffer, something is wrong!\n")
597
598  private val segmentInactiveFinish = ((state === s_latch_and_merge_data) || (state === s_send_data)) && !segmentActive
599
600  val splitPtrOffset = Mux(
601    isIndexed(instType),
602    Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt),
603    Mux(emul.asSInt < 0.S, 1.U, (1.U << emul).asUInt)
604  )
605  splitPtrNext :=
606    Mux(fieldIdx === maxNfields || !segmentActive, // if the segment is active, complete all of its fields first; otherwise jump to the next segment
607      // segment finished: shift right by 'issueUopFlowNumLog2' so that emul != 1 still generates the correct lateral offset.
608     (deqPtr + ((segmentIdx +& 1.U) >> issueUopFlowNumLog2).asUInt),
609      // next field.
610     (splitPtr + splitPtrOffset)
611    )
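  // Worked example for the header diagram (a sketch, assuming VLEN = 128, so issueUopFlowNumLog2 = 2
  // and splitPtrOffset = 2 for emul = 2): within one segment splitPtr steps by splitPtrOffset
  // (v2 -> v4 -> v6 -> v8 for S0); when the last field of a segment finishes, splitPtr is recomputed
  // as deqPtr + ((segmentIdx + 1) >> 2), e.g. after S3 the next segment starts in v3 and after S7 in v4.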
612
613  if (backendParams.debugEn){
614    dontTouch(issueUopFlowNumLog2)
615    dontTouch(issueEmul)
616    dontTouch(splitPtrNext)
617    dontTouch(stridePtr)
618    dontTouch(segmentActive)
619  }
620
621  // update splitPtr
622  when(state === s_latch_and_merge_data || (state === s_send_data && (fieldActiveWirteFinish || !segmentActive))){
623    splitPtr := splitPtrNext
624  }.elsewhen(io.in.fire && !instMicroOpValid){
625    splitPtr := deqPtr // initial splitPtr
626  }
627
628  // update stridePtr, only used for indexed accesses
629  val strideOffset = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
630  stridePtr       := deqPtr + strideOffset
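  // For indexed accesses each entry of [[stride]] holds one index register: segmentIdx >> issueMaxIdxInIndexLog2
  // selects the entry containing the current segment's index, and issueIndexIdx picks the index element inside it.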
631
632  // update fieldIdx
633  when(io.in.fire && !instMicroOpValid){ // init
634    fieldIdx := 0.U
635  }.elsewhen(state === s_latch_and_merge_data && segmentActive ||
636            (state === s_send_data && fieldActiveWirteFinish)){ // only if segment is active
637
638    /* advance to the next field; wrap to 0 when the segment is complete */
639    fieldIdx := Mux(fieldIdx === maxNfields, 0.U, fieldIdx + 1.U)
640  }.elsewhen(segmentInactiveFinish){ // segment is inactive, go to next segment
641    fieldIdx := 0.U
642  }
643  //update segmentIdx
644  when(io.in.fire && !instMicroOpValid){
645    segmentIdx := 0.U
646  }.elsewhen(fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && fieldActiveWirteFinish)) &&
647             segmentIdx =/= maxSegIdx){ // next segment, only if segment is active
648
649    segmentIdx := segmentIdx + 1.U
650  }.elsewhen(segmentInactiveFinish && segmentIdx =/= maxSegIdx){ // if segment is inactive, go to next segment
651    segmentIdx := segmentIdx + 1.U
652  }
653
654  //update segmentOffset
655  /* advance segmentOffset whether the segment was active (last field written) or inactive (skipped) */
656  when((fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && fieldActiveWirteFinish))) ||
657       segmentInactiveFinish){
658
659    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew(1, 0), stride(stridePtr.value))
660  }
661
662  //update deqPtr
663  when((state === s_finish) && !isEmpty(enqPtr, deqPtr)){
664    deqPtr := deqPtr + 1.U
665  }
666
667
668  /*************************************************************************
669   *                            fof logic
670   *************************************************************************/
671
672  //Enq
673  when(isEnqFixVlUop && !fofBufferValid) { fofBuffer := io.in.bits.uop }
674  when(isEnqFixVlUop && !fofBufferValid) { fofBufferValid := true.B }
675
676  //Deq
677  val fofFixVlValid                    = state === s_fof_fix_vl && fofBufferValid
678
679  when(fofFixVlValid) { fofBuffer      := 0.U.asTypeOf(new DynInst) }
680  when(fofFixVlValid) { fofBufferValid := false.B }
681
682
683  /*************************************************************************
684   *                            dequeue logic
685   *************************************************************************/
686  val vdIdxInField = GenUopIdxInField(Mux(isIndexed(instType), issueLmul, issueEmul), uopq(deqPtr.value).uop.vpu.vuopIdx)
687  /* select the mask of vd, may be removed in the future */
688  val realEw        = Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
689  val maskDataVec: Vec[UInt] = VecDataToMaskDataVec(instMicroOp.mask, realEw)
690  val maskUsed      = maskDataVec(vdIdxInField)
691
692  when(stateNext === s_idle){
693    instMicroOpValid := false.B
694  }
695  // writeback to backend
696  val writebackOut                     = WireInit(io.uopwriteback.bits)
697  val writebackValid                   = (state === s_finish) && !isEmpty(enqPtr, deqPtr) || fofFixVlValid
698
699  when(fofFixVlValid) {
700    writebackOut.uop                    := fofBuffer
701    writebackOut.uop.vpu.vl             := instMicroOp.exceptionVl.bits
702    writebackOut.data                   := instMicroOp.exceptionVl.bits
703    writebackOut.mask.get               := Fill(VLEN, 1.U)
704    writebackOut.uop.vpu.vmask          := Fill(VLEN, 1.U)
705  }.otherwise{
706    writebackOut.uop                    := uopq(deqPtr.value).uop
707    writebackOut.uop.vpu                := instMicroOp.uop.vpu
708    writebackOut.uop.trigger            := instMicroOp.uop.trigger
709    writebackOut.uop.exceptionVec       := instMicroOp.uop.exceptionVec
710    writebackOut.mask.get               := instMicroOp.mask
711    writebackOut.data                   := data(deqPtr.value)
712    writebackOut.vdIdx.get              := vdIdxInField
713    writebackOut.uop.vpu.vl             := Mux(instMicroOp.exceptionVl.valid, instMicroOp.exceptionVl.bits, instMicroOp.vl)
714    writebackOut.uop.vpu.vstart         := Mux(instMicroOp.uop.exceptionVec.asUInt.orR || TriggerAction.isDmode(instMicroOp.uop.trigger), instMicroOp.exceptionVstart, instMicroOp.vstart)
715    writebackOut.uop.vpu.vmask          := maskUsed
716    writebackOut.uop.vpu.vuopIdx        := uopq(deqPtr.value).uop.vpu.vuopIdx
717    // when an exception updates vl, the tail-undisturbed (tu) policy should be used.
718    writebackOut.uop.vpu.vta            := Mux(instMicroOp.exceptionVl.valid, VType.tu, instMicroOp.uop.vpu.vta)
719    writebackOut.debug                  := DontCare
720    writebackOut.vdIdxInField.get       := vdIdxInField
721    writebackOut.uop.robIdx             := instMicroOp.uop.robIdx
722    writebackOut.uop.fuOpType           := instMicroOp.uop.fuOpType
723  }
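  // Two writeback flavours: the fof fix-vl path above writes back the buffered last uop of vleff with
  // the truncated exceptionVl in both vl and data, while the normal path writes back one uop per buffer
  // entry with the merged data, exception information, and (for an untriggerable fof fault) the reduced vl.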
724
725  io.uopwriteback.valid               := RegNext(writebackValid)
726  io.uopwriteback.bits                := RegEnable(writebackOut, writebackValid)
727
728  dontTouch(writebackValid)
729
730  //to RS
731  val feedbackOut                      = WireInit(0.U.asTypeOf(io.feedback.bits))
732  val feedbackValid                    = state === s_finish && !isEmpty(enqPtr, deqPtr)
733  feedbackOut.hit                     := true.B
734  feedbackOut.robIdx                  := instMicroOp.uop.robIdx
735  feedbackOut.sourceType              := DontCare
736  feedbackOut.flushState              := DontCare
737  feedbackOut.dataInvalidSqIdx        := DontCare
738  feedbackOut.sqIdx                   := uopq(deqPtr.value).uop.sqIdx
739  feedbackOut.lqIdx                   := uopq(deqPtr.value).uop.lqIdx
740
741  io.feedback.valid                   := RegNext(feedbackValid)
742  io.feedback.bits                    := RegEnable(feedbackOut, feedbackValid)
743
744  dontTouch(feedbackValid)
745
746  // exception
747  io.exceptionInfo                    := DontCare
748  io.exceptionInfo.bits.robidx        := instMicroOp.uop.robIdx
749  io.exceptionInfo.bits.uopidx        := uopq(deqPtr.value).uop.vpu.vuopIdx
750  io.exceptionInfo.bits.vstart        := instMicroOp.exceptionVstart
751  io.exceptionInfo.bits.vaddr         := instMicroOp.exceptionVaddr
752  io.exceptionInfo.bits.gpaddr        := instMicroOp.exceptionGpaddr
753  io.exceptionInfo.bits.isForVSnonLeafPTE := instMicroOp.exceptionIsForVSnonLeafPTE
754  io.exceptionInfo.bits.vl            := instMicroOp.exceptionVl.bits
755  io.exceptionInfo.valid              := (state === s_finish) && instMicroOp.uop.exceptionVec.asUInt.orR && !isEmpty(enqPtr, deqPtr)
756}
757
758