xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VSegmentUnit.scala (revision 149e918c520847554be4cf7f6594881d6d3a32c8)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._
import xiangshan.mem._
import xiangshan.backend.fu.{FuType, PMPRespBundle}
import freechips.rocketchip.diplomacy.BufferParams
import xiangshan.cache.mmu._
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.fu.util.SdtrigExt
import xiangshan.ExceptionNO._
import xiangshan.backend.fu.vector.Bundles.{VConfig, VType}
import xiangshan.backend.datapath.NewPipelineConnect
import xiangshan.backend.fu.NewCSR._
import xiangshan.backend.fu.vector.Utils.VecDataToMaskDataVec

class VSegmentBundle(implicit p: Parameters) extends VLSUBundle
{
  val baseVaddr        = UInt(XLEN.W)
  val uop              = new DynInst
  val paddr            = UInt(PAddrBits.W)
  val mask             = UInt(VLEN.W)
  val alignedType      = UInt(alignTypeBits.W)
  val vl               = UInt(elemIdxBits.W)
  val uopFlowNum       = UInt(elemIdxBits.W)
  val uopFlowNumMask   = UInt(elemIdxBits.W)
  val isVSegLoad       = Bool()
  val isVSegStore      = Bool()
  // for exception
  val vstart           = UInt(elemIdxBits.W)
  val exceptionVaddr   = UInt(XLEN.W)
  val exceptionGpaddr  = UInt(XLEN.W)
  val exceptionIsForVSnonLeafPTE = Bool()
  val exception_va     = Bool()
  val exception_gpa    = Bool()
  val exception_pa     = Bool()
  val exceptionVstart  = UInt(elemIdxBits.W)
  // valid: a fof exception occurred but cannot trigger a trap; every written-back uop's vl must be updated with exceptionVl
  val exceptionVl      = ValidIO(UInt(elemIdxBits.W))
  val isFof            = Bool()
}

// latch each uop's VecWen, pdest, v0Wen, uopIdx
class VSegmentUop(implicit p: Parameters) extends VLSUBundle{
  val uop              = new DynInst
}

class VSegmentUnit (implicit p: Parameters) extends VLSUModule
  with HasDCacheParameters
  with MemoryOpConstants
  with SdtrigExt
  with HasLoadHelper
{
  val io               = IO(new VSegmentUnitIO)

  val maxSize          = VSegmentBufferSize

  class VSegUPtr(implicit p: Parameters) extends CircularQueuePtr[VSegUPtr](maxSize){
  }

  object VSegUPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): VSegUPtr = {
      val ptr           = Wire(new VSegUPtr)
      ptr.flag         := f
      ptr.value        := v
      ptr
    }
  }
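
  // Usage note, a sketch of the CircularQueuePtr semantics relied upon below (based on the
  // utility's standard flag/value encoding): `value` indexes the ring buffer and `flag`
  // flips on each wrap-around, so that
  //   isEmpty(enq, deq) <=> enq.flag === deq.flag && enq.value === deq.value
  //   isFull(enq, deq)  <=> enq.flag =/= deq.flag && enq.value === deq.value
  // `isAfter` and `distanceBetween` compare pointers correctly across the wrap boundary.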


  /**
  ********************************************************************************************************
  *  Use an example to illustrate the working logic of the segment unit:                                 *
  *    For:                                                                                              *
  *      lmul=2 sew=32 emul=2 eew=32  vl=8                                                               *
  *    Then:                                                                                             *
  *      Access memory in the order:                                                                     *
  *        (V2,S0),(V4,S0),(V6,S0),(V8,S0),                                                              *
  *        (V2,S1),(V4,S1),(V6,S1),(V8,S1),                                                              *
  *        (V2,S2),(V4,S2),(V6,S2),(V8,S2),                                                              *
  *        (V2,S3),(V4,S3),(V6,S3),(V8,S3),                                                              *
  *        (V3,S4),(V5,S4),(V7,S4),(V9,S4),                                                              *
  *        (V3,S5),(V5,S5),(V7,S5),(V9,S5),                                                              *
  *        (V3,S6),(V5,S6),(V7,S6),(V9,S6),                                                              *
  *        (V3,S7),(V5,S7),(V7,S7),(V9,S7),                                                              *
  *                                                                                                      *
  *                                                                                                      *
  *    [[data]] saves the data generated by the accesses and corresponds to the vector registers.        *
  *    [[splitPtr]] selects the destination register being written.                                      *
  *                                                                                                      *
  *    The splitPtr offset can be seen in [[splitPtrNext]]'s assignment logic,                           *
  *    which is mainly calculated from [[fieldIdx]] and [[segmentIdx]].                                  *
  *    First access the different fields of one segment, then move on to the next segment.               *
  *    For the case of 'emul' greater than 1, as in the example below, although 'v2' and 'v3'            *
  *    hold the same field in different vd registers, they hold different segments, so they are          *
  *    accessed sequentially, just like the 'Access memory in the order' list above.                     *
  *                                                                                                      *
  *                         [[segmentIdx]]                                                               *
  *                               |                                                                      *
  *                               |                                                                      *
  *                               V                                                                      *
  *                                                                                                      *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------   *
  *  [[splitPtr]]--> v2  |     field0     |      field0     |      field0     |      field0         |   *
  *                      ----------------------------------------------------------------------------   *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------   *
  *                  v3  |     field0     |      field0     |      field0     |      field0         |   *
  *                      ----------------------------------------------------------------------------   *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------   *
  *                  v4  |     field1     |      field1     |      field1     |      field1         |   *
  *                      ----------------------------------------------------------------------------   *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------   *
  *                  v5  |     field1     |      field1     |      field1     |      field1         |   *
  *                      ----------------------------------------------------------------------------   *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------   *
  *                  v6  |     field2     |      field2     |      field2     |      field2         |   *
  *                      ----------------------------------------------------------------------------   *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------   *
  *                  v7  |     field2     |      field2     |      field2     |      field2         |   *
  *                      ----------------------------------------------------------------------------   *
  *                               S0               S1                S2                 S3               *
  *                      ----------------------------------------------------------------------------   *
  *                  v8  |     field3     |      field3     |      field3     |      field3         |   *
  *                      ----------------------------------------------------------------------------   *
  *                               S4               S5                S6                 S7               *
  *                      ----------------------------------------------------------------------------   *
  *                  v9  |     field3     |      field3     |      field3     |      field3         |   *
  *                      ----------------------------------------------------------------------------   *
  *                                                                                                      *
  ********************************************************************************************************
  **/
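
  /* A minimal pure-Scala sketch (a software model only, not elaborated hardware) of the
   * destination-slot arithmetic illustrated above, under the example's assumptions
   * (emul = 2, nf = 4 fields, VLEN = 128 and eew = 32, i.e. 4 elements per register).
   * `slotOf` is a hypothetical helper mirroring the [[splitPtrNext]] logic, with slot 0
   * holding v2, slot 1 holding v3, slot 2 holding v4, and so on:
   *
   *   def slotOf(segmentIdx: Int, fieldIdx: Int, emul: Int = 2, elemsPerReg: Int = 4): Int =
   *     (segmentIdx / elemsPerReg) + fieldIdx * emul
   *
   *   // slotOf(0, 0) == 0 (v2), slotOf(4, 0) == 1 (v3), slotOf(0, 1) == 2 (v4)
   */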


  // buffer uop
  val instMicroOp       = Reg(new VSegmentBundle)
  val instMicroOpValid  = RegInit(false.B)
  val data              = Reg(Vec(maxSize, UInt(VLEN.W)))
  val uopq              = Reg(Vec(maxSize, new VSegmentUop))
  val stride            = Reg(Vec(maxSize, UInt(VLEN.W)))
  val allocated         = RegInit(VecInit(Seq.fill(maxSize)(false.B)))
  val enqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
  val deqPtr            = RegInit(0.U.asTypeOf(new VSegUPtr))
  val stridePtr         = WireInit(0.U.asTypeOf(new VSegUPtr)) // for selecting stride/index

  val segmentIdx        = RegInit(0.U(elemIdxBits.W))
  val fieldIdx          = RegInit(0.U(fieldBits.W))
  val segmentOffset     = RegInit(0.U(XLEN.W))
  val splitPtr          = RegInit(0.U.asTypeOf(new VSegUPtr)) // for selecting load/store data
  val splitPtrNext      = WireInit(0.U.asTypeOf(new VSegUPtr))

  val exception_va      = WireInit(false.B)
  val exception_gpa     = WireInit(false.B)
  val exception_pa      = WireInit(false.B)

  val maxSegIdx         = instMicroOp.vl - 1.U
  val maxNfields        = instMicroOp.uop.vpu.nf
  val latchVaddr        = RegInit(0.U(VAddrBits.W))

  XSError((segmentIdx > maxSegIdx) && instMicroOpValid, s"segmentIdx > vl, something is wrong!\n")
  XSError((fieldIdx > maxNfields) &&  instMicroOpValid, s"fieldIdx > nfields, something is wrong!\n")

  // MicroOp
  val baseVaddr                       = instMicroOp.baseVaddr
  val alignedType                     = instMicroOp.alignedType
  val fuType                          = instMicroOp.uop.fuType
  val isVSegLoad                      = instMicroOp.isVSegLoad
  val isVSegStore                     = instMicroOp.isVSegStore
  val mask                            = instMicroOp.mask
  val exceptionVec                    = instMicroOp.uop.exceptionVec
  val issueEew                        = instMicroOp.uop.vpu.veew
  val issueLmul                       = instMicroOp.uop.vpu.vtype.vlmul
  val issueSew                        = instMicroOp.uop.vpu.vtype.vsew
  val issueEmul                       = EewLog2(issueEew) - issueSew + issueLmul
  val elemIdxInVd                     = segmentIdx & instMicroOp.uopFlowNumMask
  val issueInstType                   = Cat(true.B, instMicroOp.uop.fuOpType(6, 5)) // always a segment instruction
  val issueUopFlowNumLog2             = GenRealFlowLog2(issueInstType, issueEmul, issueLmul, issueEew, issueSew, true) // log2 of the max element number in a vd
  val issueVlMax                      = instMicroOp.uopFlowNum // max elementIdx in a vd
  val issueMaxIdxInIndex              = GenVLMAX(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0)) // max element index within one index register
  val issueMaxIdxInIndexMask          = GenVlMaxMask(issueMaxIdxInIndex, elemIdxBits)
  val issueMaxIdxInIndexLog2          = GenVLMAXLog2(Mux(issueEmul.asSInt > 0.S, 0.U, issueEmul), issueEew(1, 0))
  val issueIndexIdx                   = segmentIdx & issueMaxIdxInIndexMask
  val segmentActive                   = (mask & UIntToOH(segmentIdx)).orR
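
  // Worked example for the flow-count fields above (a sketch under the doc example's
  // assumptions: VLEN = 128, eew = 32): each vd holds VLEN/eew = 4 elements, so
  // uopFlowNum = 4 and uopFlowNumMask = 0x3; elemIdxInVd = segmentIdx & 0x3 then selects
  // the element slot inside the current vd, and issueIndexIdx does the same within an
  // index register.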

  // sbuffer write interface
  val sbufferOut                      = Wire(Decoupled(new DCacheWordReqWithVaddrAndPfFlag))


  // segment fof instruction buffer
  val fofBuffer                       = RegInit(0.U.asTypeOf(new DynInst))
  val fofBufferValid                  = RegInit(false.B)


  // Segment instruction's FSM
  /*
  * s_idle: wait for a request
  * s_flush_sbuffer_req: flush the sbuffer
  * s_wait_flush_sbuffer_resp: wait until the sbuffer is empty
  * s_tlb_req: send the tlb request
  * s_wait_tlb_resp: wait for the tlb response
  * s_pm: check pmp
  * s_cache_req: send the cache request
  * s_cache_resp: wait for the cache response
  * s_latch_and_merge_data: latch and merge read data
  * s_send_data: send write data
  * s_wait_to_sbuffer: wait for data in the sbufferOut pipeline register to be sent to the sbuffer
  * s_finish: write back uops until the buffer is empty
  * s_fof_fix_vl: write back the last uop of the fof instruction to fix vl
  * */
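  /* Typical flows through the FSM (derived from the transition logic below):
   *   load : s_idle -> s_flush_sbuffer_req [-> s_wait_flush_sbuffer_resp] ->
   *          { s_tlb_req -> s_wait_tlb_resp -> s_pm -> s_cache_req -> s_cache_resp
   *            -> s_latch_and_merge_data } per element -> s_finish [-> s_fof_fix_vl] -> s_idle
   *   store: same front end, but s_pm -> s_send_data [-> s_wait_to_sbuffer] instead of the
   *          dcache states, since store data goes to the sbuffer directly.
   */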
  val s_idle :: s_flush_sbuffer_req :: s_wait_flush_sbuffer_resp :: s_tlb_req :: s_wait_tlb_resp :: s_pm :: s_cache_req :: s_cache_resp :: s_latch_and_merge_data :: s_send_data :: s_wait_to_sbuffer :: s_finish :: s_fof_fix_vl :: Nil = Enum(13)
  val state             = RegInit(s_idle)
  val stateNext         = WireInit(s_idle)
  val sbufferEmpty      = io.flush_sbuffer.empty
  val isEnqfof          = io.in.bits.uop.fuOpType === VlduType.vleff && io.in.valid
  val isEnqFixVlUop     = isEnqfof && io.in.bits.uop.vpu.lastUop

  /**
   * state update
   */
  state  := stateNext

  /**
   * state transfer
   */
  when(state === s_idle){
    stateNext := Mux(isAfter(enqPtr, deqPtr), s_flush_sbuffer_req, s_idle)
  }.elsewhen(state === s_flush_sbuffer_req){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp) // if the sbuffer is already empty, query the tlb directly

  }.elsewhen(state === s_wait_flush_sbuffer_resp){
    stateNext := Mux(sbufferEmpty, s_tlb_req, s_wait_flush_sbuffer_resp)

  }.elsewhen(state === s_tlb_req){
    stateNext := Mux(segmentActive, s_wait_tlb_resp, Mux(isVSegLoad, s_latch_and_merge_data, s_send_data))

  }.elsewhen(state === s_wait_tlb_resp){
    stateNext := Mux(io.dtlb.resp.fire,
                      Mux(!io.dtlb.resp.bits.miss,
                          s_pm,
                          s_tlb_req),
                      s_wait_tlb_resp)

  }.elsewhen(state === s_pm){
    /* for a vector store, data is sent to the sbuffer, so the dcache need not be queried */
    stateNext := Mux(exception_pa || exception_va || exception_gpa,
                     s_finish,
                     Mux(isVSegLoad, s_cache_req, s_send_data))

  }.elsewhen(state === s_cache_req){
    stateNext := Mux(io.rdcache.req.fire, s_cache_resp, s_cache_req)

  }.elsewhen(state === s_cache_resp){
    when(io.rdcache.resp.fire) {
      when(io.rdcache.resp.bits.miss || io.rdcache.s2_bank_conflict) {
        stateNext := s_cache_req
      }.otherwise {
        stateNext := Mux(isVSegLoad, s_latch_and_merge_data, s_send_data)
      }
    }.otherwise{
      stateNext := s_cache_resp
    }
    /* if the segment is inactive, there is no need to access all of its fields */
  }.elsewhen(state === s_latch_and_merge_data) {
    when((segmentIdx === maxSegIdx) && (fieldIdx === maxNfields) ||
      ((segmentIdx === maxSegIdx) && !segmentActive)) {

      stateNext := s_finish // segment instruction finishes
    }.otherwise {
      stateNext := s_tlb_req // continue with the next element
    }
    /* if the segment is inactive, there is no need to access all of its fields */
  }.elsewhen(state === s_send_data) { // wait until the sbuffer accepts the data
    when(!sbufferOut.fire && segmentActive) {
      stateNext := s_send_data
    }.elsewhen(segmentIdx === maxSegIdx && (fieldIdx === maxNfields && sbufferOut.fire || !segmentActive && io.sbuffer.valid && !io.sbuffer.ready)) {
      stateNext := s_wait_to_sbuffer
    }.elsewhen(segmentIdx === maxSegIdx && !segmentActive){
      stateNext := s_finish // segment instruction finishes
    }.otherwise {
      stateNext := s_tlb_req // continue with the next element
    }

  }.elsewhen(state === s_wait_to_sbuffer){
    stateNext := Mux(io.sbuffer.fire, s_finish, s_wait_to_sbuffer)

  }.elsewhen(state === s_finish){ // writeback uop
    stateNext := Mux(
      distanceBetween(enqPtr, deqPtr) === 0.U,
      Mux(fofBufferValid, s_fof_fix_vl, s_idle),
      s_finish
    )
  }.elsewhen(state === s_fof_fix_vl){ // write back the fof vl-fix uop
    stateNext := Mux(!fofBufferValid, s_idle, s_fof_fix_vl)

  }.otherwise{
    stateNext := s_idle
    XSError(true.B, s"Unknown state!\n")
  }

  /*************************************************************************
   *                            enqueue logic
   *************************************************************************/
  io.in.ready                         := true.B
  val fuOpType                         = io.in.bits.uop.fuOpType
  val vtype                            = io.in.bits.uop.vpu.vtype
  val mop                              = fuOpType(6, 5)
  val instType                         = Cat(true.B, mop)
  val eew                              = io.in.bits.uop.vpu.veew
  val sew                              = vtype.vsew
  val lmul                             = vtype.vlmul
  val emul                             = EewLog2(eew) - sew + lmul
  val vl                               = instMicroOp.vl
  val vm                               = instMicroOp.uop.vpu.vm
  val vstart                           = instMicroOp.uop.vpu.vstart
  val srcMask                          = GenFlowMask(Mux(vm, Fill(VLEN, 1.U(1.W)), io.in.bits.src_mask), vstart, vl, true)
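
  // Note on `emul` above: it is computed in log2 space, following the RVV rule
  // EMUL = (EEW / SEW) * LMUL. A worked example (values assumed for illustration):
  // sew = 8, eew = 32, lmul = 1 gives emul = log2(32/8) + log2(1) = 2, i.e. EMUL = 4
  // vector registers per field.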
  // on the first uop's enqueue, latch the microOp of the segment instruction
  when(io.in.fire && !instMicroOpValid && !isEnqFixVlUop){
    // element number in a vd
    // TODO: rewrite it in a more elegant way.
    val uopFlowNum                    = ZeroExt(GenRealFlowNum(instType, emul, lmul, eew, sew, true), elemIdxBits)
    instMicroOp.baseVaddr             := io.in.bits.src_rs1
    instMicroOpValid                  := true.B // set on the first uop
    instMicroOp.alignedType           := Mux(isIndexed(instType), sew(1, 0), eew)
    instMicroOp.uop                   := io.in.bits.uop
    instMicroOp.mask                  := srcMask
    instMicroOp.vstart                := 0.U
    instMicroOp.uopFlowNum            := uopFlowNum
    instMicroOp.uopFlowNumMask        := GenVlMaxMask(uopFlowNum, elemIdxBits) // for merging data
    instMicroOp.vl                    := io.in.bits.src_vl.asTypeOf(VConfig()).vl
    instMicroOp.exceptionVl.valid     := false.B
    instMicroOp.exceptionVl.bits      := io.in.bits.src_vl.asTypeOf(VConfig()).vl
    segmentOffset                     := 0.U
    instMicroOp.isFof                 := (fuOpType === VlduType.vleff) && FuType.isVSegLoad(io.in.bits.uop.fuType)
    instMicroOp.isVSegLoad            := FuType.isVSegLoad(io.in.bits.uop.fuType)
    instMicroOp.isVSegStore           := FuType.isVSegStore(io.in.bits.uop.fuType)
  }
  // latch data
  when(io.in.fire && !isEnqFixVlUop){
    data(enqPtr.value)                := io.in.bits.src_vs3
    stride(enqPtr.value)              := io.in.bits.src_stride
    uopq(enqPtr.value).uop            := io.in.bits.uop
  }

  // update enqPtr; there is only one enqueue port
  when(io.in.fire && !isEnqFixVlUop){
    enqPtr                            := enqPtr + 1.U
  }

  /*************************************************************************
   *                            output logic
   *************************************************************************/

  val indexStride                     = IndexAddr( // index for indexed instructions
                                                    index = stride(stridePtr.value),
                                                    flow_inner_idx = issueIndexIdx,
                                                    eew = issueEew
                                                  )
  val realSegmentOffset               = Mux(isIndexed(issueInstType),
                                            indexStride,
                                            segmentOffset)
  val vaddr                           = baseVaddr + (fieldIdx << alignedType).asUInt + realSegmentOffset
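
  // Address sketch (unit-stride case, values assumed for illustration: nf = 4, eew = 32):
  // element (seg, field) lives at base + field * 4B + seg * (nf * 4B). realSegmentOffset
  // carries the per-segment part (or the loaded index for indexed accesses), and
  // (fieldIdx << alignedType) the per-field part.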

  // latch vaddr
  when(state === s_tlb_req){
    latchVaddr := vaddr(VAddrBits - 1, 0)
  }
  /**
   * tlb req and tlb resp
   */

  // query DTLB IO assignment
  io.dtlb.req                         := DontCare
  io.dtlb.resp.ready                  := true.B
  io.dtlb.req.valid                   := state === s_tlb_req && segmentActive
  io.dtlb.req.bits.cmd                := Mux(isVSegLoad, TlbCmd.read, TlbCmd.write)
  io.dtlb.req.bits.vaddr              := vaddr(VAddrBits - 1, 0)
  io.dtlb.req.bits.fullva             := vaddr
  io.dtlb.req.bits.checkfullva        := true.B
  io.dtlb.req.bits.size               := instMicroOp.alignedType(2,0)
  io.dtlb.req.bits.memidx.is_ld       := isVSegLoad
  io.dtlb.req.bits.memidx.is_st       := isVSegStore
  io.dtlb.req.bits.debug.robIdx       := instMicroOp.uop.robIdx
  io.dtlb.req.bits.no_translate       := false.B
  io.dtlb.req.bits.debug.pc           := instMicroOp.uop.pc
  io.dtlb.req.bits.debug.isFirstIssue := DontCare
  io.dtlb.req_kill                    := false.B

  // Only element 0, or any element of a non-fof instruction, may trigger an exception;
  // a fof (fault-only-first) instruction suppresses traps beyond element 0.
  val canTriggerException              = segmentIdx === 0.U || !instMicroOp.isFof

  val segmentTrigger = Module(new VSegmentTrigger)
  segmentTrigger.io.fromCsrTrigger.tdataVec             := io.fromCsrTrigger.tdataVec
  segmentTrigger.io.fromCsrTrigger.tEnableVec           := io.fromCsrTrigger.tEnableVec
  segmentTrigger.io.fromCsrTrigger.triggerCanRaiseBpExp := io.fromCsrTrigger.triggerCanRaiseBpExp
  segmentTrigger.io.fromCsrTrigger.debugMode            := io.fromCsrTrigger.debugMode
  segmentTrigger.io.memType                             := isVSegLoad
  segmentTrigger.io.fromLoadStore.vaddr                 := latchVaddr
  segmentTrigger.io.fromLoadStore.isVectorUnitStride    := false.B
  segmentTrigger.io.fromLoadStore.mask                  := 0.U

  val triggerAction = segmentTrigger.io.toLoadStore.triggerAction
  val triggerDebugMode = TriggerAction.isDmode(triggerAction)
  val triggerBreakpoint = TriggerAction.isExp(triggerAction)

  // tlb resp
  when(io.dtlb.resp.fire && state === s_wait_tlb_resp){
      exceptionVec(storePageFault)      := io.dtlb.resp.bits.excp(0).pf.st
      exceptionVec(loadPageFault)       := io.dtlb.resp.bits.excp(0).pf.ld
      exceptionVec(storeGuestPageFault) := io.dtlb.resp.bits.excp(0).gpf.st
      exceptionVec(loadGuestPageFault)  := io.dtlb.resp.bits.excp(0).gpf.ld
      exceptionVec(storeAccessFault)    := io.dtlb.resp.bits.excp(0).af.st
      exceptionVec(loadAccessFault)     := io.dtlb.resp.bits.excp(0).af.ld
      when(!io.dtlb.resp.bits.miss){
        instMicroOp.paddr             := io.dtlb.resp.bits.paddr(0)
        instMicroOp.exceptionGpaddr   := io.dtlb.resp.bits.gpaddr(0)
        instMicroOp.exceptionIsForVSnonLeafPTE  := io.dtlb.resp.bits.isForVSnonLeafPTE
      }
  }
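
  // Note: the translation results above are latched only on a tlb hit; on a miss the FSM
  // (see s_wait_tlb_resp) replays s_tlb_req, so a stale paddr is never consumed.
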
  // pmp
  // NOTE: only load/store exceptions are handled here; other exceptions must not be sent here
  val exceptionWithPf = exceptionVec(storePageFault) || exceptionVec(loadPageFault) || exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
  val pmp = (io.pmpResp.asUInt & Fill(io.pmpResp.asUInt.getWidth, !exceptionWithPf)).asTypeOf(new PMPRespBundle())
  when(state === s_pm) {
    val addr_aligned = LookupTree(Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0)), List(
      "b00".U   -> true.B,                //b
      "b01".U   -> (vaddr(0)    === 0.U), //h
      "b10".U   -> (vaddr(1, 0) === 0.U), //w
      "b11".U   -> (vaddr(2, 0) === 0.U)  //d
    ))
    val misaligned = !addr_aligned
    exceptionVec(loadAddrMisaligned)  := misaligned && isVSegLoad  && canTriggerException
    exceptionVec(storeAddrMisaligned) := misaligned && isVSegStore && canTriggerException
    exception_va  := exceptionVec(storePageFault) || exceptionVec(loadPageFault) ||
                     exceptionVec(storeAccessFault) || exceptionVec(loadAccessFault) ||
                     triggerBreakpoint || triggerDebugMode || misaligned
    exception_gpa := exceptionVec(storeGuestPageFault) || exceptionVec(loadGuestPageFault)
    exception_pa  := pmp.st || pmp.ld || pmp.mmio

    instMicroOp.exception_pa  := exception_pa
    instMicroOp.exception_va  := exception_va
    instMicroOp.exception_gpa := exception_gpa
    // update the access-fault bits; currently, vector MMIO is not supported
    exceptionVec(loadAccessFault)  := (exceptionVec(loadAccessFault) || pmp.ld || pmp.mmio)   && isVSegLoad  && canTriggerException
    exceptionVec(storeAccessFault) := (exceptionVec(storeAccessFault) || pmp.st || pmp.mmio)  && isVSegStore && canTriggerException
    exceptionVec(breakPoint)       := triggerBreakpoint && canTriggerException

    exceptionVec(storePageFault)      := exceptionVec(storePageFault)      && isVSegStore && canTriggerException
    exceptionVec(loadPageFault)       := exceptionVec(loadPageFault)       && isVSegLoad  && canTriggerException
    exceptionVec(storeGuestPageFault) := exceptionVec(storeGuestPageFault) && isVSegStore && canTriggerException
    exceptionVec(loadGuestPageFault)  := exceptionVec(loadGuestPageFault)  && isVSegLoad  && canTriggerException

    when(exception_va || exception_gpa || exception_pa) {
      when(canTriggerException) {
        instMicroOp.exceptionVaddr  := vaddr
        instMicroOp.exceptionVstart := segmentIdx // for exception
      }.otherwise {
        // fof beyond element 0: do not trap, only record where vl must be truncated
        instMicroOp.exceptionVl.valid := true.B
        instMicroOp.exceptionVl.bits := segmentIdx
      }
    }

    when(exceptionVec(breakPoint) || triggerDebugMode) {
      instMicroOp.uop.trigger := triggerAction
    }
  }

  /**
   * flush sbuffer IO assignment
   */
  io.flush_sbuffer.valid           := !sbufferEmpty && (state === s_flush_sbuffer_req)


  /**
   * merge data for load
   */
  val cacheData = LookupTree(latchVaddr(3,0), List(
    "b0000".U -> io.rdcache.resp.bits.data_delayed(63,    0),
    "b0001".U -> io.rdcache.resp.bits.data_delayed(63,    8),
    "b0010".U -> io.rdcache.resp.bits.data_delayed(63,   16),
    "b0011".U -> io.rdcache.resp.bits.data_delayed(63,   24),
    "b0100".U -> io.rdcache.resp.bits.data_delayed(63,   32),
    "b0101".U -> io.rdcache.resp.bits.data_delayed(63,   40),
    "b0110".U -> io.rdcache.resp.bits.data_delayed(63,   48),
    "b0111".U -> io.rdcache.resp.bits.data_delayed(63,   56),
    "b1000".U -> io.rdcache.resp.bits.data_delayed(127,  64),
    "b1001".U -> io.rdcache.resp.bits.data_delayed(127,  72),
    "b1010".U -> io.rdcache.resp.bits.data_delayed(127,  80),
    "b1011".U -> io.rdcache.resp.bits.data_delayed(127,  88),
    "b1100".U -> io.rdcache.resp.bits.data_delayed(127,  96),
    "b1101".U -> io.rdcache.resp.bits.data_delayed(127, 104),
    "b1110".U -> io.rdcache.resp.bits.data_delayed(127, 112),
    "b1111".U -> io.rdcache.resp.bits.data_delayed(127, 120)
  ))
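
  // Byte-select sketch for the table above: data_delayed is one 16B beat; latchVaddr(3)
  // picks the 64-bit half and the low offset bits the byte shift within it, so the lookup
  // behaves roughly like (a shift in bits, for the bytes an aligned element actually uses)
  //   cacheData ~= (data_delayed >> (latchVaddr(3,0) << 3))(63, 0)
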
  val pickData  = rdataVecHelper(alignedType(1,0), cacheData)
  val mergedData = mergeDataWithElemIdx(
    oldData = data(splitPtr.value),
    newData = Seq(pickData),
    alignedType = alignedType(1,0),
    elemIdx = Seq(elemIdxInVd),
    valids = Seq(true.B)
  )
  when(state === s_latch_and_merge_data && segmentActive){
    data(splitPtr.value) := mergedData
  }
  /**
   * split data for store
   * */
  val splitData = genVSData(
    data = data(splitPtr.value),
    elemIdx = elemIdxInVd,
    alignedType = alignedType
  )
  val flowData  = genVWdata(splitData, alignedType) // TODO: connect vstd, pass vector data
  val wmask     = genVWmask(latchVaddr, alignedType(1, 0)) & Fill(VLENB, segmentActive)
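
  // wmask sketch (an assumed example, not a definitive reading of genVWmask): for
  // alignedType = 2 (32-bit element) the byte mask is roughly "b1111" shifted to
  // latchVaddr's offset within the 16B line, and it is zeroed entirely when the
  // segment is inactive.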

  /**
   * rdcache req; write requests don't need to query the dcache, because elements are written to the sbuffer
   */
  io.rdcache.req                    := DontCare
  io.rdcache.req.valid              := state === s_cache_req && isVSegLoad
  io.rdcache.req.bits.cmd           := MemoryOpConstants.M_XRD
  io.rdcache.req.bits.vaddr         := latchVaddr
  io.rdcache.req.bits.mask          := mask
  io.rdcache.req.bits.data          := flowData
  io.rdcache.pf_source              := LOAD_SOURCE.U
  io.rdcache.req.bits.id            := DontCare
  io.rdcache.resp.ready             := true.B
  io.rdcache.s1_paddr_dup_lsu       := instMicroOp.paddr
  io.rdcache.s1_paddr_dup_dcache    := instMicroOp.paddr
  io.rdcache.s1_kill                := false.B
  io.rdcache.s1_kill_data_read      := false.B
  io.rdcache.s2_kill                := false.B
  if (env.FPGAPlatform){
    io.rdcache.s0_pc                := DontCare
    io.rdcache.s1_pc                := DontCare
    io.rdcache.s2_pc                := DontCare
  }else{
    io.rdcache.s0_pc                := instMicroOp.uop.pc
    io.rdcache.s1_pc                := instMicroOp.uop.pc
    io.rdcache.s2_pc                := instMicroOp.uop.pc
  }
  io.rdcache.replacementUpdated     := false.B
  io.rdcache.is128Req               := false.B


  /**
   * write data to sbuffer
   * */
  sbufferOut.bits                  := DontCare
  sbufferOut.valid                 := state === s_send_data && segmentActive
  sbufferOut.bits.vecValid         := state === s_send_data && segmentActive
  sbufferOut.bits.mask             := wmask
  sbufferOut.bits.data             := flowData
  sbufferOut.bits.vaddr            := latchVaddr
  sbufferOut.bits.cmd              := MemoryOpConstants.M_XWR
  sbufferOut.bits.id               := DontCare
  sbufferOut.bits.addr             := instMicroOp.paddr

  NewPipelineConnect(
    sbufferOut, io.sbuffer, io.sbuffer.fire,
    false.B,
    Option("VSegmentUnitPipelineConnect")
  )

  io.vecDifftestInfo.valid         := io.sbuffer.valid
  io.vecDifftestInfo.bits          := uopq(deqPtr.value).uop

  /**
   * update ptr
   * */
  private val fieldActiveWriteFinish = sbufferOut.fire && segmentActive // write of an active segment's data has finished
  XSError(sbufferOut.fire && !segmentActive, "Attempted to write an inactive segment to the sbuffer, something is wrong!\n")

  private val segmentInactiveFinish = ((state === s_latch_and_merge_data) || (state === s_send_data)) && !segmentActive

  val splitPtrOffset = Mux(
    isIndexed(instType),
    Mux(lmul.asSInt < 0.S, 1.U, (1.U << lmul).asUInt),
    Mux(emul.asSInt < 0.S, 1.U, (1.U << emul).asUInt)
  )
  splitPtrNext :=
    Mux(fieldIdx === maxNfields || !segmentActive, // an active segment must complete all its fields first; otherwise jump to the next segment
      // segment finished: shift right by 'issueUopFlowNumLog2' so that emul > 1 also generates the correct lateral offset
     (deqPtr + ((segmentIdx +& 1.U) >> issueUopFlowNumLog2).asUInt),
      // next field
     (splitPtr + splitPtrOffset)
    )
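
  // Worked example (under the doc example's assumptions: emul = 2, 4 elements per register,
  // nf = 4): after the last field of segment 3, splitPtrNext = deqPtr + ((3 + 1) >> 2) =
  // deqPtr + 1, i.e. v3's buffer slot; within one segment, each field steps the pointer by
  // splitPtrOffset = emul = 2 slots (v2 -> v4 -> v6 -> v8).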

  if (backendParams.debugEn){
    dontTouch(issueUopFlowNumLog2)
    dontTouch(issueEmul)
    dontTouch(splitPtrNext)
    dontTouch(stridePtr)
    dontTouch(segmentActive)
  }

  // update splitPtr
  when(state === s_latch_and_merge_data || (state === s_send_data && (fieldActiveWriteFinish || !segmentActive))){
    splitPtr := splitPtrNext
  }.elsewhen(io.in.fire && !instMicroOpValid){
    splitPtr := deqPtr // initialize splitPtr
  }

  // update stridePtr; only used by indexed accesses
  val strideOffset = Mux(isIndexed(issueInstType), segmentIdx >> issueMaxIdxInIndexLog2, 0.U)
  stridePtr       := deqPtr + strideOffset

  // update fieldIdx
  when(io.in.fire && !instMicroOpValid){ // init
    fieldIdx := 0.U
  }.elsewhen(state === s_latch_and_merge_data && segmentActive ||
            (state === s_send_data && fieldActiveWriteFinish)){ // only if the segment is active

    /* wrap to the next segment only when this segment's fields are complete */
    fieldIdx := Mux(fieldIdx === maxNfields, 0.U, fieldIdx + 1.U)
  }.elsewhen(segmentInactiveFinish){ // segment is inactive, go to the next segment
    fieldIdx := 0.U
  }
  // update segmentIdx
  when(io.in.fire && !instMicroOpValid){
    segmentIdx := 0.U
  }.elsewhen(fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && fieldActiveWriteFinish)) &&
             segmentIdx =/= maxSegIdx){ // next segment, only if the segment is active

    segmentIdx := segmentIdx + 1.U
  }.elsewhen(segmentInactiveFinish && segmentIdx =/= maxSegIdx){ // if the segment is inactive, go to the next segment
    segmentIdx := segmentIdx + 1.U
  }

  // update segmentOffset
  /* whether the segment is active or inactive, increase segmentOffset */
  when((fieldIdx === maxNfields && (state === s_latch_and_merge_data || (state === s_send_data && fieldActiveWriteFinish))) ||
       segmentInactiveFinish){

    segmentOffset := segmentOffset + Mux(isUnitStride(issueInstType), (maxNfields +& 1.U) << issueEew(1, 0), stride(stridePtr.value))
  }
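
  // Unit-stride increment sketch (values assumed for illustration): with nf = 4 fields of
  // 32-bit elements, maxNfields = 3 and issueEew(1,0) = 2, so segmentOffset advances by
  // (3 + 1) << 2 = 16 bytes per segment, i.e. one whole segment of nf * eewBytes bytes.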

  // update deqPtr
  when((state === s_finish) && !isEmpty(enqPtr, deqPtr)){
    deqPtr := deqPtr + 1.U
  }


  /*************************************************************************
   *                            fof logic
   *************************************************************************/

  // enq
  when(isEnqFixVlUop && !fofBufferValid) { fofBuffer := io.in.bits.uop }
  when(isEnqFixVlUop && !fofBufferValid) { fofBufferValid := true.B }

  // deq
  val fofFixVlValid                    = state === s_fof_fix_vl && fofBufferValid

  when(fofFixVlValid) { fofBuffer      := 0.U.asTypeOf(new DynInst) }
  when(fofFixVlValid) { fofBufferValid := false.B }


  /*************************************************************************
   *                            dequeue logic
   *************************************************************************/
  val vdIdxInField = GenUopIdxInField(Mux(isIndexed(instType), issueLmul, issueEmul), uopq(deqPtr.value).uop.vpu.vuopIdx)
  /* select the mask of this vd; may be removed in the future */
  val realEw        = Mux(isIndexed(issueInstType), issueSew(1, 0), issueEew(1, 0))
  val maskDataVec: Vec[UInt] = VecDataToMaskDataVec(instMicroOp.mask, realEw)
  val maskUsed      = maskDataVec(vdIdxInField)

  when(stateNext === s_idle){
    instMicroOpValid := false.B
  }
  // writeback to backend
  val writebackOut                     = WireInit(io.uopwriteback.bits)
  val writebackValid                   = (state === s_finish) && !isEmpty(enqPtr, deqPtr) || fofFixVlValid

  when(fofFixVlValid) {
    writebackOut.uop                    := fofBuffer
    writebackOut.uop.vpu.vl             := instMicroOp.exceptionVl.bits
    writebackOut.data                   := instMicroOp.exceptionVl.bits
    writebackOut.mask.get               := Fill(VLEN, 1.U)
    writebackOut.uop.vpu.vmask          := Fill(VLEN, 1.U)
  }.otherwise{
    writebackOut.uop                    := uopq(deqPtr.value).uop
    writebackOut.uop.vpu                := instMicroOp.uop.vpu
    writebackOut.uop.trigger            := instMicroOp.uop.trigger
    writebackOut.uop.exceptionVec       := instMicroOp.uop.exceptionVec
    writebackOut.mask.get               := instMicroOp.mask
    writebackOut.data                   := data(deqPtr.value)
    writebackOut.vdIdx.get              := vdIdxInField
    writebackOut.uop.vpu.vl             := Mux(instMicroOp.exceptionVl.valid, instMicroOp.exceptionVl.bits, instMicroOp.vl)
    writebackOut.uop.vpu.vstart         := Mux(instMicroOp.uop.exceptionVec.asUInt.orR || TriggerAction.isDmode(instMicroOp.uop.trigger), instMicroOp.exceptionVstart, instMicroOp.vstart)
    writebackOut.uop.vpu.vmask          := maskUsed
    writebackOut.uop.vpu.vuopIdx        := uopq(deqPtr.value).uop.vpu.vuopIdx
    // when an exception updates vl, the tail-undisturbed (vtu) strategy must be used
    writebackOut.uop.vpu.vta            := Mux(instMicroOp.exceptionVl.valid, VType.tu, instMicroOp.uop.vpu.vta)
    writebackOut.debug                  := DontCare
    writebackOut.vdIdxInField.get       := vdIdxInField
    writebackOut.uop.robIdx             := instMicroOp.uop.robIdx
    writebackOut.uop.fuOpType           := instMicroOp.uop.fuOpType
  }
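
  // The fofFixVlValid branch above implements the vleff (fault-only-first) semantics: a
  // fault past element 0 does not trap, it only truncates vl, so one extra uop writes the
  // corrected vl back to the backend.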

  io.uopwriteback.valid               := RegNext(writebackValid)
  io.uopwriteback.bits                := RegEnable(writebackOut, writebackValid)

  dontTouch(writebackValid)

  // to RS
  val feedbackOut                      = WireInit(0.U.asTypeOf(io.feedback.bits))
  val feedbackValid                    = state === s_finish && !isEmpty(enqPtr, deqPtr)
  feedbackOut.hit                     := true.B
  feedbackOut.robIdx                  := instMicroOp.uop.robIdx
  feedbackOut.sourceType              := DontCare
  feedbackOut.flushState              := DontCare
  feedbackOut.dataInvalidSqIdx        := DontCare
  feedbackOut.sqIdx                   := uopq(deqPtr.value).uop.sqIdx
  feedbackOut.lqIdx                   := uopq(deqPtr.value).uop.lqIdx

  io.feedback.valid                   := RegNext(feedbackValid)
  io.feedback.bits                    := RegEnable(feedbackOut, feedbackValid)

  dontTouch(feedbackValid)

  // exception
  io.exceptionInfo                    := DontCare
  io.exceptionInfo.bits.robidx        := instMicroOp.uop.robIdx
  io.exceptionInfo.bits.uopidx        := uopq(deqPtr.value).uop.vpu.vuopIdx
  io.exceptionInfo.bits.vstart        := instMicroOp.exceptionVstart
  io.exceptionInfo.bits.vaddr         := instMicroOp.exceptionVaddr
  io.exceptionInfo.bits.gpaddr        := instMicroOp.exceptionGpaddr
  io.exceptionInfo.bits.isForVSnonLeafPTE := instMicroOp.exceptionIsForVSnonLeafPTE
  io.exceptionInfo.bits.vl            := instMicroOp.exceptionVl.bits
  io.exceptionInfo.valid              := (state === s_finish) && instMicroOp.uop.exceptionVec.asUInt.orR && !isEmpty(enqPtr, deqPtr)
}