// xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 60311af7289a62d684795450811a0bae65e5b1c7)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one uop of a non-segment vector indexed load/store.
  *
  * For the fixed `uopIdx` given at construction, a truth table is built over every
  * `(emul, lmul)` pair in `0 until 4` x `0 until 4` and minimized with `QMCMinimizer`.
  *
  *  - `src`          : 4-bit selector, `emul` in bits 3..2 and `lmul` in bits 1..0
  *  - `outOffsetVs2` : first offset returned by [[genCsBundle_VEC_INDEXED_LDST]]
  *  - `outOffsetVd`  : second offset returned by [[genCsBundle_VEC_INDEXED_LDST]]
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the `(offsetVs2, offsetVd)` pair of uop `uopIdx`.
    *
    * `lmul` and `emul` are used as shift amounts, so the number of uops is
    * `1 << max(lmul, emul)`. The side with the larger value advances by one per uop,
    * while the other side advances once every `1 << |emul - lmul|` uops.
    *
    * @param lmul   shift amount for the second offset (vd side)
    * @param emul   shift amount for the first offset (vs2 side)
    * @param uopIdx index of the uop
    * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` is outside the uop range
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    if (lmul < emul) {
      // lmul < emul: there are (1 << emul) uops; the second offset moves every `ratio` uops
      val ratio = 1 << (emul - lmul)
      if (0 <= uopIdx && uopIdx < (1 << emul)) (uopIdx, uopIdx / ratio) else (0, 0)
    } else {
      // lmul >= emul: there are (1 << lmul) uops; the first offset moves every `ratio` uops
      val ratio = 1 << (lmul - emul)
      if (0 <= uopIdx && uopIdx < (1 << lmul)) (uopIdx / ratio, uopIdx) else (0, 0)
    }
  }

  // indexed load/store: one (emul, lmul, offsetVs2, offsetVd) row per combination,
  // emul-major so the row order matches the selector encoding emul << 2 | lmul
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Output encoding: offsetVs2 in bits 5..3, offsetVd in bits 2..0; unlisted inputs decode to 0
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Numeric constants mixed into the vector uop-splitting decoder.
  */
trait VectorConstants {
  /** Bound of the per-register loops that expand one vector instruction into uops. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as the temporary of the i2v move when splitting `vsetvl`. */
  val FP_TMP_REG_MV: Int = 32

  /** First logical index of the temporary vector registers: 33~47  ->  15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** NOTE(review): presumably the maximum uop count of an indexed load/store; confirm at the use site. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * IO bundle of the complex-instruction decoder `DecodeUnitComp`.
  *
  * Field names and declaration order define the hardware interface and must not be changed.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // Bypassed vtype; DecodeUnitComp connects it to the vpu fields of the two VSET uops
  val vtypeBypass = Input(new VType)
  // Handed in when the first instruction of the decode vector is a complex instruction:
  // its simple-decoder result together with the matching uop information
  val in = Flipped(Decoupled(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // RenameWidth decoupled output ports, each carrying one DecodedInst
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, Decoupled(new DecodedInst))
  }
  // NOTE(review): presumably the number of uops presented on `out` — confirm against the driver
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153
154  //Type of uop Div
155  val typeOfSplit = latchedInst.uopSplitType
156  val src1Type = latchedInst.srcType(0)
157  val src1IsImm = src1Type === SrcType.imm
158  val src1IsFp = src1Type === SrcType.fp
159
160  val isVstore = FuType.isVStore(latchedInst.fuType)
161
162  numOfUop := latchedUopInfo.numOfUop
163  numOfWB := latchedUopInfo.numOfWB
164
165  //uops dispatch
166  val s_idle :: s_active :: Nil = Enum(2)
167  val state = RegInit(s_idle)
168  val stateNext = WireDefault(state)
169  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
170  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
171  val uopResNext = WireInit(uopRes)
172  val e64 = 3.U(2.W)
173  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
174  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
175  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
176
177  //uop div up to maxUopSize
178  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
179  csBundle.foreach { case dst =>
180    dst := latchedInst
181    dst.numUops := latchedUopInfo.numOfUop
182    dst.numWB := latchedUopInfo.numOfWB
183    dst.firstUop := false.B
184    dst.lastUop := false.B
185    dst.vlsInstr := false.B
186  }
187
188  csBundle(0).firstUop := true.B
189  csBundle(numOfUop - 1.U).lastUop := true.B
190
191  switch(typeOfSplit) {
192    is(UopSplitType.VSET) {
193      // In simple decoder, rfWen and vecWen are not set
194      when(isVsetSimple) {
195        // Default
196        // uop0 set rd, never flushPipe
197        csBundle(0).fuType := FuType.vsetiwi.U
198        csBundle(0).flushPipe := false.B
199        csBundle(0).rfWen := true.B
200        // uop1 set vl, vsetvl will flushPipe
201        csBundle(1).ldest := VCONFIG_IDX.U
202        csBundle(1).vecWen := true.B
203        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
204          // write nothing, uop0 is a nop instruction
205          csBundle(0).rfWen := false.B
206          csBundle(0).fpWen := false.B
207          csBundle(0).vecWen := false.B
208          csBundle(1).fuType := FuType.vsetfwf.U
209          csBundle(1).srcType(0) := SrcType.vp
210          csBundle(1).lsrc(0) := VCONFIG_IDX.U
211        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
212          // uop0: mv vtype gpr to vector region
213          csBundle(0).srcType(0) := SrcType.xp
214          csBundle(0).srcType(1) := SrcType.no
215          csBundle(0).lsrc(1) := 0.U
216          csBundle(0).ldest := FP_TMP_REG_MV.U
217          csBundle(0).fuType := FuType.i2v.U
218          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
219          csBundle(0).rfWen := false.B
220          csBundle(0).fpWen := true.B
221          csBundle(0).vecWen := false.B
222          csBundle(0).flushPipe := false.B
223          // uop1: uvsetvcfg_vv
224          csBundle(1).fuType := FuType.vsetfwf.U
225          // vl
226          csBundle(1).srcType(0) := SrcType.vp
227          csBundle(1).lsrc(0) := VCONFIG_IDX.U
228          // vtype
229          csBundle(1).srcType(1) := SrcType.fp
230          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
231          csBundle(1).vecWen := true.B
232          csBundle(1).ldest := VCONFIG_IDX.U
233        }.elsewhen(dest === 0.U) {
234          // write nothing, uop0 is a nop instruction
235          csBundle(0).rfWen := false.B
236          csBundle(0).fpWen := false.B
237          csBundle(0).vecWen := false.B
238        }
239        // use bypass vtype from vtypeGen
240        csBundle(0).vpu.connectVType(io.vtypeBypass)
241        csBundle(1).vpu.connectVType(io.vtypeBypass)
242      }
243    }
244    is(UopSplitType.VEC_VVV) {
245      for (i <- 0 until MAX_VLMUL) {
246        csBundle(i).lsrc(0) := src1 + i.U
247        csBundle(i).lsrc(1) := src2 + i.U
248        csBundle(i).lsrc(2) := dest + i.U
249        csBundle(i).ldest := dest + i.U
250        csBundle(i).uopIdx := i.U
251      }
252    }
253    is(UopSplitType.VEC_VFV) {
254      /*
255      i to vector move
256       */
257      csBundle(0).srcType(0) := SrcType.fp
258      csBundle(0).srcType(1) := SrcType.imm
259      csBundle(0).lsrc(1) := 0.U
260      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
261      csBundle(0).fuType := FuType.f2v.U
262      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
263      csBundle(0).vecWen := true.B
264      csBundle(0).vpu.isReverse := false.B
265      /*
266      LMUL
267       */
268      for (i <- 0 until MAX_VLMUL) {
269        csBundle(i + 1).srcType(0) := SrcType.vp
270        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
271        csBundle(i + 1).lsrc(1) := src2 + i.U
272        csBundle(i + 1).lsrc(2) := dest + i.U
273        csBundle(i + 1).ldest := dest + i.U
274        csBundle(i + 1).uopIdx := i.U
275      }
276    }
277    is(UopSplitType.VEC_EXT2) {
278      for (i <- 0 until MAX_VLMUL / 2) {
279        csBundle(2 * i).lsrc(1) := src2 + i.U
280        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
281        csBundle(2 * i).ldest := dest + (2 * i).U
282        csBundle(2 * i).uopIdx := (2 * i).U
283        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
284        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
285        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
286        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
287      }
288    }
289    is(UopSplitType.VEC_EXT4) {
290      for (i <- 0 until MAX_VLMUL / 4) {
291        csBundle(4 * i).lsrc(1) := src2 + i.U
292        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
293        csBundle(4 * i).ldest := dest + (4 * i).U
294        csBundle(4 * i).uopIdx := (4 * i).U
295        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
296        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
297        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
298        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
299        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
300        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
301        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
302        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
303        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
304        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
305        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
306        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
307      }
308    }
309    is(UopSplitType.VEC_EXT8) {
310      for (i <- 0 until MAX_VLMUL) {
311        csBundle(i).lsrc(1) := src2
312        csBundle(i).lsrc(2) := dest + i.U
313        csBundle(i).ldest := dest + i.U
314        csBundle(i).uopIdx := i.U
315      }
316    }
317    is(UopSplitType.VEC_0XV) {
318      /*
319      i/f to vector move
320       */
321      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
322      csBundle(0).srcType(1) := SrcType.imm
323      csBundle(0).lsrc(1) := 0.U
324      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
325      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
326      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
327      csBundle(0).rfWen := false.B
328      csBundle(0).fpWen := false.B
329      csBundle(0).vecWen := true.B
330      /*
331      vmv.s.x
332       */
333      csBundle(1).srcType(0) := SrcType.vp
334      csBundle(1).srcType(1) := SrcType.imm
335      csBundle(1).srcType(2) := SrcType.vp
336      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
337      csBundle(1).lsrc(1) := 0.U
338      csBundle(1).lsrc(2) := dest
339      csBundle(1).ldest := dest
340      csBundle(1).rfWen := false.B
341      csBundle(1).fpWen := false.B
342      csBundle(1).vecWen := true.B
343      csBundle(1).uopIdx := 0.U
344    }
345    is(UopSplitType.VEC_VXV) {
346      /*
347      i to vector move
348       */
349      csBundle(0).srcType(0) := SrcType.reg
350      csBundle(0).srcType(1) := SrcType.imm
351      csBundle(0).lsrc(1) := 0.U
352      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
353      csBundle(0).fuType := FuType.i2v.U
354      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
355      csBundle(0).vecWen := true.B
356      csBundle(0).vpu.isReverse := false.B
357      /*
358      LMUL
359       */
360      for (i <- 0 until MAX_VLMUL) {
361        csBundle(i + 1).srcType(0) := SrcType.vp
362        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
363        csBundle(i + 1).lsrc(1) := src2 + i.U
364        csBundle(i + 1).lsrc(2) := dest + i.U
365        csBundle(i + 1).ldest := dest + i.U
366        csBundle(i + 1).uopIdx := i.U
367      }
368    }
369    is(UopSplitType.VEC_VVW) {
370      for (i <- 0 until MAX_VLMUL / 2) {
371        csBundle(2 * i).lsrc(0) := src1 + i.U
372        csBundle(2 * i).lsrc(1) := src2 + i.U
373        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
374        csBundle(2 * i).ldest := dest + (2 * i).U
375        csBundle(2 * i).uopIdx := (2 * i).U
376        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
377        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
378        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
379        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
380        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
381      }
382    }
383    is(UopSplitType.VEC_VFW) {
384      /*
385      f to vector move
386       */
387      csBundle(0).srcType(0) := SrcType.fp
388      csBundle(0).srcType(1) := SrcType.imm
389      csBundle(0).lsrc(1) := 0.U
390      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
391      csBundle(0).fuType := FuType.f2v.U
392      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
393      csBundle(0).rfWen := false.B
394      csBundle(0).fpWen := false.B
395      csBundle(0).vecWen := true.B
396
397      for (i <- 0 until MAX_VLMUL / 2) {
398        csBundle(2 * i + 1).srcType(0) := SrcType.vp
399        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
400        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
401        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
402        csBundle(2 * i + 1).ldest := dest + (2 * i).U
403        csBundle(2 * i + 1).uopIdx := (2 * i).U
404        csBundle(2 * i + 2).srcType(0) := SrcType.vp
405        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
406        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
407        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
408        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
409        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
410      }
411    }
412    is(UopSplitType.VEC_WVW) {
413      for (i <- 0 until MAX_VLMUL / 2) {
414        csBundle(2 * i).lsrc(0) := src1 + i.U
415        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
416        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
417        csBundle(2 * i).ldest := dest + (2 * i).U
418        csBundle(2 * i).uopIdx := (2 * i).U
419        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
420        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
421        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
422        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
423        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
424      }
425    }
426    is(UopSplitType.VEC_VXW) {
427      /*
428      i to vector move
429       */
430      csBundle(0).srcType(0) := SrcType.reg
431      csBundle(0).srcType(1) := SrcType.imm
432      csBundle(0).lsrc(1) := 0.U
433      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
434      csBundle(0).fuType := FuType.i2v.U
435      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
436      csBundle(0).vecWen := true.B
437
438      for (i <- 0 until MAX_VLMUL / 2) {
439        csBundle(2 * i + 1).srcType(0) := SrcType.vp
440        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
441        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
442        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
443        csBundle(2 * i + 1).ldest := dest + (2 * i).U
444        csBundle(2 * i + 1).uopIdx := (2 * i).U
445        csBundle(2 * i + 2).srcType(0) := SrcType.vp
446        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
447        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
448        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
449        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
450        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
451      }
452    }
453    is(UopSplitType.VEC_WXW) {
454      /*
455      i to vector move
456       */
457      csBundle(0).srcType(0) := SrcType.reg
458      csBundle(0).srcType(1) := SrcType.imm
459      csBundle(0).lsrc(1) := 0.U
460      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
461      csBundle(0).fuType := FuType.i2v.U
462      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
463      csBundle(0).vecWen := true.B
464
465      for (i <- 0 until MAX_VLMUL / 2) {
466        csBundle(2 * i + 1).srcType(0) := SrcType.vp
467        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
468        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
469        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
470        csBundle(2 * i + 1).ldest := dest + (2 * i).U
471        csBundle(2 * i + 1).uopIdx := (2 * i).U
472        csBundle(2 * i + 2).srcType(0) := SrcType.vp
473        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
474        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
475        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
476        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
477        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
478      }
479    }
480    is(UopSplitType.VEC_WVV) {
481      for (i <- 0 until MAX_VLMUL / 2) {
482
483        csBundle(2 * i).lsrc(0) := src1 + i.U
484        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
485        csBundle(2 * i).lsrc(2) := dest + i.U
486        csBundle(2 * i).ldest := dest + i.U
487        csBundle(2 * i).uopIdx := (2 * i).U
488        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
489        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
490        csBundle(2 * i + 1).lsrc(2) := dest + i.U
491        csBundle(2 * i + 1).ldest := dest + i.U
492        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
493      }
494    }
495    is(UopSplitType.VEC_WFW) {
496      /*
497      f to vector move
498       */
499      csBundle(0).srcType(0) := SrcType.fp
500      csBundle(0).srcType(1) := SrcType.imm
501      csBundle(0).lsrc(1) := 0.U
502      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
503      csBundle(0).fuType := FuType.f2v.U
504      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
505      csBundle(0).rfWen := false.B
506      csBundle(0).fpWen := false.B
507      csBundle(0).vecWen := true.B
508
509      for (i <- 0 until MAX_VLMUL / 2) {
510        csBundle(2 * i + 1).srcType(0) := SrcType.vp
511        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
512        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
513        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
514        csBundle(2 * i + 1).ldest := dest + (2 * i).U
515        csBundle(2 * i + 1).uopIdx := (2 * i).U
516        csBundle(2 * i + 2).srcType(0) := SrcType.vp
517        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
518        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
519        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
520        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
521        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
522      }
523    }
524    is(UopSplitType.VEC_WXV) {
525      /*
526      i to vector move
527       */
528      csBundle(0).srcType(0) := SrcType.reg
529      csBundle(0).srcType(1) := SrcType.imm
530      csBundle(0).lsrc(1) := 0.U
531      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
532      csBundle(0).fuType := FuType.i2v.U
533      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
534      csBundle(0).vecWen := true.B
535
536      for (i <- 0 until MAX_VLMUL / 2) {
537        csBundle(2 * i + 1).srcType(0) := SrcType.vp
538        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
539        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
540        csBundle(2 * i + 1).lsrc(2) := dest + i.U
541        csBundle(2 * i + 1).ldest := dest + i.U
542        csBundle(2 * i + 1).uopIdx := (2 * i).U
543        csBundle(2 * i + 2).srcType(0) := SrcType.vp
544        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
545        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
546        csBundle(2 * i + 2).lsrc(2) := dest + i.U
547        csBundle(2 * i + 2).ldest := dest + i.U
548        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
549      }
550    }
551    is(UopSplitType.VEC_VVM) {
552      csBundle(0).lsrc(2) := dest
553      csBundle(0).ldest := dest
554      csBundle(0).uopIdx := 0.U
555      for (i <- 1 until MAX_VLMUL) {
556        csBundle(i).lsrc(0) := src1 + i.U
557        csBundle(i).lsrc(1) := src2 + i.U
558        csBundle(i).lsrc(2) := dest
559        csBundle(i).ldest := dest
560        csBundle(i).uopIdx := i.U
561      }
562    }
563    is(UopSplitType.VEC_VFM) {
564      /*
565      f to vector move
566       */
567      csBundle(0).srcType(0) := SrcType.fp
568      csBundle(0).srcType(1) := SrcType.imm
569      csBundle(0).lsrc(1) := 0.U
570      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
571      csBundle(0).fuType := FuType.f2v.U
572      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
573      csBundle(0).rfWen := false.B
574      csBundle(0).fpWen := false.B
575      csBundle(0).vecWen := true.B
576      //LMUL
577      csBundle(1).srcType(0) := SrcType.vp
578      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
579      csBundle(1).lsrc(2) := dest
580      csBundle(1).ldest := dest
581      csBundle(1).uopIdx := 0.U
582      for (i <- 1 until MAX_VLMUL) {
583        csBundle(i + 1).srcType(0) := SrcType.vp
584        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
585        csBundle(i + 1).lsrc(1) := src2 + i.U
586        csBundle(i + 1).lsrc(2) := dest
587        csBundle(i + 1).ldest := dest
588        csBundle(i + 1).uopIdx := i.U
589      }
590      csBundle(numOfUop - 1.U).ldest := dest
591    }
592    is(UopSplitType.VEC_VXM) {
593      /*
594      i to vector move
595       */
596      csBundle(0).srcType(0) := SrcType.reg
597      csBundle(0).srcType(1) := SrcType.imm
598      csBundle(0).lsrc(1) := 0.U
599      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
600      csBundle(0).fuType := FuType.i2v.U
601      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
602      csBundle(0).vecWen := true.B
603      //LMUL
604      csBundle(1).srcType(0) := SrcType.vp
605      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
606      csBundle(1).lsrc(2) := dest
607      csBundle(1).ldest := dest
608      csBundle(1).uopIdx := 0.U
609      for (i <- 1 until MAX_VLMUL) {
610        csBundle(i + 1).srcType(0) := SrcType.vp
611        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
612        csBundle(i + 1).lsrc(1) := src2 + i.U
613        csBundle(i + 1).lsrc(2) := dest
614        csBundle(i + 1).ldest := dest
615        csBundle(i + 1).uopIdx := i.U
616      }
617      csBundle(numOfUop - 1.U).ldest := dest
618    }
619    is(UopSplitType.VEC_SLIDE1UP) {
620      /*
621      i to vector move
622       */
623      csBundle(0).srcType(0) := SrcType.reg
624      csBundle(0).srcType(1) := SrcType.imm
625      csBundle(0).lsrc(1) := 0.U
626      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
627      csBundle(0).fuType := FuType.i2v.U
628      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
629      csBundle(0).vecWen := true.B
630      //LMUL
631      csBundle(1).srcType(0) := SrcType.vp
632      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
633      csBundle(1).lsrc(2) := dest
634      csBundle(1).ldest := dest
635      csBundle(1).uopIdx := 0.U
636      for (i <- 1 until MAX_VLMUL) {
637        csBundle(i + 1).srcType(0) := SrcType.vp
638        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
639        csBundle(i + 1).lsrc(1) := src2 + i.U
640        csBundle(i + 1).lsrc(2) := dest + i.U
641        csBundle(i + 1).ldest := dest + i.U
642        csBundle(i + 1).uopIdx := i.U
643      }
644    }
645    is(UopSplitType.VEC_FSLIDE1UP) {
646      /*
647      i to vector move
648       */
649      csBundle(0).srcType(0) := SrcType.fp
650      csBundle(0).srcType(1) := SrcType.imm
651      csBundle(0).lsrc(1) := 0.U
652      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
653      csBundle(0).fuType := FuType.f2v.U
654      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
655      csBundle(0).rfWen := false.B
656      csBundle(0).fpWen := false.B
657      csBundle(0).vecWen := true.B
658      //LMUL
659      csBundle(1).srcType(0) := SrcType.vp
660      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
661      csBundle(1).lsrc(1) := src2
662      csBundle(1).lsrc(2) := dest
663      csBundle(1).ldest := dest
664      csBundle(1).uopIdx := 0.U
665      for (i <- 1 until MAX_VLMUL) {
666        csBundle(i + 1).srcType(0) := SrcType.vp
667        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
668        csBundle(i + 1).lsrc(1) := src2 + i.U
669        csBundle(i + 1).lsrc(2) := dest + i.U
670        csBundle(i + 1).ldest := dest + i.U
671        csBundle(i + 1).uopIdx := i.U
672      }
673    }
674    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
675      /*
676      i to vector move
677       */
678      csBundle(0).srcType(0) := SrcType.reg
679      csBundle(0).srcType(1) := SrcType.imm
680      csBundle(0).lsrc(1) := 0.U
681      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
682      csBundle(0).fuType := FuType.i2v.U
683      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
684      csBundle(0).vecWen := true.B
685      //LMUL
686      for (i <- 0 until MAX_VLMUL) {
687        csBundle(2 * i + 1).srcType(0) := SrcType.vp
688        csBundle(2 * i + 1).srcType(1) := SrcType.vp
689        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
690        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
691        csBundle(2 * i + 1).lsrc(2) := dest + i.U
692        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
693        csBundle(2 * i + 1).uopIdx := (2 * i).U
694        if (2 * i + 2 < MAX_VLMUL * 2) {
695          csBundle(2 * i + 2).srcType(0) := SrcType.vp
696          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
697          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
698          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
699          csBundle(2 * i + 2).ldest := dest + i.U
700          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
701        }
702      }
703      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
704      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
705      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
706    }
707    is(UopSplitType.VEC_FSLIDE1DOWN) {
708      /*
709      i to vector move
710       */
711      csBundle(0).srcType(0) := SrcType.fp
712      csBundle(0).srcType(1) := SrcType.imm
713      csBundle(0).lsrc(1) := 0.U
714      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
715      csBundle(0).fuType := FuType.f2v.U
716      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
717      csBundle(0).rfWen := false.B
718      csBundle(0).fpWen := false.B
719      csBundle(0).vecWen := true.B
720      //LMUL
721      for (i <- 0 until MAX_VLMUL) {
722        csBundle(2 * i + 1).srcType(0) := SrcType.vp
723        csBundle(2 * i + 1).srcType(1) := SrcType.vp
724        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
725        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
726        csBundle(2 * i + 1).lsrc(2) := dest + i.U
727        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
728        csBundle(2 * i + 1).uopIdx := (2 * i).U
729        if (2 * i + 2 < MAX_VLMUL * 2) {
730          csBundle(2 * i + 2).srcType(0) := SrcType.vp
731          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
732          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
733          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
734          csBundle(2 * i + 2).ldest := dest + i.U
735          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
736        }
737      }
738      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
739      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
740      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
741    }
742    is(UopSplitType.VEC_VRED) {
743      when(vlmulReg === "b001".U) {
744        csBundle(0).srcType(2) := SrcType.DC
745        csBundle(0).lsrc(0) := src2 + 1.U
746        csBundle(0).lsrc(1) := src2
747        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
748        csBundle(0).uopIdx := 0.U
749      }
750      when(vlmulReg === "b010".U) {
751        csBundle(0).srcType(2) := SrcType.DC
752        csBundle(0).lsrc(0) := src2 + 1.U
753        csBundle(0).lsrc(1) := src2
754        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
755        csBundle(0).uopIdx := 0.U
756
757        csBundle(1).srcType(2) := SrcType.DC
758        csBundle(1).lsrc(0) := src2 + 3.U
759        csBundle(1).lsrc(1) := src2 + 2.U
760        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
761        csBundle(1).uopIdx := 1.U
762
763        csBundle(2).srcType(2) := SrcType.DC
764        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
765        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
766        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
767        csBundle(2).uopIdx := 2.U
768      }
769      when(vlmulReg === "b011".U) {
770        for (i <- 0 until MAX_VLMUL) {
771          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
772            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
773            csBundle(i).lsrc(1) := src2 + (i * 2).U
774            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
775          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
776            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
777            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
778            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
779          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
780            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
781            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
782            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
783          }
784          csBundle(i).srcType(2) := SrcType.DC
785          csBundle(i).uopIdx := i.U
786        }
787      }
788      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
789        /*
790         * 2 <= vlmul <= 8
791         */
792        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
793        csBundle(numOfUop - 1.U).lsrc(0) := src1
794        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
795        csBundle(numOfUop - 1.U).lsrc(2) := dest
796        csBundle(numOfUop - 1.U).ldest := dest
797        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
798      }
799    }
800    is(UopSplitType.VEC_VFRED) {
801      val vlmul = vlmulReg
802      val vsew = vsewReg
803      when(vlmul === VLmul.m8){
804        for (i <- 0 until 4) {
805          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
806          csBundle(i).lsrc(1) := src2 + (i * 2).U
807          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
808          csBundle(i).uopIdx := i.U
809        }
810        for (i <- 4 until 6) {
811          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
812          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
813          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
814          csBundle(i).uopIdx := i.U
815        }
816        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
817        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
818        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
819        csBundle(6).uopIdx := 6.U
820        when(vsew === VSew.e64) {
821          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
822          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
823          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
824          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
825          csBundle(7).uopIdx := 7.U
826          csBundle(8).lsrc(0) := src1
827          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
828          csBundle(8).ldest := dest
829          csBundle(8).uopIdx := 8.U
830        }
831        when(vsew === VSew.e32) {
832          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
833          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
834          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
835          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
836          csBundle(7).uopIdx := 7.U
837          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
838          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
839          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
840          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
841          csBundle(8).uopIdx := 8.U
842          csBundle(9).lsrc(0) := src1
843          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
844          csBundle(9).ldest := dest
845          csBundle(9).uopIdx := 9.U
846        }
847        when(vsew === VSew.e16) {
848          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
849          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
850          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
851          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
852          csBundle(7).uopIdx := 7.U
853          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
854          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
855          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
856          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
857          csBundle(8).uopIdx := 8.U
858          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
859          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
860          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
861          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
862          csBundle(9).uopIdx := 9.U
863          csBundle(10).lsrc(0) := src1
864          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
865          csBundle(10).ldest := dest
866          csBundle(10).uopIdx := 10.U
867        }
868      }
869      when(vlmul === VLmul.m4) {
870        for (i <- 0 until 2) {
871          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
872          csBundle(i).lsrc(1) := src2 + (i * 2).U
873          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
874          csBundle(i).uopIdx := i.U
875        }
876        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
877        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
878        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
879        csBundle(2).uopIdx := 2.U
880        when(vsew === VSew.e64) {
881          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
882          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
883          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
884          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
885          csBundle(3).uopIdx := 3.U
886          csBundle(4).lsrc(0) := src1
887          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
888          csBundle(4).ldest := dest
889          csBundle(4).uopIdx := 4.U
890        }
891        when(vsew === VSew.e32) {
892          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
893          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
894          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
895          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
896          csBundle(3).uopIdx := 3.U
897          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
898          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
899          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
900          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
901          csBundle(4).uopIdx := 4.U
902          csBundle(5).lsrc(0) := src1
903          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
904          csBundle(5).ldest := dest
905          csBundle(5).uopIdx := 5.U
906        }
907        when(vsew === VSew.e16) {
908          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
909          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
910          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
911          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
912          csBundle(3).uopIdx := 3.U
913          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
914          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
915          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
916          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
917          csBundle(4).uopIdx := 4.U
918          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
919          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
920          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
921          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
922          csBundle(5).uopIdx := 5.U
923          csBundle(6).lsrc(0) := src1
924          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
925          csBundle(6).ldest := dest
926          csBundle(6).uopIdx := 6.U
927        }
928      }
929      when(vlmul === VLmul.m2) {
930        csBundle(0).lsrc(0) := src2 + 1.U
931        csBundle(0).lsrc(1) := src2 + 0.U
932        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
933        csBundle(0).uopIdx := 0.U
934        when(vsew === VSew.e64) {
935          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
936          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
937          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
938          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
939          csBundle(1).uopIdx := 1.U
940          csBundle(2).lsrc(0) := src1
941          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
942          csBundle(2).ldest := dest
943          csBundle(2).uopIdx := 2.U
944        }
945        when(vsew === VSew.e32) {
946          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
947          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
948          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
949          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
950          csBundle(1).uopIdx := 1.U
951          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
952          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
953          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
954          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
955          csBundle(2).uopIdx := 2.U
956          csBundle(3).lsrc(0) := src1
957          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
958          csBundle(3).ldest := dest
959          csBundle(3).uopIdx := 3.U
960        }
961        when(vsew === VSew.e16) {
962          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
963          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
964          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
965          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
966          csBundle(1).uopIdx := 1.U
967          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
968          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
969          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
970          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
971          csBundle(2).uopIdx := 2.U
972          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
973          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
974          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
975          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
976          csBundle(3).uopIdx := 3.U
977          csBundle(4).lsrc(0) := src1
978          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
979          csBundle(4).ldest := dest
980          csBundle(4).uopIdx := 4.U
981        }
982      }
983      when(vlmul === VLmul.m1) {
984        when(vsew === VSew.e64) {
985          csBundle(0).lsrc(0) := src2
986          csBundle(0).lsrc(1) := src2
987          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
988          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
989          csBundle(0).uopIdx := 0.U
990          csBundle(1).lsrc(0) := src1
991          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
992          csBundle(1).ldest := dest
993          csBundle(1).uopIdx := 1.U
994        }
995        when(vsew === VSew.e32) {
996          csBundle(0).lsrc(0) := src2
997          csBundle(0).lsrc(1) := src2
998          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
999          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1000          csBundle(0).uopIdx := 0.U
1001          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1002          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1003          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1004          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1005          csBundle(1).uopIdx := 1.U
1006          csBundle(2).lsrc(0) := src1
1007          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1008          csBundle(2).ldest := dest
1009          csBundle(2).uopIdx := 2.U
1010        }
1011        when(vsew === VSew.e16) {
1012          csBundle(0).lsrc(0) := src2
1013          csBundle(0).lsrc(1) := src2
1014          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1015          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1016          csBundle(0).uopIdx := 0.U
1017          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1018          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1019          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1020          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1021          csBundle(1).uopIdx := 1.U
1022          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1023          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1024          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1025          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1026          csBundle(2).uopIdx := 2.U
1027          csBundle(3).lsrc(0) := src1
1028          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1029          csBundle(3).ldest := dest
1030          csBundle(3).uopIdx := 3.U
1031        }
1032      }
1033      when(vlmul === VLmul.mf2) {
1034        when(vsew === VSew.e32) {
1035          csBundle(0).lsrc(0) := src2
1036          csBundle(0).lsrc(1) := src2
1037          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1038          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1039          csBundle(0).uopIdx := 0.U
1040          csBundle(1).lsrc(0) := src1
1041          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1042          csBundle(1).ldest := dest
1043          csBundle(1).uopIdx := 1.U
1044        }
1045        when(vsew === VSew.e16) {
1046          csBundle(0).lsrc(0) := src2
1047          csBundle(0).lsrc(1) := src2
1048          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1049          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1050          csBundle(0).uopIdx := 0.U
1051          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1052          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1053          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1054          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1055          csBundle(1).uopIdx := 1.U
1056          csBundle(2).lsrc(0) := src1
1057          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1058          csBundle(2).ldest := dest
1059          csBundle(2).uopIdx := 2.U
1060        }
1061      }
1062      when(vlmul === VLmul.mf4) {
1063        when(vsew === VSew.e16) {
1064          csBundle(0).lsrc(0) := src2
1065          csBundle(0).lsrc(1) := src2
1066          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1067          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1068          csBundle(0).uopIdx := 0.U
1069          csBundle(1).lsrc(0) := src1
1070          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1071          csBundle(1).ldest := dest
1072          csBundle(1).uopIdx := 1.U
1073        }
1074      }
1075    }
1076
1077    is(UopSplitType.VEC_VFREDOSUM) {
1078      import yunsuan.VfaluType
1079      val vlmul = vlmulReg
1080      val vsew = vsewReg
1081      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1082      when(vlmul === VLmul.m8) {
1083        when(vsew === VSew.e64) {
1084          val vlmax = 16
1085          for (i <- 0 until vlmax) {
1086            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1087            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1088            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1089            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1090            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1091            csBundle(i).uopIdx := i.U
1092          }
1093        }
1094        when(vsew === VSew.e32) {
1095          val vlmax = 32
1096          for (i <- 0 until vlmax) {
1097            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1098            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1099            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1100            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1101            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1102            csBundle(i).uopIdx := i.U
1103          }
1104        }
1105        when(vsew === VSew.e16) {
1106          val vlmax = 64
1107          for (i <- 0 until vlmax) {
1108            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1109            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1110            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1111            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1112            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1113            csBundle(i).uopIdx := i.U
1114          }
1115        }
1116      }
1117      when(vlmul === VLmul.m4) {
1118        when(vsew === VSew.e64) {
1119          val vlmax = 8
1120          for (i <- 0 until vlmax) {
1121            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1122            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1123            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1124            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1125            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1126            csBundle(i).uopIdx := i.U
1127          }
1128        }
1129        when(vsew === VSew.e32) {
1130          val vlmax = 16
1131          for (i <- 0 until vlmax) {
1132            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1133            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1134            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1135            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1137            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1138            csBundle(i).uopIdx := i.U
1139          }
1140        }
1141        when(vsew === VSew.e16) {
1142          val vlmax = 32
1143          for (i <- 0 until vlmax) {
1144            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1145            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1146            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1147            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1149            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1150            csBundle(i).uopIdx := i.U
1151          }
1152        }
1153      }
1154      when(vlmul === VLmul.m2) {
1155        when(vsew === VSew.e64) {
1156          val vlmax = 4
1157          for (i <- 0 until vlmax) {
1158            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1161            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1162            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1163            csBundle(i).uopIdx := i.U
1164          }
1165        }
1166        when(vsew === VSew.e32) {
1167          val vlmax = 8
1168          for (i <- 0 until vlmax) {
1169            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1170            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1172            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1173            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1174            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1175            csBundle(i).uopIdx := i.U
1176          }
1177        }
1178        when(vsew === VSew.e16) {
1179          val vlmax = 16
1180          for (i <- 0 until vlmax) {
1181            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1182            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1183            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1184            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1185            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1186            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1187            csBundle(i).uopIdx := i.U
1188          }
1189        }
1190      }
1191      when(vlmul === VLmul.m1) {
1192        when(vsew === VSew.e64) {
1193          val vlmax = 2
1194          for (i <- 0 until vlmax) {
1195            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1197            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1198            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1199            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1200            csBundle(i).uopIdx := i.U
1201          }
1202        }
1203        when(vsew === VSew.e32) {
1204          val vlmax = 4
1205          for (i <- 0 until vlmax) {
1206            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1207            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1208            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1209            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1211            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1212            csBundle(i).uopIdx := i.U
1213          }
1214        }
1215        when(vsew === VSew.e16) {
1216          val vlmax = 8
1217          for (i <- 0 until vlmax) {
1218            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1219            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1220            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1223            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1224            csBundle(i).uopIdx := i.U
1225          }
1226        }
1227      }
1228      when(vlmul === VLmul.mf2) {
1229        when(vsew === VSew.e32) {
1230          val vlmax = 2
1231          for (i <- 0 until vlmax) {
1232            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1235            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1236            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1237            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1238            csBundle(i).uopIdx := i.U
1239          }
1240        }
1241        when(vsew === VSew.e16) {
1242          val vlmax = 4
1243          for (i <- 0 until vlmax) {
1244            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1245            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1246            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1248            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1249            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1250            csBundle(i).uopIdx := i.U
1251          }
1252        }
1253      }
1254      when(vlmul === VLmul.mf4) {
1255        when(vsew === VSew.e16) {
1256          val vlmax = 2
1257          for (i <- 0 until vlmax) {
1258            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1260            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1261            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1262            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1263            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1264            csBundle(i).uopIdx := i.U
1265          }
1266        }
1267      }
1268    }
1269
1270    is(UopSplitType.VEC_SLIDEUP) {
1271      // i to vector move
1272      csBundle(0).srcType(0) := SrcType.reg
1273      csBundle(0).srcType(1) := SrcType.imm
1274      csBundle(0).lsrc(1) := 0.U
1275      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1276      csBundle(0).fuType := FuType.i2v.U
1277      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1278      csBundle(0).vecWen := true.B
1279      // LMUL
1280      for (i <- 0 until MAX_VLMUL)
1281        for (j <- 0 to i) {
1282          val old_vd = if (j == 0) {
1283            dest + i.U
1284          } else (VECTOR_TMP_REG_LMUL + j).U
1285          val vd = if (j == i) {
1286            dest + i.U
1287          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1288          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1289          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1290          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1291          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1292          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1293          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1294        }
1295    }
1296
1297    is(UopSplitType.VEC_SLIDEDOWN) {
1298      // i to vector move
1299      csBundle(0).srcType(0) := SrcType.reg
1300      csBundle(0).srcType(1) := SrcType.imm
1301      csBundle(0).lsrc(1) := 0.U
1302      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1303      csBundle(0).fuType := FuType.i2v.U
1304      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1305      csBundle(0).vecWen := true.B
1306      // LMUL
1307      for (i <- 0 until MAX_VLMUL)
1308        for (j <- (0 to i).reverse) {
1309          when(i.U < lmul) {
1310            val old_vd = if (j == 0) {
1311              dest + lmul - 1.U - i.U
1312            } else (VECTOR_TMP_REG_LMUL + j).U
1313            val vd = if (j == i) {
1314              dest + lmul - 1.U - i.U
1315            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1316            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1317            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1318            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1319            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1320            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1321            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1322          }
1323        }
1324    }
1325
1326    is(UopSplitType.VEC_M0X) {
1327      // LMUL
1328      for (i <- 0 until MAX_VLMUL) {
1329        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1330        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1331        csBundle(i).srcType(0) := srcType0
1332        csBundle(i).srcType(1) := SrcType.vp
1333        csBundle(i).rfWen := false.B
1334        csBundle(i).fpWen := false.B
1335        csBundle(i).vecWen := true.B
1336        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1337        csBundle(i).lsrc(1) := src2
1338        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1339        csBundle(i).ldest := ldest
1340        csBundle(i).uopIdx := i.U
1341      }
1342      csBundle(lmul - 1.U).rfWen := true.B
1343      csBundle(lmul - 1.U).fpWen := false.B
1344      csBundle(lmul - 1.U).vecWen := false.B
1345      csBundle(lmul - 1.U).ldest := dest
1346    }
1347
1348    is(UopSplitType.VEC_MVV) {
1349      // LMUL
1350      for (i <- 0 until MAX_VLMUL) {
1351        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1352        csBundle(i * 2 + 0).srcType(0) := srcType0
1353        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1354        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1355        csBundle(i * 2 + 0).lsrc(1) := src2
1356        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1357        csBundle(i * 2 + 0).ldest := dest + i.U
1358        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1359
1360        csBundle(i * 2 + 1).srcType(0) := srcType0
1361        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1362        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1363        csBundle(i * 2 + 1).lsrc(1) := src2
1364        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1365        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1366        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1367      }
1368    }
1369
1370    is(UopSplitType.VEC_M0X_VFIRST) {
1371      // LMUL
1372      csBundle(0).rfWen := true.B
1373      csBundle(0).fpWen := false.B
1374      csBundle(0).vecWen := false.B
1375      csBundle(0).ldest := dest
1376    }
1377    is(UopSplitType.VEC_VWW) {
          // Uop split for VEC_VWW, over MAX_VLMUL * 2 uop slots:
          //  - slots i < lmul read src2 + i on both lsrc(0) and lsrc(1) and write temporary
          //    VECTOR_TMP_REG_LMUL + i;
          //  - later slots read temporaries 2*(i - lmul) + 1 and 2*(i - lmul)
          //    (Cat(x, 0.U(1.W)) is x shifted left by one bit) and write temporary i;
          //  - the last uop (index numOfUop - 1) is then overridden to read src1 and the old
          //    dest and to write dest.
1378      for (i <- 0 until MAX_VLMUL*2) {
1379        when(i.U < lmul){
1380          csBundle(i).srcType(2) := SrcType.DC
1381          csBundle(i).lsrc(0) := src2 + i.U
1382          csBundle(i).lsrc(1) := src2 + i.U
1383          // csBundle(i).lsrc(2) := dest + (2 * i).U
1384          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1385          csBundle(i).uopIdx :=  i.U
1386        } otherwise {
1387          csBundle(i).srcType(2) := SrcType.DC
1388          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1389          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1390          // csBundle(i).lsrc(2) := dest + (2 * i).U
1391          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1392          csBundle(i).uopIdx := i.U
1393        }
            // Final-uop override. These connections come after the per-slot ones, so they take
            // precedence for the slot selected by numOfUop - 1 (last-connect semantics).
            // NOTE(review): these four statements do not depend on i; they are re-emitted on
            // every loop iteration and look like they could be hoisted below the loop.
1394        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1395        csBundle(numOfUop-1.U).lsrc(0) := src1
1396        csBundle(numOfUop-1.U).lsrc(2) := dest
1397        csBundle(numOfUop-1.U).ldest := dest
1398      }
1399    }
1400    is(UopSplitType.VEC_RGATHER) {
          // Uop split for VEC_RGATHER: len * len uops, where len is chosen from vlmulReg below.
          //
          // genCsBundle_VEC_RGATHER(len): for each index i, emits a chain of len uops
          // (j = 0 until len). Uop (i, j) reads src1 + i and src2 + j. The chain starts from the
          // old value of dest + i, passes through temporaries VECTOR_TMP_REG_LMUL + j, and the
          // last uop of the chain (j == len - 1) writes dest + i.
1401      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1402        for (i <- 0 until len)
1403          for (j <- 0 until len) {
1404            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1405            // csBundle(i * len + j).srcType(1) := SrcType.vp
1406            // csBundle(i * len + j).srcType(2) := SrcType.vp
1407            csBundle(i * len + j).lsrc(0) := src1 + i.U
1408            csBundle(i * len + j).lsrc(1) := src2 + j.U
                // Old-vd input: architectural dest for the first uop of a chain, otherwise the
                // temporary written by the previous uop (j - 1).
1409            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1410            csBundle(i * len + j).lsrc(2) := vd_old
                // Output: architectural dest for the last uop of a chain, otherwise temporary j.
1411            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1412            csBundle(i * len + j).ldest := vd
1413            csBundle(i * len + j).uopIdx := (i * len + j).U
1414          }
1415      }
          // vlmulReg b001 / b010 / b011 select len = 2 / 4 / 8. Other encodings have no case
          // here, so csBundle keeps whatever was connected earlier (outside this excerpt).
1416      switch(vlmulReg) {
1417        is("b001".U ){
1418          genCsBundle_VEC_RGATHER(2)
1419        }
1420        is("b010".U ){
1421          genCsBundle_VEC_RGATHER(4)
1422        }
1423        is("b011".U ){
1424          genCsBundle_VEC_RGATHER(8)
1425        }
1426      }
1427    }
1428    is(UopSplitType.VEC_RGATHER_VX) {
          // Uop split for VEC_RGATHER_VX: uop 0 moves the scalar/immediate operand into
          // temporary VECTOR_TMP_REG_LMUL; it is followed by len * len gather uops placed at
          // csBundle index (i * len + j) + 1.
          //
          // genCsBundle_RGATHER_VX(len): like the vector-vector gather chain, but every uop
          // reads VECTOR_TMP_REG_LMUL as lsrc(0). Because that temporary is occupied, the chain
          // temporaries are shifted up by one (VECTOR_TMP_REG_LMUL + j and + j + 1).
1429      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1430        for (i <- 0 until len)
1431          for (j <- 0 until len) {
1432            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1433            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1434            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1435            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1436            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1437            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1438            csBundle(i * len + j + 1).lsrc(2) := vd_old
1439            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1440            csBundle(i * len + j + 1).ldest := vd
                // uopIdx is counted from 0 over the gather uops only; it does not include the
                // leading move uop, hence no "+ 1" here.
1441            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1442          }
1443      }
1444      // i to vector move
          // Source 0 is an integer register, source 1 an immediate with lsrc(1) forced to 0.
          // The FU op type concatenates the move kind (imm2Vec when src1IsImm, else i2Vec)
          // with vsewReg. The result is written with vecWen only.
1445      csBundle(0).srcType(0) := SrcType.reg
1446      csBundle(0).srcType(1) := SrcType.imm
1447      csBundle(0).lsrc(1) := 0.U
1448      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1449      csBundle(0).fuType := FuType.i2v.U
1450      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1451      csBundle(0).rfWen := false.B
1452      csBundle(0).fpWen := false.B
1453      csBundle(0).vecWen := true.B
          // Default layout (len = 1). The switch below re-connects the same fields for larger
          // vlmulReg encodings; being later connections, those take precedence when selected.
1454      genCsBundle_RGATHER_VX(1)
1455      switch(vlmulReg) {
1456        is("b001".U ){
1457          genCsBundle_RGATHER_VX(2)
1458        }
1459        is("b010".U ){
1460          genCsBundle_RGATHER_VX(4)
1461        }
1462        is("b011".U ){
1463          genCsBundle_RGATHER_VX(8)
1464        }
1465      }
1466    }
1467    is(UopSplitType.VEC_RGATHEREI16) {
          // Uop split for VEC_RGATHEREI16. Four helpers build the same kind of gather chain as
          // VEC_RGATHER (old dest + i -> temporaries -> dest + i) and differ only in how the
          // src1 register is selected for index i:
          //   SEW8    : two uops per (i, j), reading src1 + 2*i and src1 + 2*i + 1
          //   default : src1 + i
          //   SEW32   : src1 + i / 2
          //   SEW64   : src1 + i / 4
          // The helper is chosen from vsewReg and its `len` from vlmulReg.
1468      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1469        for (i <- 0 until len)
1470          for (j <- 0 until len) {
                // Two uops per (i, j) at csBundle indices (i * len + j) * 2 and + 1.
                // Temporaries advance by two per j: the first uop writes
                // VECTOR_TMP_REG_LMUL + 2*j, the second reads it back and writes
                // VECTOR_TMP_REG_LMUL + 2*j + 1 (or dest + i at the end of the chain).
1471            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1472            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1473            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1474            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1475            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1476            csBundle((i * len + j)*2+0).ldest := vd0
1477            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1478            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1479            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1480            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1481            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1482            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1483            csBundle((i * len + j)*2+1).ldest := vd1
1484            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1485          }
1486      }
          // Default variant: one uop per (i, j), src1 register advances with i.
1487      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1488        for (i <- 0 until len)
1489          for (j <- 0 until len) {
1490            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1491            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1492            csBundle(i * len + j).lsrc(0) := src1 + i.U
1493            csBundle(i * len + j).lsrc(1) := src2 + j.U
1494            csBundle(i * len + j).lsrc(2) := vd_old
1495            csBundle(i * len + j).ldest := vd
1496            csBundle(i * len + j).uopIdx := (i * len + j).U
1497          }
1498      }
          // SEW32 variant: two consecutive i values share one src1 register (i / 2).
1499      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1500        for (i <- 0 until len)
1501          for (j <- 0 until len) {
1502            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1503            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1504            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1505            csBundle(i * len + j).lsrc(1) := src2 + j.U
1506            csBundle(i * len + j).lsrc(2) := vd_old
1507            csBundle(i * len + j).ldest := vd
1508            csBundle(i * len + j).uopIdx := (i * len + j).U
1509          }
1510      }
          // SEW64 variant: four consecutive i values share one src1 register (i / 4).
1511      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1512        for (i <- 0 until len)
1513          for (j <- 0 until len) {
1514            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1515            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1516            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1517            csBundle(i * len + j).lsrc(1) := src2 + j.U
1518            csBundle(i * len + j).lsrc(2) := vd_old
1519            csBundle(i * len + j).ldest := vd
1520            csBundle(i * len + j).uopIdx := (i * len + j).U
1521          }
1522      }
          // Default layout with len = 1. `!vsewReg.orR` is true when vsewReg is all zeros,
          // which selects the SEW8 helper. The switch on vlmulReg below re-connects the same
          // fields afterwards and therefore takes precedence when one of its cases matches.
1523      when(!vsewReg.orR){
1524        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1525      }.elsewhen(vsewReg === VSew.e32){
1526        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1527      }.elsewhen(vsewReg === VSew.e64){
1528        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1529      }.otherwise{
1530        genCsBundle_VEC_RGATHEREI16(1)
1531      }
1532      switch(vlmulReg) {
1533        is("b001".U) {
1534          when(!vsewReg.orR) {
1535            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1536          }.elsewhen(vsewReg === VSew.e32){
1537            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1538          }.elsewhen(vsewReg === VSew.e64){
1539            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1540          }.otherwise{
1541            genCsBundle_VEC_RGATHEREI16(2)
1542          }
1543        }
1544        is("b010".U) {
1545          when(!vsewReg.orR) {
1546            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1547          }.elsewhen(vsewReg === VSew.e32){
1548            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1549          }.elsewhen(vsewReg === VSew.e64){
1550            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1551          }.otherwise{
1552            genCsBundle_VEC_RGATHEREI16(4)
1553          }
1554        }
1555        is("b011".U) {
              // NOTE(review): unlike the cases above there is no SEW8 branch here, so an
              // all-zero vsewReg falls through to the default helper. Presumably that
              // combination is not a legal configuration for this instruction - confirm.
1556          when(vsewReg === VSew.e32){
1557            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1558          }.elsewhen(vsewReg === VSew.e64){
1559            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1560          }.otherwise{
1561            genCsBundle_VEC_RGATHEREI16(8)
1562          }
1563        }
1564      }
1565    }
1566    is(UopSplitType.VEC_COMPRESS) {
          // Uop split for VEC_COMPRESS.
          //
          // genCsBundle_VEC_COMPRESS(len): emits a triangular arrangement of uops. Row i
          // (one per src2 + i) has i + 2 uops, except the last row (i == len - 1) which has
          // i + 1. Rows are packed back to back, so row i starts at csBundle index
          // i * (i + 3) / 2, the sum of (k + 2) for k < i.
1567      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1568        for (i <- 0 until len) {
1569          val jlen = if (i == len-1) i+1 else i+2
1570          for (j <- 0 until jlen) {
                // Old-vd input: architectural dest + i on the diagonal (i == j), otherwise
                // temporary VECTOR_TMP_REG_LMUL + j + 1.
1571            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
                // Output: the last row writes the architectural registers dest + j. In earlier
                // rows the extra uop (j == i + 1) writes VECTOR_TMP_REG_LMUL itself and the
                // others write temporary VECTOR_TMP_REG_LMUL + j + 1.
1572            val vd = if(i==len-1) (dest + j.U) else {
1573              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1574            }
                // The extra uop of a row (j == i + 1) leaves source types 0 and 2 as DontCare.
1575            val src13Type = if (j == i+1) DontCare else SrcType.vp
1576            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1577            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1578            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1579            csBundle(i*(i+3)/2 + j).srcType(3) := SrcType.vp
1580            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1581            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1582            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
                // Source 3 is always VECTOR_TMP_REG_LMUL, the register written by the extra uop
                // of earlier rows.
1583            csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1584            csBundle(i*(i+3)/2 + j).ldest := vd
1585            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1586          }
1587        }
1588      }
          // vlmulReg b001 / b010 / b011 select len = 2 / 4 / 8; other encodings have no case.
1589      switch(vlmulReg) {
1590        is("b001".U ){
1591          genCsBundle_VEC_COMPRESS(2)
1592        }
1593        is("b010".U ){
1594          genCsBundle_VEC_COMPRESS(4)
1595        }
1596        is("b011".U ){
1597          genCsBundle_VEC_COMPRESS(8)
1598        }
1599      }
1600    }
1601    is(UopSplitType.VEC_MVNR) {
          // Uop split for VEC_MVNR: one uop per register index i. Uop i reads src1 + i,
          // src2 + i and the old dest + i, and writes dest + i.
1602      for (i <- 0 until MAX_VLMUL) {
1603        csBundle(i).lsrc(0) := src1 + i.U
1604        csBundle(i).lsrc(1) := src2 + i.U
1605        csBundle(i).lsrc(2) := dest + i.U
1606        csBundle(i).ldest := dest + i.U
1607        csBundle(i).uopIdx := i.U
1608      }
1609    }
1610    is(UopSplitType.VEC_US_LDST) {
          // Uop split for VEC_US_LDST: uop 0 is a move into FP_TMP_REG_MV, followed by
          // MAX_VLMUL memory uops at csBundle(i + 1) that read that temporary as source 0.
1611      /*
1612      FMV.D.X
1613       */
          // Uop 0: i2v move with an integer register as source 0 and an immediate source 1
          // (lsrc(1) forced to 0); the result is written with fpWen only.
1614      csBundle(0).srcType(0) := SrcType.reg
1615      csBundle(0).srcType(1) := SrcType.imm
1616      csBundle(0).lsrc(1) := 0.U
1617      csBundle(0).ldest := FP_TMP_REG_MV.U
1618      csBundle(0).fuType := FuType.i2v.U
1619      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1620      csBundle(0).rfWen := false.B
1621      csBundle(0).fpWen := true.B
1622      csBundle(0).vecWen := false.B
1623      csBundle(0).vlsInstr := true.B
1624      //LMUL
1625      for (i <- 0 until MAX_VLMUL) {
1626        csBundle(i + 1).srcType(0) := SrcType.fp
1627        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1628        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1629        csBundle(i + 1).ldest := dest + i.U
            // uopIdx counts the memory uops only (the leading move is not included).
1630        csBundle(i + 1).uopIdx := i.U
1631        csBundle(i + 1).vlsInstr := true.B
1632      }
          // When isUsSegment is set, the first uop gets waitForward and the last uop
          // (index numOfUop - 1) gets blockBackward.
1633      csBundle.head.waitForward := isUsSegment
1634      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1635    }
1636    is(UopSplitType.VEC_S_LDST) {
          // Uop split for VEC_S_LDST: two leading move uops followed by MAX_VLMUL memory uops
          // at csBundle(i + 2). Uop 0 moves an integer register into FP_TMP_REG_MV; uop 1 moves
          // latchedInst.lsrc(1) into VECTOR_TMP_REG_LMUL. The memory uops read both
          // temporaries as fp-typed sources 0 and 1.
1637      /*
1638      FMV.D.X
1639       */
1640      csBundle(0).srcType(0) := SrcType.reg
1641      csBundle(0).srcType(1) := SrcType.imm
1642      csBundle(0).lsrc(1) := 0.U
1643      csBundle(0).ldest := FP_TMP_REG_MV.U
1644      csBundle(0).fuType := FuType.i2v.U
1645      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1646      csBundle(0).rfWen := false.B
1647      csBundle(0).fpWen := true.B
1648      csBundle(0).vecWen := false.B
1649      csBundle(0).vlsInstr := true.B
1650
          // Uop 1: same kind of move as uop 0, but the source register is taken explicitly from
          // latchedInst.lsrc(1). Note that the destination index is VECTOR_TMP_REG_LMUL while
          // the write enable is fpWen, matching the SrcType.fp read in the loop below.
1651      csBundle(1).srcType(0) := SrcType.reg
1652      csBundle(1).srcType(1) := SrcType.imm
1653      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1654      csBundle(1).lsrc(1) := 0.U
1655      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U
1656      csBundle(1).fuType := FuType.i2v.U
1657      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1658      csBundle(1).rfWen := false.B
1659      csBundle(1).fpWen := true.B
1660      csBundle(1).vecWen := false.B
1661      csBundle(1).vlsInstr := true.B
1662
1663      //LMUL
1664      for (i <- 0 until MAX_VLMUL) {
1665        csBundle(i + 2).srcType(0) := SrcType.fp
1666        csBundle(i + 2).srcType(1) := SrcType.fp
1667        csBundle(i + 2).lsrc(0) := FP_TMP_REG_MV.U
1668        csBundle(i + 2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
1669        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1670        csBundle(i + 2).ldest := dest + i.U
            // uopIdx counts the memory uops only (the two leading moves are not included).
1671        csBundle(i + 2).uopIdx := i.U
1672        csBundle(i + 2).vlsInstr := true.B
1673      }
          // When isSdSegment is set, the first uop gets waitForward and the last uop
          // (index numOfUop - 1) gets blockBackward.
1674      csBundle.head.waitForward := isSdSegment
1675      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1676    }
1677    is(UopSplitType.VEC_I_LDST) {
          // Uop split for VEC_I_LDST (indexed vector load/store): uop 0 is a move into
          // FP_TMP_REG_MV, followed by MAX_VLMUL memory uops at csBundle(i + 1).
          // Two paths exist: nf === 0 uses the per-uop indexedLSRegOffset lookups to pick the
          // index and data register offsets; a non-zero nf uses the two helpers below.
          //
          // genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul, nf): connects source 0, the old-vd
          // source 2, ldest and the write enables of every memory uop. Only the first
          // lmul * nf uops get vecWen and a vp-typed source 2. Callers pass the value of the
          // hardware `nf` signal plus one as the `nf` argument.
1678      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
1679        for (i <- 0 until MAX_VLMUL) {
1680          val vecWen = if (i < lmul * nf) true.B else false.B
1681          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
1682          csBundle(i + 1).srcType(0) := SrcType.fp
1683          csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
              // Placeholder for source 1; in the segment path it is re-connected afterwards by
              // genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1.
1684          csBundle(i + 1).srcType(1) := SrcType.no
1685          csBundle(i + 1).lsrc(1) := src2 + i.U
1686          csBundle(i + 1).srcType(2) := src2Type
1687          csBundle(i + 1).lsrc(2) := dest + i.U
1688          csBundle(i + 1).ldest := dest + i.U
1689          csBundle(i + 1).rfWen := false.B
1690          csBundle(i + 1).fpWen := false.B
1691          csBundle(i + 1).vecWen := vecWen
1692          csBundle(i + 1).uopIdx := i.U
1693          csBundle(i + 1).vlsInstr := true.B
1694        }
1695      }
          // genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul): connects source 1 of every memory
          // uop; only the first `emul` uops read src2 + i as a vp source, the rest get
          // SrcType.no.
1696      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
1697        for (i <- 0 until MAX_VLMUL) {
1698          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1699          csBundle(i + 1).srcType(1) := src1Type
1700          csBundle(i + 1).lsrc(1) := src2 + i.U
1701        }
1702      }
1703
1704      val vlmul = vlmulReg
1705      val vsew = Cat(0.U(1.W), vsewReg)
1706      val veew = Cat(0.U(1.W), width)
          // `1.U + ~vsew` is the two's-complement negation of vsew, so this computes
          // veew + vlmul - vsew (modulo the operand width).
1707      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
          // Map encodings b001 / b010 / b011 to 1 / 2 / 3; every other encoding maps to 0.
1708      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
1709        "b001".U -> 1.U,
1710        "b010".U -> 2.U,
1711        "b011".U -> 3.U
1712      ))
1713      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
1714        "b001".U -> 1.U,
1715        "b010".U -> 2.U,
1716        "b011".U -> 3.U
1717      ))
          // Uop 0: i2v move of an integer register into FP_TMP_REG_MV, written with fpWen only.
1718      csBundle(0).srcType(0) := SrcType.reg
1719      csBundle(0).srcType(1) := SrcType.imm
1720      csBundle(0).lsrc(1) := 0.U
1721      csBundle(0).ldest := FP_TMP_REG_MV.U
1722      csBundle(0).fuType := FuType.i2v.U
1723      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1724      csBundle(0).rfWen := false.B
1725      csBundle(0).fpWen := true.B
1726      csBundle(0).vecWen := false.B
1727      csBundle(0).vlsInstr := true.B
1728
1729      //LMUL
1730      when(nf === 0.U) {
1731        for (i <- 0 until MAX_VLMUL) {
              // indexedLSRegOffset(i) is declared outside this excerpt (presumably one
              // indexedLSUopTable per uop index - confirm). It receives {simple_emul,
              // simple_lmul} and returns the register offsets to use for vs2 and vd.
1732          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1733          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1734          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1735          csBundle(i + 1).srcType(0) := SrcType.fp
1736          csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
              // One-hot select of src2 + offsetVs2 among the MAX_VLMUL candidates.
1737          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1738          csBundle(i + 1).srcType(2) := SrcType.vp
1739          // lsrc2 is old vd
1740          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1741          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1742          csBundle(i + 1).uopIdx := i.U
1743          csBundle(i + 1).vlsInstr := true.B
1744        }
1745      }.otherwise{
1746        // nf field is non-zero: segment indexed load/store
1747        // gen src0, vd
            // Each case passes (register count for simple_lmul, nf + 1) to the helper. Only
            // combinations with lmul * (nf + 1) <= 8 have a case.
1748        switch(simple_lmul) {
1749          is(0.U) {
1750            switch(nf) {
1751              is(1.U) {
1752                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
1753              }
1754              is(2.U) {
1755                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
1756              }
1757              is(3.U) {
1758                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
1759              }
1760              is(4.U) {
1761                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
1762              }
1763              is(5.U) {
1764                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
1765              }
1766              is(6.U) {
1767                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
1768              }
1769              is(7.U) {
1770                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
1771              }
1772            }
1773          }
1774          is(1.U) {
1775            switch(nf) {
1776              is(1.U) {
1777                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
1778              }
1779              is(2.U) {
1780                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
1781              }
1782              is(3.U) {
1783                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
1784              }
1785            }
1786          }
1787          is(2.U) {
1788            switch(nf) {
1789              is(1.U) {
1790                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
1791              }
1792            }
1793          }
1794        }
1795
1796        // gen src1
            // Must stay after the switch above: these connections replace the placeholder
            // srcType(1) / lsrc(1) values made there.
1797        switch(simple_emul) {
1798          is(0.U) {
1799            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
1800          }
1801          is(1.U) {
1802            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
1803          }
1804          is(2.U) {
1805            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
1806          }
1807          is(3.U) {
1808            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
1809          }
1810        }
1811
1812        // for vector store instructions, clear vecWen on every memory uop
1813        when(isVstore) {
1814          for (i <- 0 until MAX_VLMUL) {
1815            csBundle(i + 1).vecWen := false.B
1816          }
1817        }
1818      }
          // When isIxSegment is set, the first uop gets waitForward and the last uop
          // (index numOfUop - 1) gets blockBackward.
1819      csBundle.head.waitForward := isIxSegment
1820      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1821    }
1822  }
1823
1824  // readyFromRename counter: selects the index of the first output port that is not ready,
        // or RenameWidth when every port is ready (presumably PriorityMuxDefault picks the first
        // true condition - confirm against the utility library).
1825  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1826
1827  // The remaining uops of the complex inst in ComplexDecoder can all be sent out this cycle
1828  val thisAllOut = uopRes <= readyCounter
1829
        // Next-state logic. uopRes holds the number of uops of the current instruction that
        // still have to be sent.
        //  - s_idle:   on inValid, go active and load uopRes with the new instruction's numOfUop.
        //  - s_active: if everything left fits this cycle, either start the next instruction
        //              (inValid) or return to idle; otherwise stay active and subtract the
        //              number of uops accepted this cycle.
        // Branches that assign nothing rely on defaults for stateNext / uopResNext that are
        // set outside this excerpt.
1830  switch(state) {
1831    is(s_idle) {
1832      when (inValid) {
1833        stateNext := s_active
1834        uopResNext := inUopInfo.numOfUop
1835      }
1836    }
1837    is(s_active) {
1838      when (thisAllOut) {
1839        when (inValid) {
1840          stateNext := s_active
1841          uopResNext := inUopInfo.numOfUop
1842        }.otherwise {
1843          stateNext := s_idle
1844          uopResNext := 0.U
1845        }
1846      }.otherwise {
1847        stateNext := s_active
1848        uopResNext := uopRes - readyCounter
1849      }
1850    }
1851  }
1852
        // A redirect overrides the computed next state: back to idle with nothing left to send.
1853  state := Mux(io.redirect, s_idle, stateNext)
1854  uopRes := Mux(io.redirect, 0.U, uopResNext)
1855
        // Number of uops sent this cycle: the smaller of uopRes and readyCounter.
1856  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1857
1858  for(i <- 0 until RenameWidth) {
1859    outValids(i) := complexNum > i.U
          // Output port i carries uop number (numOfUop - uopRes) + i, i.e. the uops already sent
          // are skipped. Indices at or beyond maxUopSize fall back to the last csBundle entry.
1860    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1861  }
1862
1863  outComplexNum := Mux(state === s_active, complexNum, 0.U)
        // Ready for a new instruction when idle, or when active and the current instruction
        // finishes this cycle (&& binds tighter than ||).
1864  inReady := state === s_idle || state === s_active && thisAllOut
1865
1866//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1867//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1868//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1869//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1870//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1871//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1872//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1873//
1874//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1875//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1876//    0.U)
1877//  validToRename.zipWithIndex.foreach{
1878//    case(dst, i) =>
1879//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1880//      dst := MuxCase(false.B, Seq(
1881//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1882//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1883//      ).toSeq)
1884//  }
1885//
1886//  readyToIBuf.zipWithIndex.foreach {
1887//    case (dst, i) =>
1888//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1889//      dst := MuxCase(true.B, Seq(
1890//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1891//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1892//      ).toSeq)
1893//  }
1894//
1895//  io.deq.decodedInsts := decodedInsts
1896//  io.deq.complexNum := complexNum
1897//  io.deq.validToRename := validToRename
1898//  io.deq.readyToIBuf := readyToIBuf
1899}
1900