xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision e03e0c5be8ed77bbaa66772cfdedc4d3e152a98a)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Combinational lookup table for one fixed uop index of an indexed vector load/store.
  *
  * The table is computed entirely at elaboration time and turned into logic by the
  * QMC-minimized `decoder`.
  *
  *  - `src`          : 4-bit selector, `src(3, 2)` = emul and `src(1, 0)` = lmul.
  *                     Both are used as shift amounts (`1 << emul`, `1 << lmul`), i.e. they
  *                     are log2-encoded and only the values 0..3 are tabulated.
  *  - `outOffsetVs2` : register offset produced for vs2, taken from `out(5, 3)`.
  *  - `outOffsetVd`  : register offset produced for vd, taken from `out(2, 0)`.
  *
  * @param uopIdx index of the uop this table instance describes (a Scala-level constant)
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair for a single uop.
    *
    * Only non-segment indexed load/store is considered.
    *
    * Note that the `uopIdx` parameter shadows the constructor parameter of the same name.
    *
    * @param lmul   log2-encoded lmul (used as a shift amount)
    * @param emul   log2-encoded emul (used as a shift amount)
    * @param uopIdx index of the uop to compute the offsets for
    * @return `(offsetVs2, offsetVd)`; `(0, 0)` when `uopIdx` is not a valid uop index for
    *         this lmul/emul pair, i.e. it lies outside `[0, 1 << max(lmul, emul))`
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    if (lmul < emul) {
      // lmul < emul: the uop count follows emul, and several uops share one vd register.
      val uopsPerVd = 1 << (emul - lmul)
      if (uopIdx >= 0 && uopIdx < (1 << emul)) (uopIdx, uopIdx / uopsPerVd) else (0, 0)
    } else {
      // lmul >= emul: the uop count follows lmul, and several uops share one vs2 register.
      val uopsPerVs2 = 1 << (lmul - emul)
      if (uopIdx >= 0 && uopIdx < (1 << lmul)) (uopIdx / uopsPerVs2, uopIdx) else (0, 0)
    }
  }

  // One (emul, lmul, offsetVs2, offsetVd) entry per emul/lmul combination of the indexed
  // load/store table, enumerated with emul as the outer index and lmul as the inner one.
  val combVemulNf : Seq[(Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
  } yield {
    val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
    (emul, lmul, offsetVs2, offsetVd)
  }

  // Input pattern is {emul, lmul}; output pattern is {offsetVs2, offsetVd}.
  // All 16 input patterns are listed explicitly; the table default is BitPat.N(6).
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Integer constants shared by the vector decode logic that mixes this trait in.
  */
trait VectorConstants {
  /** Upper bound used when unrolling per-register uop loops. */
  val MAX_VLMUL: Int = 8

  /** First logical register index used for temporary vector registers (33~47  ->  15). */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Register index tagged "in v0 regfile" by the original author; usage is not visible here. */
  val VECTOR_COMPRESS: Int = 1

  /** Presumably the maximum number of uops of one indexed load/store -- TODO confirm. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * Port bundle of the complex-instruction decode unit.
  *
  * Inputs: `redirect`, `csrCtrl`, `vtypeBypass` and the decoupled `in` channel.
  * Outputs: the `RenameWidth` decoupled `out.complexDecodedInsts` channels and `complexNum`.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // NOTE(review): presumably a pipeline redirect/flush indication -- confirm against the user.
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // vtype value supplied from outside this unit.
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in.
  // Payload: the simple-decoder result together with its uop information.
  val in = Flipped(Decoupled(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // One decoupled decoded-instruction channel per rename slot.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, Decoupled(new DecodedInst))
  }
  // 3-bit count output; presumably the number of complex uops presented -- TODO confirm.
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153
154  //Type of uop Div
155  val typeOfSplit = latchedInst.uopSplitType
156  val src1Type = latchedInst.srcType(0)
157  val src1IsImm = src1Type === SrcType.imm
158  val src1IsFp = src1Type === SrcType.fp
159
160  val isVstore = FuType.isVStore(latchedInst.fuType)
161
162  numOfUop := latchedUopInfo.numOfUop
163  numOfWB := latchedUopInfo.numOfWB
164
165  //uops dispatch
166  val s_idle :: s_active :: Nil = Enum(2)
167  val state = RegInit(s_idle)
168  val stateNext = WireDefault(state)
169  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
170  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
171  val uopResNext = WireInit(uopRes)
172  val e64 = 3.U(2.W)
173  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
174  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
175  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
176
177  //uop div up to maxUopSize
178  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
179  csBundle.foreach { case dst =>
180    dst := latchedInst
181    dst.numUops := latchedUopInfo.numOfUop
182    dst.numWB := latchedUopInfo.numOfWB
183    dst.firstUop := false.B
184    dst.lastUop := false.B
185    dst.vlsInstr := false.B
186  }
187
188  csBundle(0).firstUop := true.B
189  csBundle(numOfUop - 1.U).lastUop := true.B
190
191  switch(typeOfSplit) {
192    is(UopSplitType.VSET) {
193      // In simple decoder, rfWen and vecWen are not set
194      when(isVsetSimple) {
195        // Default
196        // uop0 set rd, never flushPipe
197        csBundle(0).fuType := FuType.vsetiwi.U
198        csBundle(0).flushPipe := false.B
199        csBundle(0).rfWen := true.B
200        // uop1 set vl, vsetvl will flushPipe
201        csBundle(1).ldest := Vl_IDX.U
202        csBundle(1).vecWen := false.B
203        csBundle(1).vlWen := true.B
204        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
205          // write nothing, uop0 is a nop instruction
206          csBundle(0).rfWen := false.B
207          csBundle(0).fpWen := false.B
208          csBundle(0).vecWen := false.B
209          csBundle(0).vlWen := false.B
210          csBundle(1).fuType := FuType.vsetfwf.U
211          csBundle(1).srcType(0) := SrcType.no
212          csBundle(1).srcType(2) := SrcType.no
213          csBundle(1).srcType(3) := SrcType.no
214          csBundle(1).srcType(4) := SrcType.vp
215          csBundle(1).lsrc(4) := Vl_IDX.U
216        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
217          // uop0: mv vtype gpr to vector region
218          csBundle(0).srcType(0) := SrcType.xp
219          csBundle(0).srcType(1) := SrcType.no
220          csBundle(0).lsrc(0) := src2
221          csBundle(0).lsrc(1) := 0.U
222          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
223          csBundle(0).fuType := FuType.i2v.U
224          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
225          csBundle(0).rfWen := false.B
226          csBundle(0).fpWen := false.B
227          csBundle(0).vecWen := true.B
228          csBundle(0).vlWen := false.B
229          // uop1: uvsetvcfg_vv
230          csBundle(1).fuType := FuType.vsetfwf.U
231          // vl
232          csBundle(1).srcType(0) := SrcType.no
233          csBundle(1).srcType(2) := SrcType.no
234          csBundle(1).srcType(3) := SrcType.no
235          csBundle(1).srcType(4) := SrcType.vp
236          csBundle(1).lsrc(4) := Vl_IDX.U
237          // vtype
238          csBundle(1).srcType(1) := SrcType.vp
239          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
240          csBundle(1).vecWen := false.B
241          csBundle(1).vlWen := true.B
242          csBundle(1).ldest := Vl_IDX.U
243        }.elsewhen(dest === 0.U) {
244          // write nothing, uop0 is a nop instruction
245          csBundle(0).rfWen := false.B
246          csBundle(0).fpWen := false.B
247          csBundle(0).vecWen := false.B
248          csBundle(0).vlWen := false.B
249        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) {
250          // because vsetvl may modified src2 when src2 == rd,
251          // we need to modify vd in second uop to avoid dependency
252          // uop0 set vl
253          csBundle(0).fuType := FuType.vsetiwf.U
254          csBundle(0).ldest := Vl_IDX.U
255          csBundle(0).rfWen := false.B
256          csBundle(0).vlWen := true.B
257          // uop1 set rd
258          csBundle(1).fuType := FuType.vsetiwi.U
259          csBundle(1).ldest := dest
260          csBundle(1).rfWen := true.B
261          csBundle(1).vlWen := false.B
262        }
263        // use bypass vtype from vtypeGen
264        csBundle(0).vpu.connectVType(io.vtypeBypass)
265        csBundle(1).vpu.connectVType(io.vtypeBypass)
266      }
267    }
268    is(UopSplitType.VEC_VVV) {
269      for (i <- 0 until MAX_VLMUL) {
270        csBundle(i).lsrc(0) := src1 + i.U
271        csBundle(i).lsrc(1) := src2 + i.U
272        csBundle(i).lsrc(2) := dest + i.U
273        csBundle(i).ldest := dest + i.U
274        csBundle(i).uopIdx := i.U
275      }
276    }
277    is(UopSplitType.VEC_VFV) {
278      /*
279      f to vector move
280       */
281      csBundle(0).srcType(0) := SrcType.fp
282      csBundle(0).srcType(1) := SrcType.imm
283      csBundle(0).srcType(2) := SrcType.imm
284      csBundle(0).lsrc(1) := 0.U
285      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
286      csBundle(0).fuType := FuType.f2v.U
287      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
288      csBundle(0).vecWen := true.B
289      csBundle(0).vpu.isReverse := false.B
290      /*
291      LMUL
292       */
293      for (i <- 0 until MAX_VLMUL) {
294        csBundle(i + 1).srcType(0) := SrcType.vp
295        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
296        csBundle(i + 1).lsrc(1) := src2 + i.U
297        csBundle(i + 1).lsrc(2) := dest + i.U
298        csBundle(i + 1).ldest := dest + i.U
299        csBundle(i + 1).uopIdx := i.U
300      }
301    }
302    is(UopSplitType.VEC_EXT2) {
303      for (i <- 0 until MAX_VLMUL / 2) {
304        csBundle(2 * i).lsrc(1) := src2 + i.U
305        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
306        csBundle(2 * i).ldest := dest + (2 * i).U
307        csBundle(2 * i).uopIdx := (2 * i).U
308        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
309        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
310        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
311        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
312      }
313    }
314    is(UopSplitType.VEC_EXT4) {
315      for (i <- 0 until MAX_VLMUL / 4) {
316        csBundle(4 * i).lsrc(1) := src2 + i.U
317        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
318        csBundle(4 * i).ldest := dest + (4 * i).U
319        csBundle(4 * i).uopIdx := (4 * i).U
320        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
321        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
322        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
323        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
324        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
325        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
326        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
327        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
328        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
329        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
330        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
331        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
332      }
333    }
334    is(UopSplitType.VEC_EXT8) {
335      for (i <- 0 until MAX_VLMUL) {
336        csBundle(i).lsrc(1) := src2
337        csBundle(i).lsrc(2) := dest + i.U
338        csBundle(i).ldest := dest + i.U
339        csBundle(i).uopIdx := i.U
340      }
341    }
342    is(UopSplitType.VEC_0XV) {
343      /*
344      i/f to vector move
345       */
346      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
347      csBundle(0).srcType(1) := SrcType.imm
348      csBundle(0).srcType(2) := SrcType.imm
349      csBundle(0).lsrc(1) := 0.U
350      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
351      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
352      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
353      csBundle(0).rfWen := false.B
354      csBundle(0).fpWen := false.B
355      csBundle(0).vecWen := true.B
356      /*
357      vmv.s.x
358       */
359      csBundle(1).srcType(0) := SrcType.vp
360      csBundle(1).srcType(1) := SrcType.imm
361      csBundle(1).srcType(2) := SrcType.vp
362      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
363      csBundle(1).lsrc(1) := 0.U
364      csBundle(1).lsrc(2) := dest
365      csBundle(1).ldest := dest
366      csBundle(1).rfWen := false.B
367      csBundle(1).fpWen := false.B
368      csBundle(1).vecWen := true.B
369      csBundle(1).uopIdx := 0.U
370    }
371    is(UopSplitType.VEC_VXV) {
372      /*
373      i to vector move
374       */
375      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
376      csBundle(0).srcType(1) := SrcType.imm
377      csBundle(0).srcType(2) := SrcType.imm
378      csBundle(0).lsrc(1) := 0.U
379      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
380      csBundle(0).fuType := FuType.i2v.U
381      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
382      csBundle(0).vecWen := true.B
383      csBundle(0).vpu.isReverse := false.B
384      /*
385      LMUL
386       */
387      for (i <- 0 until MAX_VLMUL) {
388        csBundle(i + 1).srcType(0) := SrcType.vp
389        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
390        csBundle(i + 1).lsrc(1) := src2 + i.U
391        csBundle(i + 1).lsrc(2) := dest + i.U
392        csBundle(i + 1).ldest := dest + i.U
393        csBundle(i + 1).uopIdx := i.U
394      }
395    }
396    is(UopSplitType.VEC_VVW) {
397      for (i <- 0 until MAX_VLMUL / 2) {
398        csBundle(2 * i).lsrc(0) := src1 + i.U
399        csBundle(2 * i).lsrc(1) := src2 + i.U
400        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
401        csBundle(2 * i).ldest := dest + (2 * i).U
402        csBundle(2 * i).uopIdx := (2 * i).U
403        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
404        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
405        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
406        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
407        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
408      }
409    }
410    is(UopSplitType.VEC_VFW) {
411      /*
412      f to vector move
413       */
414      csBundle(0).srcType(0) := SrcType.fp
415      csBundle(0).srcType(1) := SrcType.imm
416      csBundle(0).srcType(2) := SrcType.imm
417      csBundle(0).lsrc(1) := 0.U
418      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
419      csBundle(0).fuType := FuType.f2v.U
420      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
421      csBundle(0).rfWen := false.B
422      csBundle(0).fpWen := false.B
423      csBundle(0).vecWen := true.B
424
425      for (i <- 0 until MAX_VLMUL / 2) {
426        csBundle(2 * i + 1).srcType(0) := SrcType.vp
427        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
428        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
429        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
430        csBundle(2 * i + 1).ldest := dest + (2 * i).U
431        csBundle(2 * i + 1).uopIdx := (2 * i).U
432        csBundle(2 * i + 2).srcType(0) := SrcType.vp
433        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
434        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
435        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
436        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
437        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
438      }
439    }
440    is(UopSplitType.VEC_WVW) {
441      for (i <- 0 until MAX_VLMUL / 2) {
442        csBundle(2 * i).lsrc(0) := src1 + i.U
443        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
444        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
445        csBundle(2 * i).ldest := dest + (2 * i).U
446        csBundle(2 * i).uopIdx := (2 * i).U
447        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
448        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
449        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
450        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
451        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
452      }
453    }
454    is(UopSplitType.VEC_VXW) {
455      /*
456      i to vector move
457       */
458      csBundle(0).srcType(0) := SrcType.reg
459      csBundle(0).srcType(1) := SrcType.imm
460      csBundle(0).srcType(2) := SrcType.imm
461      csBundle(0).lsrc(1) := 0.U
462      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
463      csBundle(0).fuType := FuType.i2v.U
464      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
465      csBundle(0).vecWen := true.B
466
467      for (i <- 0 until MAX_VLMUL / 2) {
468        csBundle(2 * i + 1).srcType(0) := SrcType.vp
469        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
470        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
471        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
472        csBundle(2 * i + 1).ldest := dest + (2 * i).U
473        csBundle(2 * i + 1).uopIdx := (2 * i).U
474        csBundle(2 * i + 2).srcType(0) := SrcType.vp
475        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
476        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
477        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
478        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
479        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
480      }
481    }
482    is(UopSplitType.VEC_WXW) {
483      /*
484      i to vector move
485       */
486      csBundle(0).srcType(0) := SrcType.reg
487      csBundle(0).srcType(1) := SrcType.imm
488      csBundle(0).srcType(2) := SrcType.imm
489      csBundle(0).lsrc(1) := 0.U
490      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
491      csBundle(0).fuType := FuType.i2v.U
492      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
493      csBundle(0).vecWen := true.B
494
495      for (i <- 0 until MAX_VLMUL / 2) {
496        csBundle(2 * i + 1).srcType(0) := SrcType.vp
497        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
498        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
499        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
500        csBundle(2 * i + 1).ldest := dest + (2 * i).U
501        csBundle(2 * i + 1).uopIdx := (2 * i).U
502        csBundle(2 * i + 2).srcType(0) := SrcType.vp
503        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
504        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
505        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
506        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
507        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
508      }
509    }
510    is(UopSplitType.VEC_WVV) {
511      for (i <- 0 until MAX_VLMUL / 2) {
512
513        csBundle(2 * i).lsrc(0) := src1 + i.U
514        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
515        csBundle(2 * i).lsrc(2) := dest + i.U
516        csBundle(2 * i).ldest := dest + i.U
517        csBundle(2 * i).uopIdx := (2 * i).U
518        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
519        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
520        csBundle(2 * i + 1).lsrc(2) := dest + i.U
521        csBundle(2 * i + 1).ldest := dest + i.U
522        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
523      }
524    }
525    is(UopSplitType.VEC_WFW) {
526      /*
527      f to vector move
528       */
529      csBundle(0).srcType(0) := SrcType.fp
530      csBundle(0).srcType(1) := SrcType.imm
531      csBundle(0).srcType(2) := SrcType.imm
532      csBundle(0).lsrc(1) := 0.U
533      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
534      csBundle(0).fuType := FuType.f2v.U
535      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
536      csBundle(0).rfWen := false.B
537      csBundle(0).fpWen := false.B
538      csBundle(0).vecWen := true.B
539
540      for (i <- 0 until MAX_VLMUL / 2) {
541        csBundle(2 * i + 1).srcType(0) := SrcType.vp
542        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
543        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
544        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
545        csBundle(2 * i + 1).ldest := dest + (2 * i).U
546        csBundle(2 * i + 1).uopIdx := (2 * i).U
547        csBundle(2 * i + 2).srcType(0) := SrcType.vp
548        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
549        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
550        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
551        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
552        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
553      }
554    }
555    is(UopSplitType.VEC_WXV) {
556      /*
557      i to vector move
558       */
559      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
560      csBundle(0).srcType(1) := SrcType.imm
561      csBundle(0).srcType(2) := SrcType.imm
562      csBundle(0).lsrc(1) := 0.U
563      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
564      csBundle(0).fuType := FuType.i2v.U
565      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
566      csBundle(0).vecWen := true.B
567
568      for (i <- 0 until MAX_VLMUL / 2) {
569        csBundle(2 * i + 1).srcType(0) := SrcType.vp
570        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
571        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
572        csBundle(2 * i + 1).lsrc(2) := dest + i.U
573        csBundle(2 * i + 1).ldest := dest + i.U
574        csBundle(2 * i + 1).uopIdx := (2 * i).U
575        csBundle(2 * i + 2).srcType(0) := SrcType.vp
576        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
577        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
578        csBundle(2 * i + 2).lsrc(2) := dest + i.U
579        csBundle(2 * i + 2).ldest := dest + i.U
580        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
581      }
582    }
583    is(UopSplitType.VEC_VVM) {
584      csBundle(0).lsrc(2) := dest
585      csBundle(0).ldest := dest
586      csBundle(0).uopIdx := 0.U
587      for (i <- 1 until MAX_VLMUL) {
588        csBundle(i).lsrc(0) := src1 + i.U
589        csBundle(i).lsrc(1) := src2 + i.U
590        csBundle(i).lsrc(2) := dest
591        csBundle(i).ldest := dest
592        csBundle(i).uopIdx := i.U
593      }
594    }
595    is(UopSplitType.VEC_VFM) {
596      /*
597      f to vector move
598       */
599      csBundle(0).srcType(0) := SrcType.fp
600      csBundle(0).srcType(1) := SrcType.imm
601      csBundle(0).srcType(2) := SrcType.imm
602      csBundle(0).lsrc(1) := 0.U
603      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
604      csBundle(0).fuType := FuType.f2v.U
605      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
606      csBundle(0).rfWen := false.B
607      csBundle(0).fpWen := false.B
608      csBundle(0).vecWen := true.B
609      //LMUL
610      csBundle(1).srcType(0) := SrcType.vp
611      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
612      csBundle(1).lsrc(2) := dest
613      csBundle(1).ldest := dest
614      csBundle(1).uopIdx := 0.U
615      for (i <- 1 until MAX_VLMUL) {
616        csBundle(i + 1).srcType(0) := SrcType.vp
617        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
618        csBundle(i + 1).lsrc(1) := src2 + i.U
619        csBundle(i + 1).lsrc(2) := dest
620        csBundle(i + 1).ldest := dest
621        csBundle(i + 1).uopIdx := i.U
622      }
623      csBundle(numOfUop - 1.U).ldest := dest
624    }
625    is(UopSplitType.VEC_VXM) {
626      /*
627      i to vector move
628       */
629      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
630      csBundle(0).srcType(1) := SrcType.imm
631      csBundle(0).srcType(2) := SrcType.imm
632      csBundle(0).lsrc(1) := 0.U
633      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
634      csBundle(0).fuType := FuType.i2v.U
635      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
636      csBundle(0).vecWen := true.B
637      //LMUL
638      csBundle(1).srcType(0) := SrcType.vp
639      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
640      csBundle(1).lsrc(2) := dest
641      csBundle(1).ldest := dest
642      csBundle(1).uopIdx := 0.U
643      for (i <- 1 until MAX_VLMUL) {
644        csBundle(i + 1).srcType(0) := SrcType.vp
645        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
646        csBundle(i + 1).lsrc(1) := src2 + i.U
647        csBundle(i + 1).lsrc(2) := dest
648        csBundle(i + 1).ldest := dest
649        csBundle(i + 1).uopIdx := i.U
650      }
651      csBundle(numOfUop - 1.U).ldest := dest
652    }
653    is(UopSplitType.VEC_SLIDE1UP) {
654      /*
655      i to vector move
656       */
657      csBundle(0).srcType(0) := SrcType.reg
658      csBundle(0).srcType(1) := SrcType.imm
659      csBundle(0).srcType(2) := SrcType.imm
660      csBundle(0).lsrc(1) := 0.U
661      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
662      csBundle(0).fuType := FuType.i2v.U
663      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
664      csBundle(0).vecWen := true.B
665      //LMUL
666      csBundle(1).srcType(0) := SrcType.vp
667      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
668      csBundle(1).lsrc(2) := dest
669      csBundle(1).ldest := dest
670      csBundle(1).uopIdx := 0.U
671      for (i <- 1 until MAX_VLMUL) {
672        csBundle(i + 1).srcType(0) := SrcType.vp
673        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
674        csBundle(i + 1).lsrc(1) := src2 + i.U
675        csBundle(i + 1).lsrc(2) := dest + i.U
676        csBundle(i + 1).ldest := dest + i.U
677        csBundle(i + 1).uopIdx := i.U
678      }
679    }
680    is(UopSplitType.VEC_FSLIDE1UP) {
681      /*
682      f to vector move
683       */
684      csBundle(0).srcType(0) := SrcType.fp
685      csBundle(0).srcType(1) := SrcType.imm
686      csBundle(0).srcType(2) := SrcType.imm
687      csBundle(0).lsrc(1) := 0.U
688      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
689      csBundle(0).fuType := FuType.f2v.U
690      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
691      csBundle(0).rfWen := false.B
692      csBundle(0).fpWen := false.B
693      csBundle(0).vecWen := true.B
694      //LMUL
695      csBundle(1).srcType(0) := SrcType.vp
696      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
697      csBundle(1).lsrc(1) := src2
698      csBundle(1).lsrc(2) := dest
699      csBundle(1).ldest := dest
700      csBundle(1).uopIdx := 0.U
701      for (i <- 1 until MAX_VLMUL) {
702        csBundle(i + 1).srcType(0) := SrcType.vp
703        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
704        csBundle(i + 1).lsrc(1) := src2 + i.U
705        csBundle(i + 1).lsrc(2) := dest + i.U
706        csBundle(i + 1).ldest := dest + i.U
707        csBundle(i + 1).uopIdx := i.U
708      }
709    }
710    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
711      /*
712      i to vector move
713       */
714      csBundle(0).srcType(0) := SrcType.reg
715      csBundle(0).srcType(1) := SrcType.imm
716      csBundle(0).srcType(2) := SrcType.imm
717      csBundle(0).lsrc(1) := 0.U
718      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
719      csBundle(0).fuType := FuType.i2v.U
720      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
721      csBundle(0).vecWen := true.B
722      //LMUL
723      for (i <- 0 until MAX_VLMUL) {
724        csBundle(2 * i + 1).srcType(0) := SrcType.vp
725        csBundle(2 * i + 1).srcType(1) := SrcType.vp
726        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
727        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
728        csBundle(2 * i + 1).lsrc(2) := dest + i.U
729        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
730        csBundle(2 * i + 1).uopIdx := (2 * i).U
731        if (2 * i + 2 < MAX_VLMUL * 2) {
732          csBundle(2 * i + 2).srcType(0) := SrcType.vp
733          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
734          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
735          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
736          csBundle(2 * i + 2).ldest := dest + i.U
737          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
738        }
739      }
740      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
741      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
742      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
743    }
744    is(UopSplitType.VEC_FSLIDE1DOWN) {
745      /*
746      f to vector move
747       */
748      csBundle(0).srcType(0) := SrcType.fp
749      csBundle(0).srcType(1) := SrcType.imm
750      csBundle(0).srcType(2) := SrcType.imm
751      csBundle(0).lsrc(1) := 0.U
752      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
753      csBundle(0).fuType := FuType.f2v.U
754      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
755      csBundle(0).rfWen := false.B
756      csBundle(0).fpWen := false.B
757      csBundle(0).vecWen := true.B
758      //LMUL
759      for (i <- 0 until MAX_VLMUL) {
760        csBundle(2 * i + 1).srcType(0) := SrcType.vp
761        csBundle(2 * i + 1).srcType(1) := SrcType.vp
762        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
763        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
764        csBundle(2 * i + 1).lsrc(2) := dest + i.U
765        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
766        csBundle(2 * i + 1).uopIdx := (2 * i).U
767        if (2 * i + 2 < MAX_VLMUL * 2) {
768          csBundle(2 * i + 2).srcType(0) := SrcType.vp
769          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
770          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
771          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
772          csBundle(2 * i + 2).ldest := dest + i.U
773          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
774        }
775      }
776      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
777      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
778      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
779    }
780    is(UopSplitType.VEC_VRED) {
781      when(vlmulReg === "b001".U) {
782        csBundle(0).srcType(2) := SrcType.DC
783        csBundle(0).lsrc(0) := src2 + 1.U
784        csBundle(0).lsrc(1) := src2
785        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
786        csBundle(0).uopIdx := 0.U
787      }
788      when(vlmulReg === "b010".U) {
789        csBundle(0).srcType(2) := SrcType.DC
790        csBundle(0).lsrc(0) := src2 + 1.U
791        csBundle(0).lsrc(1) := src2
792        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
793        csBundle(0).uopIdx := 0.U
794
795        csBundle(1).srcType(2) := SrcType.DC
796        csBundle(1).lsrc(0) := src2 + 3.U
797        csBundle(1).lsrc(1) := src2 + 2.U
798        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
799        csBundle(1).uopIdx := 1.U
800
801        csBundle(2).srcType(2) := SrcType.DC
802        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
803        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
804        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
805        csBundle(2).uopIdx := 2.U
806      }
807      when(vlmulReg === "b011".U) {
808        for (i <- 0 until MAX_VLMUL) {
809          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
810            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
811            csBundle(i).lsrc(1) := src2 + (i * 2).U
812            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
813          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
814            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
815            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
816            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
817          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
818            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
819            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
820            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
821          }
822          csBundle(i).srcType(2) := SrcType.DC
823          csBundle(i).uopIdx := i.U
824        }
825      }
826      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
827        /*
828         * 2 <= vlmul <= 8
829         */
830        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
831        csBundle(numOfUop - 1.U).lsrc(0) := src1
832        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
833        csBundle(numOfUop - 1.U).lsrc(2) := dest
834        csBundle(numOfUop - 1.U).ldest := dest
835        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
836      }
837    }
838    is(UopSplitType.VEC_VFRED) {
839      val vlmul = vlmulReg
840      val vsew = vsewReg
841      when(vlmul === VLmul.m8){
842        for (i <- 0 until 4) {
843          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
844          csBundle(i).lsrc(1) := src2 + (i * 2).U
845          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
846          csBundle(i).uopIdx := i.U
847        }
848        for (i <- 4 until 6) {
849          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
850          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
851          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
852          csBundle(i).uopIdx := i.U
853        }
854        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
855        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
856        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
857        csBundle(6).uopIdx := 6.U
858        when(vsew === VSew.e64) {
859          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
860          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
861          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
862          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
863          csBundle(7).uopIdx := 7.U
864          csBundle(8).lsrc(0) := src1
865          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
866          csBundle(8).ldest := dest
867          csBundle(8).uopIdx := 8.U
868        }
869        when(vsew === VSew.e32) {
870          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
871          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
872          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
873          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
874          csBundle(7).uopIdx := 7.U
875          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
876          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
877          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
878          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
879          csBundle(8).uopIdx := 8.U
880          csBundle(9).lsrc(0) := src1
881          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
882          csBundle(9).ldest := dest
883          csBundle(9).uopIdx := 9.U
884        }
885        when(vsew === VSew.e16) {
886          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
887          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
888          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
889          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
890          csBundle(7).uopIdx := 7.U
891          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
892          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
893          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
894          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
895          csBundle(8).uopIdx := 8.U
896          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
897          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
898          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
899          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
900          csBundle(9).uopIdx := 9.U
901          csBundle(10).lsrc(0) := src1
902          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
903          csBundle(10).ldest := dest
904          csBundle(10).uopIdx := 10.U
905        }
906      }
907      when(vlmul === VLmul.m4) {
908        for (i <- 0 until 2) {
909          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
910          csBundle(i).lsrc(1) := src2 + (i * 2).U
911          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
912          csBundle(i).uopIdx := i.U
913        }
914        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
915        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
916        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
917        csBundle(2).uopIdx := 2.U
918        when(vsew === VSew.e64) {
919          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
920          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
921          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
922          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
923          csBundle(3).uopIdx := 3.U
924          csBundle(4).lsrc(0) := src1
925          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
926          csBundle(4).ldest := dest
927          csBundle(4).uopIdx := 4.U
928        }
929        when(vsew === VSew.e32) {
930          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
931          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
932          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
933          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
934          csBundle(3).uopIdx := 3.U
935          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
936          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
937          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
938          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
939          csBundle(4).uopIdx := 4.U
940          csBundle(5).lsrc(0) := src1
941          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
942          csBundle(5).ldest := dest
943          csBundle(5).uopIdx := 5.U
944        }
945        when(vsew === VSew.e16) {
946          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
947          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
948          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
949          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
950          csBundle(3).uopIdx := 3.U
951          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
952          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
953          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
954          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
955          csBundle(4).uopIdx := 4.U
956          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
957          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
958          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
959          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
960          csBundle(5).uopIdx := 5.U
961          csBundle(6).lsrc(0) := src1
962          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
963          csBundle(6).ldest := dest
964          csBundle(6).uopIdx := 6.U
965        }
966      }
967      when(vlmul === VLmul.m2) {
968        csBundle(0).lsrc(0) := src2 + 1.U
969        csBundle(0).lsrc(1) := src2 + 0.U
970        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
971        csBundle(0).uopIdx := 0.U
972        when(vsew === VSew.e64) {
973          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
974          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
975          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
976          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
977          csBundle(1).uopIdx := 1.U
978          csBundle(2).lsrc(0) := src1
979          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
980          csBundle(2).ldest := dest
981          csBundle(2).uopIdx := 2.U
982        }
983        when(vsew === VSew.e32) {
984          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
985          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
986          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
987          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
988          csBundle(1).uopIdx := 1.U
989          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
990          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
991          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
992          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
993          csBundle(2).uopIdx := 2.U
994          csBundle(3).lsrc(0) := src1
995          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
996          csBundle(3).ldest := dest
997          csBundle(3).uopIdx := 3.U
998        }
999        when(vsew === VSew.e16) {
1000          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1001          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1002          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1003          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
1004          csBundle(1).uopIdx := 1.U
1005          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1006          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1007          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1008          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1009          csBundle(2).uopIdx := 2.U
1010          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
1011          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1012          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1013          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
1014          csBundle(3).uopIdx := 3.U
1015          csBundle(4).lsrc(0) := src1
1016          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1017          csBundle(4).ldest := dest
1018          csBundle(4).uopIdx := 4.U
1019        }
1020      }
1021      when(vlmul === VLmul.m1) {
1022        when(vsew === VSew.e64) {
1023          csBundle(0).lsrc(0) := src2
1024          csBundle(0).lsrc(1) := src2
1025          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1026          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1027          csBundle(0).uopIdx := 0.U
1028          csBundle(1).lsrc(0) := src1
1029          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1030          csBundle(1).ldest := dest
1031          csBundle(1).uopIdx := 1.U
1032        }
1033        when(vsew === VSew.e32) {
1034          csBundle(0).lsrc(0) := src2
1035          csBundle(0).lsrc(1) := src2
1036          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1037          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1038          csBundle(0).uopIdx := 0.U
1039          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1040          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1041          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1042          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1043          csBundle(1).uopIdx := 1.U
1044          csBundle(2).lsrc(0) := src1
1045          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1046          csBundle(2).ldest := dest
1047          csBundle(2).uopIdx := 2.U
1048        }
1049        when(vsew === VSew.e16) {
1050          csBundle(0).lsrc(0) := src2
1051          csBundle(0).lsrc(1) := src2
1052          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1053          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1054          csBundle(0).uopIdx := 0.U
1055          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1056          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1057          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1058          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1059          csBundle(1).uopIdx := 1.U
1060          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1061          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1062          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1063          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1064          csBundle(2).uopIdx := 2.U
1065          csBundle(3).lsrc(0) := src1
1066          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1067          csBundle(3).ldest := dest
1068          csBundle(3).uopIdx := 3.U
1069        }
1070      }
1071      when(vlmul === VLmul.mf2) {
1072        when(vsew === VSew.e32) {
1073          csBundle(0).lsrc(0) := src2
1074          csBundle(0).lsrc(1) := src2
1075          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1076          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1077          csBundle(0).uopIdx := 0.U
1078          csBundle(1).lsrc(0) := src1
1079          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1080          csBundle(1).ldest := dest
1081          csBundle(1).uopIdx := 1.U
1082        }
1083        when(vsew === VSew.e16) {
1084          csBundle(0).lsrc(0) := src2
1085          csBundle(0).lsrc(1) := src2
1086          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1087          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1088          csBundle(0).uopIdx := 0.U
1089          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1090          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1091          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1092          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1093          csBundle(1).uopIdx := 1.U
1094          csBundle(2).lsrc(0) := src1
1095          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1096          csBundle(2).ldest := dest
1097          csBundle(2).uopIdx := 2.U
1098        }
1099      }
1100      when(vlmul === VLmul.mf4) {
1101        when(vsew === VSew.e16) {
1102          csBundle(0).lsrc(0) := src2
1103          csBundle(0).lsrc(1) := src2
1104          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1105          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1106          csBundle(0).uopIdx := 0.U
1107          csBundle(1).lsrc(0) := src1
1108          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1109          csBundle(1).ldest := dest
1110          csBundle(1).uopIdx := 1.U
1111        }
1112      }
1113    }
1114
1115    is(UopSplitType.VEC_VFREDOSUM) {
1116      import yunsuan.VfaluType
1117      val vlmul = vlmulReg
1118      val vsew = vsewReg
1119      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1120      when(vlmul === VLmul.m8) {
1121        when(vsew === VSew.e64) {
1122          val vlmax = 16
1123          for (i <- 0 until vlmax) {
1124            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1125            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1126            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1127            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1128            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1129            csBundle(i).uopIdx := i.U
1130          }
1131        }
1132        when(vsew === VSew.e32) {
1133          val vlmax = 32
1134          for (i <- 0 until vlmax) {
1135            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1137            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1138            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1139            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1140            csBundle(i).uopIdx := i.U
1141          }
1142        }
1143        when(vsew === VSew.e16) {
1144          val vlmax = 64
1145          for (i <- 0 until vlmax) {
1146            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1147            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1149            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1150            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1151            csBundle(i).uopIdx := i.U
1152          }
1153        }
1154      }
1155      when(vlmul === VLmul.m4) {
1156        when(vsew === VSew.e64) {
1157          val vlmax = 8
1158          for (i <- 0 until vlmax) {
1159            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1161            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1162            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1163            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1164            csBundle(i).uopIdx := i.U
1165          }
1166        }
1167        when(vsew === VSew.e32) {
1168          val vlmax = 16
1169          for (i <- 0 until vlmax) {
1170            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1172            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1173            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1174            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1175            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1176            csBundle(i).uopIdx := i.U
1177          }
1178        }
1179        when(vsew === VSew.e16) {
1180          val vlmax = 32
1181          for (i <- 0 until vlmax) {
1182            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1183            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1184            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1185            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1186            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1187            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1188            csBundle(i).uopIdx := i.U
1189          }
1190        }
1191      }
1192      when(vlmul === VLmul.m2) {
1193        when(vsew === VSew.e64) {
1194          val vlmax = 4
1195          for (i <- 0 until vlmax) {
1196            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1197            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1198            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1199            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1200            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1201            csBundle(i).uopIdx := i.U
1202          }
1203        }
1204        when(vsew === VSew.e32) {
1205          val vlmax = 8
1206          for (i <- 0 until vlmax) {
1207            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1208            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1209            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1211            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1212            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1213            csBundle(i).uopIdx := i.U
1214          }
1215        }
1216        when(vsew === VSew.e16) {
1217          val vlmax = 16
1218          for (i <- 0 until vlmax) {
1219            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1220            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1223            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1224            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1225            csBundle(i).uopIdx := i.U
1226          }
1227        }
1228      }
1229      when(vlmul === VLmul.m1) {
1230        when(vsew === VSew.e64) {
1231          val vlmax = 2
1232          for (i <- 0 until vlmax) {
1233            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1235            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1236            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1237            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1238            csBundle(i).uopIdx := i.U
1239          }
1240        }
1241        when(vsew === VSew.e32) {
1242          val vlmax = 4
1243          for (i <- 0 until vlmax) {
1244            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1245            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1246            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1248            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1249            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1250            csBundle(i).uopIdx := i.U
1251          }
1252        }
1253        when(vsew === VSew.e16) {
1254          val vlmax = 8
1255          for (i <- 0 until vlmax) {
1256            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1257            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1258            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1260            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1261            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1262            csBundle(i).uopIdx := i.U
1263          }
1264        }
1265      }
1266      when(vlmul === VLmul.mf2) {
1267        when(vsew === VSew.e32) {
1268          val vlmax = 2
1269          for (i <- 0 until vlmax) {
1270            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1271            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1272            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1273            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1274            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1275            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1276            csBundle(i).uopIdx := i.U
1277          }
1278        }
1279        when(vsew === VSew.e16) {
1280          val vlmax = 4
1281          for (i <- 0 until vlmax) {
1282            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1283            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1284            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1285            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1286            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1287            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1288            csBundle(i).uopIdx := i.U
1289          }
1290        }
1291      }
1292      when(vlmul === VLmul.mf4) {
1293        when(vsew === VSew.e16) {
1294          val vlmax = 2
1295          for (i <- 0 until vlmax) {
1296            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1297            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1298            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1299            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1300            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1301            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1302            csBundle(i).uopIdx := i.U
1303          }
1304        }
1305      }
1306    }
1307
1308    is(UopSplitType.VEC_SLIDEUP) {
1309      // i to vector move
1310      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1311      csBundle(0).srcType(1) := SrcType.imm
1312      csBundle(0).srcType(2) := SrcType.imm
1313      csBundle(0).lsrc(1) := 0.U
1314      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1315      csBundle(0).fuType := FuType.i2v.U
1316      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1317      csBundle(0).vecWen := true.B
1318      // LMUL
1319      for (i <- 0 until MAX_VLMUL)
1320        for (j <- 0 to i) {
1321          val old_vd = if (j == 0) {
1322            dest + i.U
1323          } else (VECTOR_TMP_REG_LMUL + j).U
1324          val vd = if (j == i) {
1325            dest + i.U
1326          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1327          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1328          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1329          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1330          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1331          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1332          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1333        }
1334    }
1335
1336    is(UopSplitType.VEC_SLIDEDOWN) {
1337      // i to vector move
1338      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1339      csBundle(0).srcType(1) := SrcType.imm
1340      csBundle(0).srcType(2) := SrcType.imm
1341      csBundle(0).lsrc(1) := 0.U
1342      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1343      csBundle(0).fuType := FuType.i2v.U
1344      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1345      csBundle(0).vecWen := true.B
1346      // LMUL
1347      for (i <- 0 until MAX_VLMUL)
1348        for (j <- (0 to i).reverse) {
1349          when(i.U < lmul) {
1350            val old_vd = if (j == 0) {
1351              dest + lmul - 1.U - i.U
1352            } else (VECTOR_TMP_REG_LMUL + j).U
1353            val vd = if (j == i) {
1354              dest + lmul - 1.U - i.U
1355            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1356            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1357            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1358            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1359            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1360            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1361            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1362          }
1363        }
1364    }
1365
1366    is(UopSplitType.VEC_M0X) {
          // Fills MAX_VLMUL slots with a chain of vector-writing uops: slot i reads the temp
          // register written by slot i-1 as source 0 and src2 as source 1, and writes temp
          // register VECTOR_TMP_REG_LMUL + i. The slot at index lmul-1 is then overridden so
          // that it sets rfWen instead of vecWen and targets `dest`.
1367      // LMUL
1368      for (i <- 0 until MAX_VLMUL) {
            // Slot 0 has no predecessor, so its source 0 is marked don't-care; the lsrc(0)
            // value written for it below (VECTOR_TMP_REG_LMUL - 1) is therefore unused.
1369        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1370        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1371        csBundle(i).srcType(0) := srcType0
1372        csBundle(i).srcType(1) := SrcType.vp
1373        csBundle(i).rfWen := false.B
1374        csBundle(i).fpWen := false.B
1375        csBundle(i).vecWen := true.B
1376        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
            // Every slot reads the same src2 register (no per-slot offset is added).
1377        csBundle(i).lsrc(1) := src2
1378        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1379        csBundle(i).ldest := ldest
1380        csBundle(i).uopIdx := i.U
1381      }
          // These connections come after the loop, so they override the per-slot values
          // for the dynamically selected slot lmul-1 (later connections take priority).
1382      csBundle(lmul - 1.U).rfWen := true.B
1383      csBundle(lmul - 1.U).fpWen := false.B
1384      csBundle(lmul - 1.U).vecWen := false.B
1385      csBundle(lmul - 1.U).ldest := dest
1386    }
1387
1388    is(UopSplitType.VEC_MVV) {
          // Emits two uops per register index i (slots 2*i and 2*i+1). Both read temp
          // register VECTOR_TMP_REG_LMUL + i - 1 as source 0 and src2 as source 1.
          // The even slot writes dest + i (and also reads it as source 2); the odd slot
          // writes temp register VECTOR_TMP_REG_LMUL + i, which the next pair consumes.
1389      // LMUL
1390      for (i <- 0 until MAX_VLMUL) {
            // The first pair has no predecessor temp register: source 0 is don't-care.
1391        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1392        csBundle(i * 2 + 0).srcType(0) := srcType0
1393        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1394        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1395        csBundle(i * 2 + 0).lsrc(1) := src2
1396        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1397        csBundle(i * 2 + 0).ldest := dest + i.U
1398        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1399
1400        csBundle(i * 2 + 1).srcType(0) := srcType0
1401        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1402        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1403        csBundle(i * 2 + 1).lsrc(1) := src2
1404        // lsrc(2) is not assigned for the odd slot (was: csBundle(i).lsrc(2) := dest + i.U  DontCare)
1405        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1406        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1407      }
1408    }
1409
1410    is(UopSplitType.VEC_M0X_VFIRST) {
          // Only slot 0 is configured here: it sets rfWen, clears fpWen and vecWen, and
          // targets `dest`. All other fields keep whatever was assigned before this switch.
1411      // LMUL
1412      csBundle(0).rfWen := true.B
1413      csBundle(0).fpWen := false.B
1414      csBundle(0).vecWen := false.B
1415      csBundle(0).ldest := dest
1416    }
1417    is(UopSplitType.VEC_VWW) {
          // Fills 2 * MAX_VLMUL slots in two phases, then overrides the last uop:
          //  - slots i < lmul read src2 + i on both source 0 and source 1;
          //  - slots i >= lmul read a pair of temp registers written by earlier slots
          //    (VECTOR_TMP_REG_LMUL + 2*(i-lmul) and the one after it);
          //  - every slot writes temp register VECTOR_TMP_REG_LMUL + i;
          //  - the uop at index numOfUop-1 finally reads src1 and old `dest` and writes `dest`.
          // NOTE(review): this looks like a pairwise reduction tree over the src2 group —
          // confirm against the instructions mapped to VEC_VWW.
1418      for (i <- 0 until MAX_VLMUL*2) {
1419        when(i.U < lmul){
1420          csBundle(i).srcType(2) := SrcType.DC
1421          csBundle(i).lsrc(0) := src2 + i.U
1422          csBundle(i).lsrc(1) := src2 + i.U
1423          // csBundle(i).lsrc(2) := dest + (2 * i).U
1424          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1425          csBundle(i).uopIdx :=  i.U
1426        } otherwise {
1427          csBundle(i).srcType(2) := SrcType.DC
              // Cat(x, 0.U(1.W)) appends a zero bit, i.e. 2 * (i - lmul).
1428          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1429          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1430          // csBundle(i).lsrc(2) := dest + (2 * i).U
1431          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1432          csBundle(i).uopIdx := i.U
1433        }
            // Override for the final uop. NOTE(review): these four connections do not depend
            // on i, yet they sit inside the loop and are re-emitted on every iteration. Since
            // the copy from the last iteration comes after every per-slot connection, the
            // result equals connecting them once after the loop — consider hoisting.
1434        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1435        csBundle(numOfUop-1.U).lsrc(0) := src1
1436        csBundle(numOfUop-1.U).lsrc(2) := dest
1437        csBundle(numOfUop-1.U).ldest := dest
1438      }
1439    }
1440    is(UopSplitType.VEC_RGATHER) {
          // Generates a len x len grid of uops, stored row-major at slot i * len + j.
          // Row i reads src1 + i and produces dest + i; column j supplies src2 + j.
          // Within a row, partial results are chained through temp registers: uop j reads
          // the value written by uop j-1 as source 2 (the first one reads dest + i) and the
          // last uop of the row writes dest + i.
1441      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1442        for (i <- 0 until len)
1443          for (j <- 0 until len) {
1444            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1445            // csBundle(i * len + j).srcType(1) := SrcType.vp
1446            // csBundle(i * len + j).srcType(2) := SrcType.vp
1447            csBundle(i * len + j).lsrc(0) := src1 + i.U
1448            csBundle(i * len + j).lsrc(1) := src2 + j.U
1449            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1450            csBundle(i * len + j).lsrc(2) := vd_old
1451            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1452            csBundle(i * len + j).ldest := vd
1453            csBundle(i * len + j).uopIdx := (i * len + j).U
1454          }
1455      }
          // Only vlmulReg encodings b001 / b010 / b011 are handled here (grid sizes 2, 4, 8);
          // for any other encoding this case assigns nothing.
1456      switch(vlmulReg) {
1457        is("b001".U ){
1458          genCsBundle_VEC_RGATHER(2)
1459        }
1460        is("b010".U ){
1461          genCsBundle_VEC_RGATHER(4)
1462        }
1463        is("b011".U ){
1464          genCsBundle_VEC_RGATHER(8)
1465        }
1466      }
1467    }
1468    is(UopSplitType.VEC_RGATHER_VX) {
          // Slot 0 is a move into temp vector register VECTOR_TMP_REG_LMUL (FuType.i2v);
          // slots 1.. hold a len x len grid of gather uops that all read that temp register
          // as source 0.
          //
          // Grid layout: entry (i, j) lives in slot i * len + j + 1 (shifted by one because
          // of the move uop) but its uopIdx is i * len + j. Row i produces dest + i; column j
          // supplies src2 + j. Partial results in a row are chained through temp registers
          // VECTOR_TMP_REG_LMUL + 1 .. (index 0 is taken by the moved operand).
1469      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1470        for (i <- 0 until len)
1471          for (j <- 0 until len) {
1472            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1473            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1474            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1475            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1476            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1477            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1478            csBundle(i * len + j + 1).lsrc(2) := vd_old
1479            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1480            csBundle(i * len + j + 1).ldest := vd
1481            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1482          }
1483      }
1484      // i to vector move
          // Source 0 is an immediate or an integer register depending on src1IsImm; the
          // fuOpType is the 3-bit move kind concatenated with vsewReg.
1485      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1486      csBundle(0).srcType(1) := SrcType.imm
1487      csBundle(0).srcType(2) := SrcType.imm
1488      csBundle(0).lsrc(1) := 0.U
1489      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1490      csBundle(0).fuType := FuType.i2v.U
1491      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1492      csBundle(0).rfWen := false.B
1493      csBundle(0).fpWen := false.B
1494      csBundle(0).vecWen := true.B
          // Default: a 1 x 1 grid. The switch below comes later, so for vlmulReg encodings
          // b001 / b010 / b011 its connections override this default.
1495      genCsBundle_RGATHER_VX(1)
1496      switch(vlmulReg) {
1497        is("b001".U ){
1498          genCsBundle_RGATHER_VX(2)
1499        }
1500        is("b010".U ){
1501          genCsBundle_RGATHER_VX(4)
1502        }
1503        is("b011".U ){
1504          genCsBundle_RGATHER_VX(8)
1505        }
1506      }
1507    }
1508    is(UopSplitType.VEC_RGATHEREI16) {
          // Four generators build the uop grid; they differ in how many uops are emitted per
          // grid entry and in which src1 register a row reads. The dispatch at the bottom
          // picks one by vsewReg and sizes the grid by vlmulReg.
          //
          // SEW8 variant: two uops per grid entry (i, j), in slots (i*len+j)*2 and +1.
          // The first reads src1 + 2*i, the second src1 + 2*i + 1; both read src2 + j.
          // Results are chained through temp registers VECTOR_TMP_REG_LMUL + 2*j (+1), and
          // the second uop of the last column writes dest + i.
1509      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1510        for (i <- 0 until len)
1511          for (j <- 0 until len) {
1512            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1513            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1514            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1515            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1516            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1517            csBundle((i * len + j)*2+0).ldest := vd0
1518            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1519            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1520            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1521            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1522            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1523            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1524            csBundle((i * len + j)*2+1).ldest := vd1
1525            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1526          }
1527      }
          // Generic variant (used on the `otherwise` path of the dispatch): one uop per grid
          // entry, row i reads src1 + i.
          // NOTE(review): this and the SEW32 / SEW64 variants below are identical except for
          // the src1 offset (i, i / 2, i / 4); a single helper taking the divisor would
          // remove the duplication.
1528      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1529        for (i <- 0 until len)
1530          for (j <- 0 until len) {
1531            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1532            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1533            csBundle(i * len + j).lsrc(0) := src1 + i.U
1534            csBundle(i * len + j).lsrc(1) := src2 + j.U
1535            csBundle(i * len + j).lsrc(2) := vd_old
1536            csBundle(i * len + j).ldest := vd
1537            csBundle(i * len + j).uopIdx := (i * len + j).U
1538          }
1539      }
          // SEW32 variant: two consecutive rows share one src1 register (src1 + i / 2,
          // integer division at elaboration time).
1540      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1541        for (i <- 0 until len)
1542          for (j <- 0 until len) {
1543            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1544            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1545            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1546            csBundle(i * len + j).lsrc(1) := src2 + j.U
1547            csBundle(i * len + j).lsrc(2) := vd_old
1548            csBundle(i * len + j).ldest := vd
1549            csBundle(i * len + j).uopIdx := (i * len + j).U
1550          }
1551      }
          // SEW64 variant: four consecutive rows share one src1 register (src1 + i / 4).
1552      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1553        for (i <- 0 until len)
1554          for (j <- 0 until len) {
1555            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1556            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1557            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1558            csBundle(i * len + j).lsrc(1) := src2 + j.U
1559            csBundle(i * len + j).lsrc(2) := vd_old
1560            csBundle(i * len + j).ldest := vd
1561            csBundle(i * len + j).uopIdx := (i * len + j).U
1562          }
1563      }
          // Default: grid size 1. vsewReg == 0 selects the SEW8 variant, e32 / e64 their own
          // variants, and anything else the generic one. The switch below comes later and
          // overrides this default for vlmulReg encodings b001 / b010 / b011.
1564      when(!vsewReg.orR){
1565        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1566      }.elsewhen(vsewReg === VSew.e32){
1567        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1568      }.elsewhen(vsewReg === VSew.e64){
1569        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1570      }.otherwise{
1571        genCsBundle_VEC_RGATHEREI16(1)
1572      }
1573      switch(vlmulReg) {
1574        is("b001".U) {
1575          when(!vsewReg.orR) {
1576            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1577          }.elsewhen(vsewReg === VSew.e32){
1578            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1579          }.elsewhen(vsewReg === VSew.e64){
1580            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1581          }.otherwise{
1582            genCsBundle_VEC_RGATHEREI16(2)
1583          }
1584        }
1585        is("b010".U) {
1586          when(!vsewReg.orR) {
1587            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1588          }.elsewhen(vsewReg === VSew.e32){
1589            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1590          }.elsewhen(vsewReg === VSew.e64){
1591            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1592          }.otherwise{
1593            genCsBundle_VEC_RGATHEREI16(4)
1594          }
1595        }
1596        is("b011".U) {
              // NOTE(review): unlike the two cases above there is no SEW8 branch here, so
              // vsewReg == 0 falls through to the generic variant. Presumably SEW8 is not a
              // legal combination at this LMUL (the 16-bit index group would be twice as
              // large) — confirm that the omission is intentional.
1597          when(vsewReg === VSew.e32){
1598            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1599          }.elsewhen(vsewReg === VSew.e64){
1600            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1601          }.otherwise{
1602            genCsBundle_VEC_RGATHEREI16(8)
1603          }
1604        }
1605      }
1606    }
1607    is(UopSplitType.VEC_COMPRESS) {
          // Generates a triangular schedule of uops. Step i (one per src2 register, src2 + i)
          // emits i + 2 uops, except the last step which emits i + 1. The uops of step i
          // start at slot i * (i + 3) / 2, which is the sum of (k + 2) for k < i.
          //
          // For uop j of step i:
          //  - source 0 is src1 in step 0 and temp register VECTOR_TMP_REG_LMUL afterwards;
          //  - source 2 (old vd) is dest + i when j == i, otherwise temp VECTOR_TMP_REG_LMUL + j + 1;
          //  - in the last step the destination is dest + j; in earlier steps it is temp
          //    VECTOR_TMP_REG_LMUL + j + 1, except for the extra uop (j == i + 1) which
          //    writes temp VECTOR_TMP_REG_LMUL and has sources 0 and 2 marked DontCare.
          // NOTE(review): the extra uop presumably produces the running state consumed as
          // source 0 by the next step — confirm against the execution unit.
1608      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1609        for (i <- 0 until len) {
1610          val jlen = if (i == len-1) i+1 else i+2
1611          for (j <- 0 until jlen) {
1612            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1613            val vd = if(i==len-1) (dest + j.U) else {
1614              if (j == i+1) VECTOR_TMP_REG_LMUL.U  else (VECTOR_TMP_REG_LMUL + j + 1).U
1615            }
1616            csBundle(i*(i+3)/2 + j).vecWen := true.B
1617            csBundle(i*(i+3)/2 + j).v0Wen := false.B
                // Either the DontCare object or SrcType.vp, chosen at elaboration time.
1618            val src13Type = if (j == i+1) DontCare else SrcType.vp
1619            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1620            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1621            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1622            if (i == 0) {
1623              csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1624            } else {
1625              csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1626            }
1627            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1628            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1629            csBundle(i*(i+3)/2 + j).ldest := vd
1630            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1631          }
1632        }
1633      }
          // Only vlmulReg encodings b001 / b010 / b011 are handled (2, 4, 8 steps); for any
          // other encoding this case assigns nothing.
1634      switch(vlmulReg) {
1635        is("b001".U ){
1636          genCsBundle_VEC_COMPRESS(2)
1637        }
1638        is("b010".U ){
1639          genCsBundle_VEC_COMPRESS(4)
1640        }
1641        is("b011".U ){
1642          genCsBundle_VEC_COMPRESS(8)
1643        }
1644      }
1645    }
1646    is(UopSplitType.VEC_MVNR) {
          // One uop per register index: slot i reads src1 + i, src2 + i and the old value of
          // dest + i, and writes dest + i. All MAX_VLMUL slots are filled unconditionally.
1647      for (i <- 0 until MAX_VLMUL) {
1648        csBundle(i).lsrc(0) := src1 + i.U
1649        csBundle(i).lsrc(1) := src2 + i.U
1650        csBundle(i).lsrc(2) := dest + i.U
1651        csBundle(i).ldest := dest + i.U
1652        csBundle(i).uopIdx := i.U
1653      }
1654    }
1655    is(UopSplitType.VEC_US_LDST) {
          // Slot 0 moves an integer register into temp vector register VECTOR_TMP_REG_LMUL;
          // slots 1..MAX_VLMUL are the memory uops, each reading that temp register as
          // source 0 and using dest + i as both old-vd source and destination.
          // NOTE(review): the FMV.D.X label below looks historical — the uop is configured
          // as FuType.i2v with IF2VectorType.i2Vec and e64; confirm and update the label.
1656      /*
1657      FMV.D.X
1658       */
1659      csBundle(0).srcType(0) := SrcType.reg
1660      csBundle(0).srcType(1) := SrcType.imm
1661      csBundle(0).lsrc(1) := 0.U
1662      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1663      csBundle(0).fuType := FuType.i2v.U
1664      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1665      csBundle(0).rfWen := false.B
1666      csBundle(0).fpWen := false.B
1667      csBundle(0).vecWen := true.B
1668      csBundle(0).vlsInstr := true.B
1669      //LMUL
1670      for (i <- 0 until MAX_VLMUL) {
            // Slot index is i + 1 (after the move uop) while uopIdx restarts at 0.
1671        csBundle(i + 1).srcType(0) := SrcType.vp
1672        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1673        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1674        csBundle(i + 1).ldest := dest + i.U
1675        csBundle(i + 1).uopIdx := i.U
1676        csBundle(i + 1).vlsInstr := true.B
1677      }
          // The first uop gets waitForward and the last uop (index numOfUop - 1) gets
          // blockBackward, both driven by isUsSegment.
1678      csBundle.head.waitForward := isUsSegment
1679      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1680    }
1681    is(UopSplitType.VEC_S_LDST) {
          // Two move uops followed by the memory uops:
          //  - slot 0 moves an integer register into temp VECTOR_TMP_REG_LMUL;
          //  - slot 1 moves the register named by latchedInst.lsrc(1) into temp
          //    VECTOR_TMP_REG_LMUL + 1;
          //  - slots 2..MAX_VLMUL+1 read both temps as sources 0 and 1 and use dest + i as
          //    old-vd source and destination.
          // NOTE(review): the FMV.D.X label below looks historical — both moves are
          // configured as FuType.i2v with IF2VectorType.i2Vec and e64; confirm.
1682      /*
1683      FMV.D.X
1684       */
1685      csBundle(0).srcType(0) := SrcType.reg
1686      csBundle(0).srcType(1) := SrcType.imm
1687      csBundle(0).lsrc(1) := 0.U
1688      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1689      csBundle(0).fuType := FuType.i2v.U
1690      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1691      csBundle(0).rfWen := false.B
1692      csBundle(0).fpWen := false.B
1693      csBundle(0).vecWen := true.B
1694      csBundle(0).vlsInstr := true.B
1695
1696      csBundle(1).srcType(0) := SrcType.reg
1697      csBundle(1).srcType(1) := SrcType.imm
          // The second move takes its integer source from the instruction's lsrc(1) field.
1698      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1699      csBundle(1).lsrc(1) := 0.U
1700      csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1701      csBundle(1).fuType := FuType.i2v.U
1702      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1703      csBundle(1).rfWen := false.B
1704      csBundle(1).fpWen := false.B
1705      csBundle(1).vecWen := true.B
1706      csBundle(1).vlsInstr := true.B
1707
1708      //LMUL
1709      for (i <- 0 until MAX_VLMUL) {
            // Slot index is i + 2 (after the two move uops) while uopIdx restarts at 0.
1710        csBundle(i + 2).srcType(0) := SrcType.vp
1711        csBundle(i + 2).srcType(1) := SrcType.vp
1712        csBundle(i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1713        csBundle(i + 2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1714        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1715        csBundle(i + 2).ldest := dest + i.U
1716        csBundle(i + 2).uopIdx := i.U
1717        csBundle(i + 2).vlsInstr := true.B
1718      }
          // The first uop gets waitForward and the last uop (index numOfUop - 1) gets
          // blockBackward, both driven by isSdSegment.
1719      csBundle.head.waitForward := isSdSegment
1720      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1721    }
1722    is(UopSplitType.VEC_I_LDST) {
          // Slot 0 moves an integer register into temp VECTOR_TMP_REG_LMUL; slots 1.. are the
          // memory uops. Two paths follow, selected by nf:
          //  - nf === 0: per-uop register offsets come from the indexedLSRegOffset modules;
          //  - otherwise: helper 1 sets source 0 / source 2 / destination from (lmul, nf),
          //    helper 2 then rewrites source 1 from emul, and stores finally clear vecWen.
          // The order of those three steps matters because later connections override
          // earlier ones.
          //
          // Helper 1: fills all MAX_VLMUL memory slots. Slots with i < lmul * nf write the
          // vector register file and read old vd (dest + i) as a vector source; the remaining
          // slots have vecWen cleared and source 2 set to SrcType.no. Source 1 is set to
          // SrcType.no here and is expected to be overridden by helper 2.
1723      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
1724        for (i <- 0 until MAX_VLMUL) {
1725          val vecWen = if (i < lmul * nf) true.B else false.B
              // Despite the name, this is the type of source index 2 (old vd).
1726          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
1727          csBundle(i + 1).srcType(0) := SrcType.vp
1728          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1729          csBundle(i + 1).srcType(1) := SrcType.no
1730          csBundle(i + 1).lsrc(1) := src2 + i.U
1731          csBundle(i + 1).srcType(2) := src2Type
1732          csBundle(i + 1).lsrc(2) := dest + i.U
1733          csBundle(i + 1).ldest := dest + i.U
1734          csBundle(i + 1).rfWen := false.B
1735          csBundle(i + 1).fpWen := false.B
1736          csBundle(i + 1).vecWen := vecWen
1737          csBundle(i + 1).uopIdx := i.U
1738          csBundle(i + 1).vlsInstr := true.B
1739        }
1740      }
          // Helper 2: marks source 1 (src2 + i) as a vector source for the first `emul`
          // memory slots and as SrcType.no for the rest.
1741      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
1742        for (i <- 0 until MAX_VLMUL) {
1743          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1744          csBundle(i + 1).srcType(1) := src1Type
1745          csBundle(i + 1).lsrc(1) := src2 + i.U
1746        }
1747      }
1748
1749      val vlmul = vlmulReg
1750      val vsew = Cat(0.U(1.W), vsewReg)
1751      val veew = Cat(0.U(1.W), width)
          // ~vsew + 1 is the two's-complement negation of vsew, so this computes
          // veew - vsew + vlmul modulo the operand width.
1752      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
          // Maps encodings b001 / b010 / b011 to 1 / 2 / 3; every other encoding yields 0.
1753      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
1754        "b001".U -> 1.U,
1755        "b010".U -> 2.U,
1756        "b011".U -> 3.U
1757      ))
1758      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
1759        "b001".U -> 1.U,
1760        "b010".U -> 2.U,
1761        "b011".U -> 3.U
1762      ))
          // Slot 0: move of the integer operand into the temp vector register.
1763      csBundle(0).srcType(0) := SrcType.reg
1764      csBundle(0).srcType(1) := SrcType.imm
1765      csBundle(0).lsrc(1) := 0.U
1766      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1767      csBundle(0).fuType := FuType.i2v.U
1768      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1769      csBundle(0).rfWen := false.B
1770      csBundle(0).fpWen := false.B
1771      csBundle(0).vecWen := true.B
1772      csBundle(0).vlsInstr := true.B
1773
1774      //LMUL
1775      when(nf === 0.U) {
1776        for (i <- 0 until MAX_VLMUL) {
              // Each lookup module receives {simple_emul, simple_lmul} and returns the
              // register offsets to add to src2 and dest for this uop.
1777          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1778          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1779          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1780          csBundle(i + 1).srcType(0) := SrcType.vp
1781          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
              // One-hot select of src2 + offsetVs2 among the MAX_VLMUL candidates.
1782          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1783          csBundle(i + 1).srcType(2) := SrcType.vp
1784          // lsrc2 is old vd
1785          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1786          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1787          csBundle(i + 1).uopIdx := i.U
1788          csBundle(i + 1).vlsInstr := true.B
1789        }
1790      }.otherwise{
1791        // nf field is non-zero: segment indexed load/store
1792        // gen src0, vd
            // The helper's second argument is the nf field value plus one. Only the
            // (lmul, nf) pairs listed below are generated; other pairs assign nothing here.
1793        switch(simple_lmul) {
1794          is(0.U) {
1795            switch(nf) {
1796              is(1.U) {
1797                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
1798              }
1799              is(2.U) {
1800                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
1801              }
1802              is(3.U) {
1803                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
1804              }
1805              is(4.U) {
1806                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
1807              }
1808              is(5.U) {
1809                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
1810              }
1811              is(6.U) {
1812                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
1813              }
1814              is(7.U) {
1815                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
1816              }
1817            }
1818          }
1819          is(1.U) {
1820            switch(nf) {
1821              is(1.U) {
1822                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
1823              }
1824              is(2.U) {
1825                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
1826              }
1827              is(3.U) {
1828                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
1829              }
1830            }
1831          }
1832          is(2.U) {
1833            switch(nf) {
1834              is(1.U) {
1835                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
1836              }
1837            }
1838          }
1839        }
1840
1841        // gen src1
            // Must stay after the switch above: it overrides the srcType(1) / lsrc(1)
            // values written by the first helper.
1842        switch(simple_emul) {
1843          is(0.U) {
1844            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
1845          }
1846          is(1.U) {
1847            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
1848          }
1849          is(2.U) {
1850            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
1851          }
1852          is(3.U) {
1853            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
1854          }
1855        }
1856
1857        // for vector store instructions, do not set vecWen
1858        when(isVstore) {
1859          for (i <- 0 until MAX_VLMUL) {
1860            csBundle(i + 1).vecWen := false.B
1861          }
1862        }
1863      }
          // The first uop gets waitForward and the last uop (index numOfUop - 1) gets
          // blockBackward, both driven by isIxSegment.
1864      csBundle.head.waitForward := isIxSegment
1865      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1866    }
1867  }
1868
1869  // Ready counter derived from outReadys (readyFromRename)
      // Built from (!ready, index) pairs with RenameWidth as the default.
      // NOTE(review): presumably this yields the index of the first output port that is
      // not ready, i.e. the number of leading ready ports — confirm against
      // PriorityMuxDefault in the utility library.
1870  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1871
1872  // The remaining uops of the complex inst in ComplexDecoder can all be sent out this cycle
1873  val thisAllOut = uopRes <= readyCounter
1874
      // Two-state controller. uopRes holds the number of uops still to be sent.
      //  - s_idle:   a valid input loads uopRes with the instruction's uop count and
      //              moves to s_active.
      //  - s_active: if everything left fits this cycle, either reload for a new valid
      //              input or return to s_idle; otherwise stay and subtract the number
      //              of uops sent this cycle (readyCounter).
1875  switch(state) {
1876    is(s_idle) {
1877      when (inValid) {
1878        stateNext := s_active
1879        uopResNext := inUopInfo.numOfUop
1880      }
1881    }
1882    is(s_active) {
1883      when (thisAllOut) {
1884        when (inValid) {
1885          stateNext := s_active
1886          uopResNext := inUopInfo.numOfUop
1887        }.otherwise {
1888          stateNext := s_idle
1889          uopResNext := 0.U
1890        }
1891      }.otherwise {
1892        stateNext := s_active
1893        uopResNext := uopRes - readyCounter
1894      }
1895    }
1896  }
1897
      // A redirect forces the controller back to idle with no uops pending.
1898  state := Mux(io.redirect, s_idle, stateNext)
1899  uopRes := Mux(io.redirect, 0.U, uopResNext)
1900
      // Number of uops emitted this cycle: min(uopRes, readyCounter).
1901  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1902
1903  for(i <- 0 until RenameWidth) {
1904    outValids(i) := complexNum > i.U
        // Output port i carries the uop at offset i past those already sent
        // (numOfUop - uopRes); an index at or beyond maxUopSize is clamped to the last entry.
1905    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1906  }
1907
1908  outComplexNum := Mux(state === s_active, complexNum, 0.U)
      // Ready for a new instruction when idle, or when active and the current one
      // finishes this cycle (&& binds tighter than ||).
1909  inReady := state === s_idle || state === s_active && thisAllOut
1910
1911//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1912//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1913//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1914//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1915//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1916//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1917//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1918//
1919//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1920//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1921//    0.U)
1922//  validToRename.zipWithIndex.foreach{
1923//    case(dst, i) =>
1924//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1925//      dst := MuxCase(false.B, Seq(
1926//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1927//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1928//      ).toSeq)
1929//  }
1930//
1931//  readyToIBuf.zipWithIndex.foreach {
1932//    case (dst, i) =>
1933//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1934//      dst := MuxCase(true.B, Seq(
1935//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1936//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1937//      ).toSeq)
1938//  }
1939//
1940//  io.deq.decodedInsts := decodedInsts
1941//  io.deq.complexNum := complexNum
1942//  io.deq.validToRename := validToRename
1943//  io.deq.readyToIBuf := readyToIBuf
1944}
1945