xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 3e10d835b15e35a27e4bfcc1f610babfeea30430)
1/***************************************************************************************
2  * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3  * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4  * Copyright (c) 2020-2021 Peng Cheng Laboratory
5  *
6  * XiangShan is licensed under Mulan PSL v2.
7  * You can use this software according to the terms and conditions of the Mulan PSL v2.
8  * You may obtain a copy of Mulan PSL v2 at:
9  *          http://license.coscl.org.cn/MulanPSL2
10  *
11  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14  *
15  * See the Mulan PSL v2 for more details.
16  ***************************************************************************************/
17
18package xiangshan.backend.decode
19
20import org.chipsalliance.cde.config.Parameters
21import chisel3._
22import chisel3.util._
23import freechips.rocketchip.rocket.Instructions
24import freechips.rocketchip.util.uintToBitPat
25import utils._
26import utility._
27import xiangshan.ExceptionNO.illegalInstr
28import xiangshan._
29import xiangshan.backend.fu.fpu.FPU
30import xiangshan.backend.fu.FuType
31import freechips.rocketchip.rocket.Instructions._
32import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
33import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
34import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
35import yunsuan.VpermType
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one fixed uop of a non-segment indexed vector
  * load/store. Given the (emul, lmul) pair on `src`, it outputs the register offsets
  * to be used for vs2 and vd by the uop numbered `uopIdx`.
  *
  * `src` is matched as `(emul << 2) | lmul`, i.e. emul in bits [3:2] and lmul in bits
  * [1:0]. Both fields are used as shift amounts (`1 << emul`, `1 << lmul`), so they are
  * log2 encodings. All 16 combinations are evaluated in Scala and handed to the chisel3
  * `decoder` with the QMC minimizer.
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2, vd) register offsets of uop `uopIdx`.
    *
    * The uop count is `1 << max(lmul, emul)`. The operand belonging to the larger group
    * advances by one register per uop; the other operand advances once every
    * `1 << |lmul - emul|` uops.
    *
    * @param lmul   log2 of the data register group size
    * @param emul   log2 of the index register group size
    * @param uopIdx uop index to look up
    * @return (offsetVs2, offsetVd), or (0, 0) when `uopIdx` is outside `[0, 1 << max(lmul, emul))`
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    val log2Uops = lmul max emul
    // number of uops that share one register of the smaller group
    val ratio = 1 << (log2Uops - (lmul min emul))
    val inRange = uopIdx >= 0 && uopIdx < (1 << log2Uops)
    if (!inRange) {
      (0, 0)
    } else if (lmul < emul) { // lmul < emul, uop num depends on emul * nf
      (uopIdx, uopIdx / ratio)
    } else {                  // lmul >= emul, uop num depends on lmul * nf
      (uopIdx / ratio, uopIdx)
    }
  }

  // indexed load/store: one (emul, lmul, offsetVs2, offsetVd) entry per pair, emul-major
  val combVemulNf : Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Output pattern packs offsetVs2 into bits [5:3] and offsetVd into bits [2:0].
  // Every 4-bit input is listed, so the BitPat.N(6) default is never selected.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat(((emul << 2) | lmul).U(4.W)), BitPat(((offsetVs2 << 3) | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/** Constants mixed into the vector decode logic. */
trait VectorConstants {
  /** Maximum LMUL; used as the unroll bound of the per-register uop loops. */
  val MAX_VLMUL: Int = 8

  /** First temporary vector logical register index (33~47  ->  15 registers). */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Logical register index used for compress; lives in the v0 regfile. */
  val VECTOR_COMPRESS: Int = 1

  // NOTE(review): presumably the upper bound on uops of one indexed load/store;
  // its use is not visible in this part of the file - confirm.
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * Port list of [[DecodeUnitComp]].
  *
  * Field names and declaration order define the generated ports, so they must not be
  * renamed or reordered.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // NOTE(review): presumably aborts the instruction being split; its use is not
  // visible in this part of the file - confirm.
  val redirect = Input(Bool())

  // Custom CSR control bundle supplied to the decoder.
  val csrCtrl = Input(new CustomCSRCtrlIO)

  // Bypassed vtype; DecodeUnitComp connects it into the vpu fields of the vset uops.
  val vtypeBypass = Input(new VType)

  // When the first inst in decode vector is complex inst, pass it in.
  // Carries the simple-decoder result together with its uop information.
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))

  // Split uops: RenameWidth decoupled DecodedInst channels.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }

  // NOTE(review): presumably the number of uops presented on `out` - confirm.
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153  val vstartReg = latchedInst.vpu.vstart
154
155  //Type of uop Div
156  val typeOfSplit = latchedInst.uopSplitType
157  val src1Type = latchedInst.srcType(0)
158  val src1IsImm = src1Type === SrcType.imm
159  val src1IsFp = src1Type === SrcType.fp
160
161  val isVstore = FuType.isVStore(latchedInst.fuType)
162
163  numOfUop := latchedUopInfo.numOfUop
164  numOfWB := latchedUopInfo.numOfWB
165
166  //uops dispatch
167  val s_idle :: s_active :: Nil = Enum(2)
168  val state = RegInit(s_idle)
169  val stateNext = WireDefault(state)
170  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
171  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
172  val uopResNext = WireInit(uopRes)
173  val e64 = 3.U(2.W)
174  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
175  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
176  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
177
178  //uop div up to maxUopSize
179  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
180  val fixedDecodedInst = Wire(Vec(maxUopSize, new DecodedInst))
181
182  csBundle.foreach { case dst =>
183    dst := latchedInst
184    dst.numUops := latchedUopInfo.numOfUop
185    dst.numWB := latchedUopInfo.numOfWB
186    dst.firstUop := false.B
187    dst.lastUop := false.B
188    dst.vlsInstr := false.B
189  }
190
191  csBundle(0).firstUop := true.B
192  csBundle(numOfUop - 1.U).lastUop := true.B
193
  // when vstart is not zero, the last uop will reset vstart to zero;
  // therefore, block backward on the last uop and flush the pipe on the first uop
196  csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U
197  csBundle(0.U).flushPipe := vstartReg =/= 0.U
198
199  switch(typeOfSplit) {
200    is(UopSplitType.VSET) {
201      // In simple decoder, rfWen and vecWen are not set
202      when(isVsetSimple) {
203        // Default
204        // uop0 set rd, never flushPipe
205        csBundle(0).fuType := FuType.vsetiwi.U
206        csBundle(0).flushPipe := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
207        csBundle(0).blockBackward := false.B
208        csBundle(0).rfWen := true.B
209        // uop1 set vl, vsetvl will flushPipe
210        csBundle(1).ldest := Vl_IDX.U
211        csBundle(1).vecWen := false.B
212        csBundle(1).vlWen := true.B
213        csBundle(1).flushPipe := false.B
214        csBundle(1).blockBackward := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
215        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
216          // write nothing, uop0 is a nop instruction
217          csBundle(0).rfWen := false.B
218          csBundle(0).fpWen := false.B
219          csBundle(0).vecWen := false.B
220          csBundle(0).vlWen := false.B
221          csBundle(1).fuType := FuType.vsetfwf.U
222          csBundle(1).srcType(0) := SrcType.no
223          csBundle(1).srcType(2) := SrcType.no
224          csBundle(1).srcType(3) := SrcType.no
225          csBundle(1).srcType(4) := SrcType.vp
226          csBundle(1).lsrc(4) := Vl_IDX.U
227        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
228          // uop0: mv vtype gpr to vector region
229          csBundle(0).srcType(0) := SrcType.xp
230          csBundle(0).srcType(1) := SrcType.no
231          csBundle(0).lsrc(0) := src2
232          csBundle(0).lsrc(1) := 0.U
233          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
234          csBundle(0).fuType := FuType.i2v.U
235          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
236          csBundle(0).rfWen := false.B
237          csBundle(0).fpWen := false.B
238          csBundle(0).vecWen := true.B
239          csBundle(0).vlWen := false.B
240          // uop1: uvsetvcfg_vv
241          csBundle(1).fuType := FuType.vsetfwf.U
242          // vl
243          csBundle(1).srcType(0) := SrcType.no
244          csBundle(1).srcType(2) := SrcType.no
245          csBundle(1).srcType(3) := SrcType.no
246          csBundle(1).srcType(4) := SrcType.vp
247          csBundle(1).lsrc(4) := Vl_IDX.U
248          // vtype
249          csBundle(1).srcType(1) := SrcType.vp
250          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
251          csBundle(1).vecWen := false.B
252          csBundle(1).vlWen := true.B
253          csBundle(1).ldest := Vl_IDX.U
254        }.elsewhen(dest === 0.U) {
255          // write nothing, uop0 is a nop instruction
256          csBundle(0).rfWen := false.B
257          csBundle(0).fpWen := false.B
258          csBundle(0).vecWen := false.B
259          csBundle(0).vlWen := false.B
260        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) {
          // because vsetvl may modify src2 when src2 == rd,
          // we need to write rd in the second uop to avoid the dependency
263          // uop0 set vl
264          csBundle(0).fuType := FuType.vsetiwf.U
265          csBundle(0).ldest := Vl_IDX.U
266          csBundle(0).rfWen := false.B
267          csBundle(0).vlWen := true.B
268          // uop1 set rd
269          csBundle(1).fuType := FuType.vsetiwi.U
270          csBundle(1).ldest := dest
271          csBundle(1).rfWen := true.B
272          csBundle(1).vlWen := false.B
273        }
274        // use bypass vtype from vtypeGen
275        csBundle(0).vpu.connectVType(io.vtypeBypass)
276        csBundle(1).vpu.connectVType(io.vtypeBypass)
277      }
278    }
279    is(UopSplitType.VEC_VVV) {
280      for (i <- 0 until MAX_VLMUL) {
281        csBundle(i).lsrc(0) := src1 + i.U
282        csBundle(i).lsrc(1) := src2 + i.U
283        csBundle(i).lsrc(2) := dest + i.U
284        csBundle(i).ldest := dest + i.U
285        csBundle(i).uopIdx := i.U
286      }
287    }
288    is(UopSplitType.VEC_VFV) {
289      /*
290      f to vector move
291       */
292      csBundle(0).srcType(0) := SrcType.fp
293      csBundle(0).srcType(1) := SrcType.imm
294      csBundle(0).srcType(2) := SrcType.imm
295      csBundle(0).lsrc(1) := 0.U
296      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
297      csBundle(0).fuType := FuType.f2v.U
298      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
299      csBundle(0).vecWen := true.B
300      csBundle(0).vpu.isReverse := false.B
301      /*
302      LMUL
303       */
304      for (i <- 0 until MAX_VLMUL) {
305        csBundle(i + 1).srcType(0) := SrcType.vp
306        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
307        csBundle(i + 1).lsrc(1) := src2 + i.U
308        csBundle(i + 1).lsrc(2) := dest + i.U
309        csBundle(i + 1).ldest := dest + i.U
310        csBundle(i + 1).uopIdx := i.U
311      }
312    }
313    is(UopSplitType.VEC_EXT2) {
314      for (i <- 0 until MAX_VLMUL / 2) {
315        csBundle(2 * i).lsrc(1) := src2 + i.U
316        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
317        csBundle(2 * i).ldest := dest + (2 * i).U
318        csBundle(2 * i).uopIdx := (2 * i).U
319        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
320        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
321        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
322        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
323      }
324    }
325    is(UopSplitType.VEC_EXT4) {
326      for (i <- 0 until MAX_VLMUL / 4) {
327        csBundle(4 * i).lsrc(1) := src2 + i.U
328        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
329        csBundle(4 * i).ldest := dest + (4 * i).U
330        csBundle(4 * i).uopIdx := (4 * i).U
331        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
332        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
333        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
334        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
335        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
336        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
337        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
338        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
339        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
340        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
341        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
342        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
343      }
344    }
345    is(UopSplitType.VEC_EXT8) {
346      for (i <- 0 until MAX_VLMUL) {
347        csBundle(i).lsrc(1) := src2
348        csBundle(i).lsrc(2) := dest + i.U
349        csBundle(i).ldest := dest + i.U
350        csBundle(i).uopIdx := i.U
351      }
352    }
353    is(UopSplitType.VEC_0XV) {
354      /*
355      i/f to vector move
356       */
357      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
358      csBundle(0).srcType(1) := SrcType.imm
359      csBundle(0).srcType(2) := SrcType.imm
360      csBundle(0).lsrc(1) := 0.U
361      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
362      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
363      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
364      csBundle(0).rfWen := false.B
365      csBundle(0).fpWen := false.B
366      csBundle(0).vecWen := true.B
367      /*
368      vmv.s.x
369       */
370      csBundle(1).srcType(0) := SrcType.vp
371      csBundle(1).srcType(1) := SrcType.imm
372      csBundle(1).srcType(2) := SrcType.vp
373      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
374      csBundle(1).lsrc(1) := 0.U
375      csBundle(1).lsrc(2) := dest
376      csBundle(1).ldest := dest
377      csBundle(1).rfWen := false.B
378      csBundle(1).fpWen := false.B
379      csBundle(1).vecWen := true.B
380      csBundle(1).uopIdx := 0.U
381    }
382    is(UopSplitType.VEC_VXV) {
383      /*
384      i to vector move
385       */
386      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
387      csBundle(0).srcType(1) := SrcType.imm
388      csBundle(0).srcType(2) := SrcType.imm
389      csBundle(0).lsrc(1) := 0.U
390      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
391      csBundle(0).fuType := FuType.i2v.U
392      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
393      csBundle(0).vecWen := true.B
394      csBundle(0).vpu.isReverse := false.B
395      /*
396      LMUL
397       */
398      for (i <- 0 until MAX_VLMUL) {
399        csBundle(i + 1).srcType(0) := SrcType.vp
400        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
401        csBundle(i + 1).lsrc(1) := src2 + i.U
402        csBundle(i + 1).lsrc(2) := dest + i.U
403        csBundle(i + 1).ldest := dest + i.U
404        csBundle(i + 1).uopIdx := i.U
405      }
406    }
407    is(UopSplitType.VEC_VVW) {
408      for (i <- 0 until MAX_VLMUL / 2) {
409        csBundle(2 * i).lsrc(0) := src1 + i.U
410        csBundle(2 * i).lsrc(1) := src2 + i.U
411        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
412        csBundle(2 * i).ldest := dest + (2 * i).U
413        csBundle(2 * i).uopIdx := (2 * i).U
414        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
415        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
416        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
417        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
418        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
419      }
420    }
421    is(UopSplitType.VEC_VFW) {
422      /*
423      f to vector move
424       */
425      csBundle(0).srcType(0) := SrcType.fp
426      csBundle(0).srcType(1) := SrcType.imm
427      csBundle(0).srcType(2) := SrcType.imm
428      csBundle(0).lsrc(1) := 0.U
429      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
430      csBundle(0).fuType := FuType.f2v.U
431      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
432      csBundle(0).rfWen := false.B
433      csBundle(0).fpWen := false.B
434      csBundle(0).vecWen := true.B
435
436      for (i <- 0 until MAX_VLMUL / 2) {
437        csBundle(2 * i + 1).srcType(0) := SrcType.vp
438        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
439        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
440        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
441        csBundle(2 * i + 1).ldest := dest + (2 * i).U
442        csBundle(2 * i + 1).uopIdx := (2 * i).U
443        csBundle(2 * i + 2).srcType(0) := SrcType.vp
444        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
445        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
446        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
447        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
448        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
449      }
450    }
451    is(UopSplitType.VEC_WVW) {
452      for (i <- 0 until MAX_VLMUL / 2) {
453        csBundle(2 * i).lsrc(0) := src1 + i.U
454        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
455        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
456        csBundle(2 * i).ldest := dest + (2 * i).U
457        csBundle(2 * i).uopIdx := (2 * i).U
458        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
459        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
460        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
461        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
462        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
463      }
464    }
465    is(UopSplitType.VEC_VXW) {
466      /*
467      i to vector move
468       */
469      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
470      csBundle(0).srcType(1) := SrcType.imm
471      csBundle(0).srcType(2) := SrcType.imm
472      csBundle(0).lsrc(1) := 0.U
473      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
474      csBundle(0).fuType := FuType.i2v.U
475      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
476      csBundle(0).vecWen := true.B
477
478      for (i <- 0 until MAX_VLMUL / 2) {
479        csBundle(2 * i + 1).srcType(0) := SrcType.vp
480        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
481        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
482        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
483        csBundle(2 * i + 1).ldest := dest + (2 * i).U
484        csBundle(2 * i + 1).uopIdx := (2 * i).U
485        csBundle(2 * i + 2).srcType(0) := SrcType.vp
486        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
487        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
488        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
489        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
490        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
491      }
492    }
493    is(UopSplitType.VEC_WXW) {
494      /*
495      i to vector move
496       */
497      csBundle(0).srcType(0) := SrcType.reg
498      csBundle(0).srcType(1) := SrcType.imm
499      csBundle(0).srcType(2) := SrcType.imm
500      csBundle(0).lsrc(1) := 0.U
501      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
502      csBundle(0).fuType := FuType.i2v.U
503      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
504      csBundle(0).vecWen := true.B
505
506      for (i <- 0 until MAX_VLMUL / 2) {
507        csBundle(2 * i + 1).srcType(0) := SrcType.vp
508        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
509        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
510        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
511        csBundle(2 * i + 1).ldest := dest + (2 * i).U
512        csBundle(2 * i + 1).uopIdx := (2 * i).U
513        csBundle(2 * i + 2).srcType(0) := SrcType.vp
514        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
515        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
516        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
517        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
518        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
519      }
520    }
521    is(UopSplitType.VEC_WVV) {
522      for (i <- 0 until MAX_VLMUL / 2) {
523
524        csBundle(2 * i).lsrc(0) := src1 + i.U
525        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
526        csBundle(2 * i).lsrc(2) := dest + i.U
527        csBundle(2 * i).ldest := dest + i.U
528        csBundle(2 * i).uopIdx := (2 * i).U
529        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
530        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
531        csBundle(2 * i + 1).lsrc(2) := dest + i.U
532        csBundle(2 * i + 1).ldest := dest + i.U
533        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
534      }
535    }
536    is(UopSplitType.VEC_WFW) {
537      /*
538      f to vector move
539       */
540      csBundle(0).srcType(0) := SrcType.fp
541      csBundle(0).srcType(1) := SrcType.imm
542      csBundle(0).srcType(2) := SrcType.imm
543      csBundle(0).lsrc(1) := 0.U
544      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
545      csBundle(0).fuType := FuType.f2v.U
546      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
547      csBundle(0).rfWen := false.B
548      csBundle(0).fpWen := false.B
549      csBundle(0).vecWen := true.B
550
551      for (i <- 0 until MAX_VLMUL / 2) {
552        csBundle(2 * i + 1).srcType(0) := SrcType.vp
553        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
554        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
555        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
556        csBundle(2 * i + 1).ldest := dest + (2 * i).U
557        csBundle(2 * i + 1).uopIdx := (2 * i).U
558        csBundle(2 * i + 2).srcType(0) := SrcType.vp
559        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
560        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
561        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
562        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
563        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
564      }
565    }
566    is(UopSplitType.VEC_WXV) {
567      /*
568      i to vector move
569       */
570      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
571      csBundle(0).srcType(1) := SrcType.imm
572      csBundle(0).srcType(2) := SrcType.imm
573      csBundle(0).lsrc(1) := 0.U
574      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
575      csBundle(0).fuType := FuType.i2v.U
576      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
577      csBundle(0).vecWen := true.B
578
579      for (i <- 0 until MAX_VLMUL / 2) {
580        csBundle(2 * i + 1).srcType(0) := SrcType.vp
581        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
582        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
583        csBundle(2 * i + 1).lsrc(2) := dest + i.U
584        csBundle(2 * i + 1).ldest := dest + i.U
585        csBundle(2 * i + 1).uopIdx := (2 * i).U
586        csBundle(2 * i + 2).srcType(0) := SrcType.vp
587        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
588        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
589        csBundle(2 * i + 2).lsrc(2) := dest + i.U
590        csBundle(2 * i + 2).ldest := dest + i.U
591        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
592      }
593    }
594    is(UopSplitType.VEC_VVM) {
595      csBundle(0).lsrc(2) := dest
596      csBundle(0).ldest := dest
597      csBundle(0).uopIdx := 0.U
598      for (i <- 1 until MAX_VLMUL) {
599        csBundle(i).lsrc(0) := src1 + i.U
600        csBundle(i).lsrc(1) := src2 + i.U
601        csBundle(i).lsrc(2) := dest
602        csBundle(i).ldest := dest
603        csBundle(i).uopIdx := i.U
604      }
605    }
606    is(UopSplitType.VEC_VFM) {
607      /*
608      f to vector move
609       */
610      csBundle(0).srcType(0) := SrcType.fp
611      csBundle(0).srcType(1) := SrcType.imm
612      csBundle(0).srcType(2) := SrcType.imm
613      csBundle(0).lsrc(1) := 0.U
614      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
615      csBundle(0).fuType := FuType.f2v.U
616      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
617      csBundle(0).rfWen := false.B
618      csBundle(0).fpWen := false.B
619      csBundle(0).vecWen := true.B
620      //LMUL
621      csBundle(1).srcType(0) := SrcType.vp
622      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
623      csBundle(1).lsrc(2) := dest
624      csBundle(1).ldest := dest
625      csBundle(1).uopIdx := 0.U
626      for (i <- 1 until MAX_VLMUL) {
627        csBundle(i + 1).srcType(0) := SrcType.vp
628        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
629        csBundle(i + 1).lsrc(1) := src2 + i.U
630        csBundle(i + 1).lsrc(2) := dest
631        csBundle(i + 1).ldest := dest
632        csBundle(i + 1).uopIdx := i.U
633      }
634      csBundle(numOfUop - 1.U).ldest := dest
635    }
636    is(UopSplitType.VEC_VXM) {
637      /*
638      i to vector move
639       */
640      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
641      csBundle(0).srcType(1) := SrcType.imm
642      csBundle(0).srcType(2) := SrcType.imm
643      csBundle(0).lsrc(1) := 0.U
644      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
645      csBundle(0).fuType := FuType.i2v.U
646      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
647      csBundle(0).vecWen := true.B
648      //LMUL
649      csBundle(1).srcType(0) := SrcType.vp
650      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
651      csBundle(1).lsrc(2) := dest
652      csBundle(1).ldest := dest
653      csBundle(1).uopIdx := 0.U
654      for (i <- 1 until MAX_VLMUL) {
655        csBundle(i + 1).srcType(0) := SrcType.vp
656        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
657        csBundle(i + 1).lsrc(1) := src2 + i.U
658        csBundle(i + 1).lsrc(2) := dest
659        csBundle(i + 1).ldest := dest
660        csBundle(i + 1).uopIdx := i.U
661      }
662      csBundle(numOfUop - 1.U).ldest := dest
663    }
664    is(UopSplitType.VEC_SLIDE1UP) {
665      /*
666      i to vector move
667       */
668      csBundle(0).srcType(0) := SrcType.reg
669      csBundle(0).srcType(1) := SrcType.imm
670      csBundle(0).srcType(2) := SrcType.imm
671      csBundle(0).lsrc(1) := 0.U
672      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
673      csBundle(0).fuType := FuType.i2v.U
674      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
675      csBundle(0).vecWen := true.B
676      //LMUL
677      csBundle(1).srcType(0) := SrcType.vp
678      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
679      csBundle(1).lsrc(2) := dest
680      csBundle(1).ldest := dest
681      csBundle(1).uopIdx := 0.U
682      for (i <- 1 until MAX_VLMUL) {
683        csBundle(i + 1).srcType(0) := SrcType.vp
684        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
685        csBundle(i + 1).lsrc(1) := src2 + i.U
686        csBundle(i + 1).lsrc(2) := dest + i.U
687        csBundle(i + 1).ldest := dest + i.U
688        csBundle(i + 1).uopIdx := i.U
689      }
690    }
691    is(UopSplitType.VEC_FSLIDE1UP) {
692      /*
693      f to vector move
694       */
695      csBundle(0).srcType(0) := SrcType.fp
696      csBundle(0).srcType(1) := SrcType.imm
697      csBundle(0).srcType(2) := SrcType.imm
698      csBundle(0).lsrc(1) := 0.U
699      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
700      csBundle(0).fuType := FuType.f2v.U
701      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
702      csBundle(0).rfWen := false.B
703      csBundle(0).fpWen := false.B
704      csBundle(0).vecWen := true.B
705      //LMUL
706      csBundle(1).srcType(0) := SrcType.vp
707      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
708      csBundle(1).lsrc(1) := src2
709      csBundle(1).lsrc(2) := dest
710      csBundle(1).ldest := dest
711      csBundle(1).uopIdx := 0.U
712      for (i <- 1 until MAX_VLMUL) {
713        csBundle(i + 1).srcType(0) := SrcType.vp
714        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
715        csBundle(i + 1).lsrc(1) := src2 + i.U
716        csBundle(i + 1).lsrc(2) := dest + i.U
717        csBundle(i + 1).ldest := dest + i.U
718        csBundle(i + 1).uopIdx := i.U
719      }
720    }
721    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
722      /*
723      i to vector move
724       */
725      csBundle(0).srcType(0) := SrcType.reg
726      csBundle(0).srcType(1) := SrcType.imm
727      csBundle(0).srcType(2) := SrcType.imm
728      csBundle(0).lsrc(1) := 0.U
729      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
730      csBundle(0).fuType := FuType.i2v.U
731      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
732      csBundle(0).vecWen := true.B
733      //LMUL
734      for (i <- 0 until MAX_VLMUL) {
735        csBundle(2 * i + 1).srcType(0) := SrcType.vp
736        csBundle(2 * i + 1).srcType(1) := SrcType.vp
737        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
738        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
739        csBundle(2 * i + 1).lsrc(2) := dest + i.U
740        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
741        csBundle(2 * i + 1).uopIdx := (2 * i).U
742        if (2 * i + 2 < MAX_VLMUL * 2) {
743          csBundle(2 * i + 2).srcType(0) := SrcType.vp
744          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
745          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
746          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
747          csBundle(2 * i + 2).ldest := dest + i.U
748          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
749        }
750      }
751      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
752      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
753      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
754    }
755    is(UopSplitType.VEC_FSLIDE1DOWN) {
756      /*
757      f to vector move
758       */
759      csBundle(0).srcType(0) := SrcType.fp
760      csBundle(0).srcType(1) := SrcType.imm
761      csBundle(0).srcType(2) := SrcType.imm
762      csBundle(0).lsrc(1) := 0.U
763      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
764      csBundle(0).fuType := FuType.f2v.U
765      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
766      csBundle(0).rfWen := false.B
767      csBundle(0).fpWen := false.B
768      csBundle(0).vecWen := true.B
769      //LMUL
770      for (i <- 0 until MAX_VLMUL) {
771        csBundle(2 * i + 1).srcType(0) := SrcType.vp
772        csBundle(2 * i + 1).srcType(1) := SrcType.vp
773        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
774        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
775        csBundle(2 * i + 1).lsrc(2) := dest + i.U
776        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
777        csBundle(2 * i + 1).uopIdx := (2 * i).U
778        if (2 * i + 2 < MAX_VLMUL * 2) {
779          csBundle(2 * i + 2).srcType(0) := SrcType.vp
780          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
781          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
782          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
783          csBundle(2 * i + 2).ldest := dest + i.U
784          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
785        }
786      }
787      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
788      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
789      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
790    }
791    is(UopSplitType.VEC_VRED) {
792      when(vlmulReg === "b001".U) {
793        csBundle(0).srcType(2) := SrcType.DC
794        csBundle(0).lsrc(0) := src2 + 1.U
795        csBundle(0).lsrc(1) := src2
796        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
797        csBundle(0).uopIdx := 0.U
798      }
799      when(vlmulReg === "b010".U) {
800        csBundle(0).srcType(2) := SrcType.DC
801        csBundle(0).lsrc(0) := src2 + 1.U
802        csBundle(0).lsrc(1) := src2
803        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
804        csBundle(0).uopIdx := 0.U
805
806        csBundle(1).srcType(2) := SrcType.DC
807        csBundle(1).lsrc(0) := src2 + 3.U
808        csBundle(1).lsrc(1) := src2 + 2.U
809        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
810        csBundle(1).uopIdx := 1.U
811
812        csBundle(2).srcType(2) := SrcType.DC
813        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
814        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
815        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
816        csBundle(2).uopIdx := 2.U
817      }
818      when(vlmulReg === "b011".U) {
819        for (i <- 0 until MAX_VLMUL) {
820          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
821            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
822            csBundle(i).lsrc(1) := src2 + (i * 2).U
823            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
824          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
825            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
826            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
827            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
828          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
829            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
830            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
831            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
832          }
833          csBundle(i).srcType(2) := SrcType.DC
834          csBundle(i).uopIdx := i.U
835        }
836      }
837      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
838        /*
839         * 2 <= vlmul <= 8
840         */
841        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
842        csBundle(numOfUop - 1.U).lsrc(0) := src1
843        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
844        csBundle(numOfUop - 1.U).lsrc(2) := dest
845        csBundle(numOfUop - 1.U).ldest := dest
846        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
847      }
848    }
849    is(UopSplitType.VEC_VFRED) {
850      val vlmul = vlmulReg
851      val vsew = vsewReg
852      when(vlmul === VLmul.m8){
853        for (i <- 0 until 4) {
854          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
855          csBundle(i).lsrc(1) := src2 + (i * 2).U
856          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
857          csBundle(i).uopIdx := i.U
858        }
859        for (i <- 4 until 6) {
860          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
861          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
862          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
863          csBundle(i).uopIdx := i.U
864        }
865        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
866        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
867        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
868        csBundle(6).uopIdx := 6.U
869        when(vsew === VSew.e64) {
870          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
871          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
872          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
873          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
874          csBundle(7).uopIdx := 7.U
875          csBundle(8).lsrc(0) := src1
876          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
877          csBundle(8).ldest := dest
878          csBundle(8).uopIdx := 8.U
879        }
880        when(vsew === VSew.e32) {
881          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
882          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
883          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
884          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
885          csBundle(7).uopIdx := 7.U
886          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
887          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
888          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
889          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
890          csBundle(8).uopIdx := 8.U
891          csBundle(9).lsrc(0) := src1
892          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
893          csBundle(9).ldest := dest
894          csBundle(9).uopIdx := 9.U
895        }
896        when(vsew === VSew.e16) {
897          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
898          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
899          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
900          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
901          csBundle(7).uopIdx := 7.U
902          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
903          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
904          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
905          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
906          csBundle(8).uopIdx := 8.U
907          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
908          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
909          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
910          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
911          csBundle(9).uopIdx := 9.U
912          csBundle(10).lsrc(0) := src1
913          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
914          csBundle(10).ldest := dest
915          csBundle(10).uopIdx := 10.U
916        }
917      }
918      when(vlmul === VLmul.m4) {
919        for (i <- 0 until 2) {
920          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
921          csBundle(i).lsrc(1) := src2 + (i * 2).U
922          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
923          csBundle(i).uopIdx := i.U
924        }
925        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
926        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
927        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
928        csBundle(2).uopIdx := 2.U
929        when(vsew === VSew.e64) {
930          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
931          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
932          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
933          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
934          csBundle(3).uopIdx := 3.U
935          csBundle(4).lsrc(0) := src1
936          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
937          csBundle(4).ldest := dest
938          csBundle(4).uopIdx := 4.U
939        }
940        when(vsew === VSew.e32) {
941          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
942          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
943          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
944          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
945          csBundle(3).uopIdx := 3.U
946          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
947          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
948          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
949          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
950          csBundle(4).uopIdx := 4.U
951          csBundle(5).lsrc(0) := src1
952          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
953          csBundle(5).ldest := dest
954          csBundle(5).uopIdx := 5.U
955        }
956        when(vsew === VSew.e16) {
957          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
958          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
959          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
960          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
961          csBundle(3).uopIdx := 3.U
962          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
963          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
964          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
965          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
966          csBundle(4).uopIdx := 4.U
967          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
968          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
969          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
970          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
971          csBundle(5).uopIdx := 5.U
972          csBundle(6).lsrc(0) := src1
973          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
974          csBundle(6).ldest := dest
975          csBundle(6).uopIdx := 6.U
976        }
977      }
978      when(vlmul === VLmul.m2) {
979        csBundle(0).lsrc(0) := src2 + 1.U
980        csBundle(0).lsrc(1) := src2 + 0.U
981        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
982        csBundle(0).uopIdx := 0.U
983        when(vsew === VSew.e64) {
984          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
985          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
986          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
987          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
988          csBundle(1).uopIdx := 1.U
989          csBundle(2).lsrc(0) := src1
990          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
991          csBundle(2).ldest := dest
992          csBundle(2).uopIdx := 2.U
993        }
994        when(vsew === VSew.e32) {
995          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
996          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
997          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
998          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
999          csBundle(1).uopIdx := 1.U
1000          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1001          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1002          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1003          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1004          csBundle(2).uopIdx := 2.U
1005          csBundle(3).lsrc(0) := src1
1006          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1007          csBundle(3).ldest := dest
1008          csBundle(3).uopIdx := 3.U
1009        }
1010        when(vsew === VSew.e16) {
1011          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1012          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1013          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1014          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
1015          csBundle(1).uopIdx := 1.U
1016          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1017          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1018          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1019          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1020          csBundle(2).uopIdx := 2.U
1021          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
1022          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1023          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1024          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
1025          csBundle(3).uopIdx := 3.U
1026          csBundle(4).lsrc(0) := src1
1027          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1028          csBundle(4).ldest := dest
1029          csBundle(4).uopIdx := 4.U
1030        }
1031      }
1032      when(vlmul === VLmul.m1) {
1033        when(vsew === VSew.e64) {
1034          csBundle(0).lsrc(0) := src2
1035          csBundle(0).lsrc(1) := src2
1036          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1037          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1038          csBundle(0).uopIdx := 0.U
1039          csBundle(1).lsrc(0) := src1
1040          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1041          csBundle(1).ldest := dest
1042          csBundle(1).uopIdx := 1.U
1043        }
1044        when(vsew === VSew.e32) {
1045          csBundle(0).lsrc(0) := src2
1046          csBundle(0).lsrc(1) := src2
1047          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1048          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1049          csBundle(0).uopIdx := 0.U
1050          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1051          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1052          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1053          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1054          csBundle(1).uopIdx := 1.U
1055          csBundle(2).lsrc(0) := src1
1056          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1057          csBundle(2).ldest := dest
1058          csBundle(2).uopIdx := 2.U
1059        }
1060        when(vsew === VSew.e16) {
1061          csBundle(0).lsrc(0) := src2
1062          csBundle(0).lsrc(1) := src2
1063          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1064          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1065          csBundle(0).uopIdx := 0.U
1066          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1067          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1068          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1069          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1070          csBundle(1).uopIdx := 1.U
1071          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1072          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1073          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1074          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1075          csBundle(2).uopIdx := 2.U
1076          csBundle(3).lsrc(0) := src1
1077          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1078          csBundle(3).ldest := dest
1079          csBundle(3).uopIdx := 3.U
1080        }
1081      }
1082      when(vlmul === VLmul.mf2) {
1083        when(vsew === VSew.e32) {
1084          csBundle(0).lsrc(0) := src2
1085          csBundle(0).lsrc(1) := src2
1086          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1087          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1088          csBundle(0).uopIdx := 0.U
1089          csBundle(1).lsrc(0) := src1
1090          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1091          csBundle(1).ldest := dest
1092          csBundle(1).uopIdx := 1.U
1093        }
1094        when(vsew === VSew.e16) {
1095          csBundle(0).lsrc(0) := src2
1096          csBundle(0).lsrc(1) := src2
1097          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1098          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1099          csBundle(0).uopIdx := 0.U
1100          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1101          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1102          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1103          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1104          csBundle(1).uopIdx := 1.U
1105          csBundle(2).lsrc(0) := src1
1106          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1107          csBundle(2).ldest := dest
1108          csBundle(2).uopIdx := 2.U
1109        }
1110      }
1111      when(vlmul === VLmul.mf4) {
1112        when(vsew === VSew.e16) {
1113          csBundle(0).lsrc(0) := src2
1114          csBundle(0).lsrc(1) := src2
1115          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1116          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1117          csBundle(0).uopIdx := 0.U
1118          csBundle(1).lsrc(0) := src1
1119          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1120          csBundle(1).ldest := dest
1121          csBundle(1).uopIdx := 1.U
1122        }
1123      }
1124    }
1125
1126    is(UopSplitType.VEC_VFREDOSUM) {
1127      import yunsuan.VfaluType
1128      val vlmul = vlmulReg
1129      val vsew = vsewReg
1130      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1131      when(vlmul === VLmul.m8) {
1132        when(vsew === VSew.e64) {
1133          val vlmax = 16
1134          for (i <- 0 until vlmax) {
1135            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1137            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1138            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1139            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1140            csBundle(i).uopIdx := i.U
1141          }
1142        }
1143        when(vsew === VSew.e32) {
1144          val vlmax = 32
1145          for (i <- 0 until vlmax) {
1146            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1147            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1149            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1150            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1151            csBundle(i).uopIdx := i.U
1152          }
1153        }
1154        when(vsew === VSew.e16) {
1155          val vlmax = 64
1156          for (i <- 0 until vlmax) {
1157            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1158            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1161            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1162            csBundle(i).uopIdx := i.U
1163          }
1164        }
1165      }
1166      when(vlmul === VLmul.m4) {
1167        when(vsew === VSew.e64) {
1168          val vlmax = 8
1169          for (i <- 0 until vlmax) {
1170            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1172            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1173            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1174            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1175            csBundle(i).uopIdx := i.U
1176          }
1177        }
1178        when(vsew === VSew.e32) {
1179          val vlmax = 16
1180          for (i <- 0 until vlmax) {
1181            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1182            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1183            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1184            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1185            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1186            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1187            csBundle(i).uopIdx := i.U
1188          }
1189        }
1190        when(vsew === VSew.e16) {
1191          val vlmax = 32
1192          for (i <- 0 until vlmax) {
1193            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1194            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1195            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1197            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1198            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1199            csBundle(i).uopIdx := i.U
1200          }
1201        }
1202      }
1203      when(vlmul === VLmul.m2) {
1204        when(vsew === VSew.e64) {
1205          val vlmax = 4
1206          for (i <- 0 until vlmax) {
1207            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1208            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1209            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1211            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1212            csBundle(i).uopIdx := i.U
1213          }
1214        }
1215        when(vsew === VSew.e32) {
1216          val vlmax = 8
1217          for (i <- 0 until vlmax) {
1218            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1219            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1220            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1223            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1224            csBundle(i).uopIdx := i.U
1225          }
1226        }
1227        when(vsew === VSew.e16) {
1228          val vlmax = 16
1229          for (i <- 0 until vlmax) {
1230            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1231            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1232            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1235            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1236            csBundle(i).uopIdx := i.U
1237          }
1238        }
1239      }
1240      when(vlmul === VLmul.m1) {
1241        when(vsew === VSew.e64) {
1242          val vlmax = 2
1243          for (i <- 0 until vlmax) {
1244            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1245            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1246            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1248            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1249            csBundle(i).uopIdx := i.U
1250          }
1251        }
1252        when(vsew === VSew.e32) {
1253          val vlmax = 4
1254          for (i <- 0 until vlmax) {
1255            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1256            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1257            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1258            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1260            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1261            csBundle(i).uopIdx := i.U
1262          }
1263        }
1264        when(vsew === VSew.e16) {
1265          val vlmax = 8
1266          for (i <- 0 until vlmax) {
1267            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1268            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1269            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1270            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1271            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1272            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1273            csBundle(i).uopIdx := i.U
1274          }
1275        }
1276      }
1277      when(vlmul === VLmul.mf2) {
1278        when(vsew === VSew.e32) {
1279          val vlmax = 2
1280          for (i <- 0 until vlmax) {
1281            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1282            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1283            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1284            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1285            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1286            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1287            csBundle(i).uopIdx := i.U
1288          }
1289        }
1290        when(vsew === VSew.e16) {
1291          val vlmax = 4
1292          for (i <- 0 until vlmax) {
1293            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1294            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1295            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1296            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1297            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1298            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1299            csBundle(i).uopIdx := i.U
1300          }
1301        }
1302      }
1303      when(vlmul === VLmul.mf4) {
1304        when(vsew === VSew.e16) {
1305          val vlmax = 2
1306          for (i <- 0 until vlmax) {
1307            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1308            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1309            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1310            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1311            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1312            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1313            csBundle(i).uopIdx := i.U
1314          }
1315        }
1316      }
1317    }
1318
1319    is(UopSplitType.VEC_SLIDEUP) {
1320      // i to vector move
1321      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1322      csBundle(0).srcType(1) := SrcType.imm
1323      csBundle(0).srcType(2) := SrcType.imm
1324      csBundle(0).lsrc(1) := 0.U
1325      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1326      csBundle(0).fuType := FuType.i2v.U
1327      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1328      csBundle(0).vecWen := true.B
1329      // LMUL
1330      for (i <- 0 until MAX_VLMUL)
1331        for (j <- 0 to i) {
1332          val old_vd = if (j == 0) {
1333            dest + i.U
1334          } else (VECTOR_TMP_REG_LMUL + j).U
1335          val vd = if (j == i) {
1336            dest + i.U
1337          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1338          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1339          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1340          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1341          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1342          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1343          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1344        }
1345    }
1346
is(UopSplitType.VEC_SLIDEDOWN) {
  // uop 0: integer-to-vector move. The scalar operand (register or immediate, selected by
  // src1IsImm) is written to the temporary vector register VECTOR_TMP_REG_LMUL, which every
  // uop generated below reads as source 0.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // Triangular expansion, laid out backwards from the end of csBundle. Group `grp` targets
  // register dest + lmul - 1 - grp and consists of grp + 1 uops. Within a group the partial
  // result starts from the old destination value, is passed through the temporaries
  // VECTOR_TMP_REG_LMUL + 1 ..., and the final uop of the group writes the destination.
  for (grp <- 0 until MAX_VLMUL; step <- grp to 0 by -1) {
    when(grp.U < lmul) {
      def groupDest = dest + lmul - 1.U - grp.U
      val prevResult = if (step == 0) groupDest else (VECTOR_TMP_REG_LMUL + step).U
      val nextResult = if (step == grp) groupDest else (VECTOR_TMP_REG_LMUL + step + 1).U
      // Distance of this uop from the end of the uop list.
      val fromEnd = grp * (grp + 1) / 2 + grp - step + 1
      val uop = csBundle(numOfUop - fromEnd.U)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + lmul - 1.U - step.U
      uop.lsrc(2) := prevResult
      uop.ldest := nextResult
      // uopIdx does not count the leading move uop, hence one less than the csBundle index.
      uop.uopIdx := numOfUop - (fromEnd + 1).U
    }
  }
}
1376
is(UopSplitType.VEC_M0X) {
  // One uop per possible register of the group. Uop k reads src2 plus the temporary written by
  // uop k - 1 (VECTOR_TMP_REG_LMUL + k - 1) and writes temporary VECTOR_TMP_REG_LMUL + k.
  // Uop 0 has no predecessor, so its source 0 type is don't-care.
  for (k <- 0 until MAX_VLMUL) {
    val uop = csBundle(k)
    uop.srcType(0) := (if (k == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + k - 1).U
    uop.lsrc(1) := src2
    // lsrc(2) is intentionally not assigned here (treated as don't-care).
    uop.ldest := (VECTOR_TMP_REG_LMUL + k).U
    uop.uopIdx := k.U
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
  }

  // The last uop delivers the result to the integer register `dest` instead of a vector
  // temporary. These connects must stay after the loop so that they win over it; the integer
  // write is suppressed when dest is register 0.
  val resultUop = csBundle(numOfUop - 1.U)
  resultUop.rfWen := dest =/= 0.U
  resultUop.fpWen := false.B
  resultUop.vecWen := false.B
  resultUop.ldest := dest
}
1398
is(UopSplitType.VEC_MVV) {
  // Two uops per register of the group. Both read src2 and the temporary produced for the
  // previous register (VECTOR_TMP_REG_LMUL + grp - 1; don't-care for the first register).
  // The even uop updates dest + grp (using it as old-vd too), the odd uop writes the
  // temporary VECTOR_TMP_REG_LMUL + grp consumed by the next pair.
  (0 until MAX_VLMUL).foreach { grp =>
    val destUop = csBundle(grp * 2)
    val carryUop = csBundle(grp * 2 + 1)

    // Sources shared by both uops of the pair.
    Seq(destUop, carryUop).foreach { uop =>
      uop.srcType(0) := (if (grp == 0) SrcType.DC else SrcType.vp)
      uop.srcType(1) := SrcType.vp
      uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + grp - 1).U
      uop.lsrc(1) := src2
    }

    destUop.lsrc(2) := dest + grp.U
    destUop.ldest := dest + grp.U
    destUop.uopIdx := (grp * 2).U

    // lsrc(2) of the carry uop is intentionally not assigned (treated as don't-care).
    carryUop.ldest := (VECTOR_TMP_REG_LMUL + grp).U
    carryUop.uopIdx := (grp * 2 + 1).U
  }
}
is(UopSplitType.VEC_VWW) {
  // Every uop writes its own temporary VECTOR_TMP_REG_LMUL + i and ignores source 2.
  //  - The first `lmul` uops each read one register of the vs2 group (on both source ports).
  //  - The remaining uops combine earlier temporaries pairwise: uop i reads temporaries
  //    2 * (i - lmul) + 1 and 2 * (i - lmul).
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    uop.srcType(2) := SrcType.DC
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    when(i.U < lmul) {
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    }.otherwise {
      val pairBase = VECTOR_TMP_REG_LMUL.U + Cat(i.U - lmul, 0.U(1.W))
      uop.lsrc(0) := pairBase + 1.U
      uop.lsrc(1) := pairBase
    }
  }

  // The last uop folds in src1 and the old destination and writes the architectural dest.
  // This override only has to be emitted once, after the loop: with last-connect semantics
  // it then wins over every per-uop assignment above. (It used to be re-emitted inside each
  // of the 16 loop iterations, which produced the same hardware with redundant connects.)
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.srcType(2) := SrcType.vp
  lastUop.lsrc(0) := src1
  lastUop.lsrc(2) := dest
  lastUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // Emits len * len uops. For destination register i, uop j reads index register src1 + i
  // and data register src2 + j. The partial result starts from the old value of dest + i,
  // is chained through temporaries VECTOR_TMP_REG_LMUL + j, and the last uop of the chain
  // writes dest + i. Source types are left at their defaults.
  def genCsBundle_VEC_RGATHER(len: Int): Unit =
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + i.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }

  // Only the vlmul encodings b001 / b010 / b011 (2, 4, 8 registers) are expanded here;
  // for any other encoding csBundle keeps the values assigned before this switch.
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (vlmulCode, regs) =>
    when(vlmulReg === vlmulCode.U) {
      genCsBundle_VEC_RGATHER(regs)
    }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  // Emits len * len gather uops behind the leading move uop (hence the + 1 on the csBundle
  // index, while uopIdx starts at 0). Every uop takes its index from VECTOR_TMP_REG_LMUL and
  // reads data register src2 + j; partial results for dest + i are chained through
  // temporaries VECTOR_TMP_REG_LMUL + 1 ... and the last uop of the chain writes dest + i.
  def genCsBundle_RGATHER_VX(len: Int): Unit =
    for (i <- 0 until len; j <- 0 until len) {
      val seqNo = i * len + j
      val uop = csBundle(seqNo + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
      uop.uopIdx := seqNo.U
    }

  // uop 0: integer-to-vector move of the scalar operand (register or immediate, selected by
  // src1IsImm) into VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B

  // Single-register layout is the default; the 2/4/8-register layouts below override it
  // (later connects win), so this call must stay ahead of them.
  genCsBundle_RGATHER_VX(1)
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (vlmulCode, regs) =>
    when(vlmulReg === vlmulCode.U) {
      genCsBundle_RGATHER_VX(regs)
    }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  // SEW = 8 layout: every (i, j) pair takes two uops, using the consecutive index registers
  // src1 + 2i and src1 + 2i + 1 against data register src2 + j. The partial result for
  // dest + i is chained through temporaries VECTOR_TMP_REG_LMUL + 2j and + 2j + 1.
  def genCsBundle_VEC_RGATHEREI16_SEW8(len: Int): Unit =
    for (i <- 0 until len; j <- 0 until len) {
      val firstSlot = (i * len + j) * 2
      val betweenHalves = (VECTOR_TMP_REG_LMUL + j * 2).U

      val lowUop = csBundle(firstSlot)
      lowUop.lsrc(0) := src1 + (i * 2).U
      lowUop.lsrc(1) := src2 + j.U
      lowUop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
      lowUop.ldest := betweenHalves
      lowUop.uopIdx := firstSlot.U

      val highUop = csBundle(firstSlot + 1)
      highUop.lsrc(0) := src1 + (i * 2 + 1).U
      highUop.lsrc(1) := src2 + j.U
      highUop.lsrc(2) := betweenHalves
      highUop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
      highUop.uopIdx := (firstSlot + 1).U
    }

  // Layout for the other element widths: one uop per (i, j) pair. `destsPerIndexReg`
  // destination registers share one index register (index register = src1 + i / destsPerIndexReg):
  // 1 for the default case, 2 for SEW = 32, 4 for SEW = 64.
  def genSingleUopLayout(len: Int, destsPerIndexReg: Int): Unit =
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + (i / destsPerIndexReg).U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }

  // Chooses between the single-uop layouts according to vsewReg.
  def genForWiderSew(len: Int): Unit =
    when(vsewReg === VSew.e32) {
      genSingleUopLayout(len, 2)
    }.elsewhen(vsewReg === VSew.e64) {
      genSingleUopLayout(len, 4)
    }.otherwise {
      genSingleUopLayout(len, 1)
    }

  // Same, but an all-zero vsewReg selects the two-uop SEW = 8 layout first.
  def genForAnySew(len: Int): Unit =
    when(!vsewReg.orR) {
      genCsBundle_VEC_RGATHEREI16_SEW8(len)
    }.otherwise {
      genForWiderSew(len)
    }

  // Single-register layout is the default; larger vlmul encodings override it below.
  genForAnySew(1)
  switch(vlmulReg) {
    is("b001".U) {
      genForAnySew(2)
    }
    is("b010".U) {
      genForAnySew(4)
    }
    is("b011".U) {
      // No dedicated SEW = 8 layout for 8 registers: an all-zero vsewReg falls through to
      // the default single-uop layout here.
      genForWiderSew(8)
    }
  }
}
is(UopSplitType.VEC_COMPRESS) {
  // Row i handles source register src2 + i and starts at csBundle index i * (i + 3) / 2.
  // It holds one uop per destination register 0..i, plus (except in the last row) one extra
  // uop at position i + 1 that refreshes VECTOR_TMP_REG_LMUL, the register later rows read
  // as source 0. Row 0 reads src1 as source 0 instead.
  def genCsBundle_VEC_COMPRESS(len: Int): Unit =
    for (row <- 0 until len) {
      val lastRow = row == len - 1
      val rowBase = row * (row + 3) / 2
      val rowLen = if (lastRow) row + 1 else row + 2
      for (col <- 0 until rowLen) {
        val extraUop = col == row + 1
        val uop = csBundle(rowBase + col)
        uop.vecWen := true.B
        uop.v0Wen := false.B

        // The extra uop does not care about source 0 / source 2 types.
        if (extraUop) {
          uop.srcType(0) := DontCare
          uop.srcType(2) := DontCare
        } else {
          uop.srcType(0) := SrcType.vp
          uop.srcType(2) := SrcType.vp
        }
        uop.srcType(1) := SrcType.vp

        uop.lsrc(0) := (if (row == 0) src1 else VECTOR_TMP_REG_LMUL.U)
        uop.lsrc(1) := src2 + row.U
        // Old value: the architectural register on the diagonal, otherwise the temporary
        // written for this column by the previous row.
        uop.lsrc(2) := (if (row == col) dest + row.U else (VECTOR_TMP_REG_LMUL + col + 1).U)
        // Only the last row writes architectural destinations; earlier rows write temporaries.
        uop.ldest := (
          if (lastRow) dest + col.U
          else if (extraUop) VECTOR_TMP_REG_LMUL.U
          else (VECTOR_TMP_REG_LMUL + col + 1).U
        )
        uop.uopIdx := (rowBase + col).U
      }
    }

  // Only the vlmul encodings b001 / b010 / b011 (2, 4, 8 registers) are expanded here.
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (vlmulCode, regs) =>
    when(vlmulReg === vlmulCode.U) {
      genCsBundle_VEC_COMPRESS(regs)
    }
  }
}
is(UopSplitType.VEC_MVNR) {
  // One uop per register: uop k works on the k-th register of every operand group and
  // uses dest + k both as old-vd (source 2) and as destination.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k)
    val regOffset = k.U
    uop.uopIdx := regOffset
    uop.lsrc(0) := src1 + regOffset
    uop.lsrc(1) := src2 + regOffset
    uop.lsrc(2) := dest + regOffset
    uop.ldest := dest + regOffset
  }
}
is(UopSplitType.VEC_US_LDST) {
  // uop 0: integer-to-vector move (FMV.D.X-like, e64) of the scalar register operand into
  // VECTOR_TMP_REG_LMUL. It is flagged as part of a vector load/store instruction.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  // One memory uop per register of the group, placed after the move uop (csBundle index
  // k + 1, uopIdx k). Each reads the moved scalar and uses dest + k as old-vd and destination.
  for (k <- 0 until MAX_VLMUL) {
    val memUop = csBundle(k + 1)
    memUop.srcType(0) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(2) := dest + k.U
    memUop.ldest := dest + k.U
    memUop.uopIdx := k.U
    memUop.vlsInstr := true.B
  }

  // For unit-stride segment accesses the first uop waits for older instructions and the
  // last uop blocks younger ones.
  csBundle.head.waitForward := isUsSegment
  csBundle(numOfUop - 1.U).blockBackward := isUsSegment
}
is(UopSplitType.VEC_S_LDST) {
  // uops 0 and 1: two integer-to-vector moves (FMV.D.X-like, e64). Uop 0 writes
  // VECTOR_TMP_REG_LMUL, uop 1 writes VECTOR_TMP_REG_LMUL + 1.
  for (k <- 0 until 2) {
    val moveUop = csBundle(k)
    moveUop.srcType(0) := SrcType.reg
    moveUop.srcType(1) := SrcType.imm
    moveUop.lsrc(1) := 0.U
    moveUop.ldest := (VECTOR_TMP_REG_LMUL + k).U
    moveUop.fuType := FuType.i2v.U
    moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
    moveUop.rfWen := false.B
    moveUop.fpWen := false.B
    moveUop.vecWen := true.B
    moveUop.vlsInstr := true.B
  }
  // The second move takes the instruction's second source register as its integer source.
  csBundle(1).lsrc(0) := latchedInst.lsrc(1)

  // One memory uop per register of the group, placed after the two moves (csBundle index
  // k + 2, uopIdx k). Each reads both moved scalars and uses dest + k as old-vd and destination.
  for (k <- 0 until MAX_VLMUL) {
    val memUop = csBundle(k + 2)
    memUop.srcType(0) := SrcType.vp
    memUop.srcType(1) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
    memUop.lsrc(2) := dest + k.U
    memUop.ldest := dest + k.U
    memUop.uopIdx := k.U
    memUop.vlsInstr := true.B
  }

  // For strided segment accesses the first uop waits for older instructions and the last
  // uop blocks younger ones.
  csBundle.head.waitForward := isSdSegment
  csBundle(numOfUop - 1.U).blockBackward := isSdSegment
}
is(UopSplitType.VEC_I_LDST) {
  // Segment form, data side: configures all MAX_VLMUL memory uops (csBundle index k + 1).
  // Only the first regsPerField * fields uops use old-vd (source 2) and write a vector
  // register; the rest get SrcType.no and vecWen = false. Source 1 is provisionally marked
  // unused here and is filled in afterwards by the index-side helper.
  def genCsBundle_SEGMENT_INDEXED_LOADSTORE(regsPerField: Int, fields: Int): Unit =
    for (k <- 0 until MAX_VLMUL) {
      val inUse = k < regsPerField * fields
      val uop = csBundle(k + 1)
      uop.uopIdx := k.U
      uop.vlsInstr := true.B
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.srcType(1) := SrcType.no
      uop.lsrc(1) := src2 + k.U
      uop.srcType(2) := (if (inUse) SrcType.vp else SrcType.no)
      uop.lsrc(2) := dest + k.U
      uop.ldest := dest + k.U
      uop.rfWen := false.B
      uop.fpWen := false.B
      uop.vecWen := inUse.B
    }

  // Segment form, index side: the first indexRegs uops read index register src2 + k as
  // source 1; the remaining uops have source 1 marked unused.
  def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(indexRegs: Int): Unit =
    for (k <- 0 until MAX_VLMUL) {
      val uop = csBundle(k + 1)
      uop.srcType(1) := (if (k < indexRegs) SrcType.vp else SrcType.no)
      uop.lsrc(1) := src2 + k.U
    }

  // Index-group multiplier code: veew + vlmul - vsew, with the subtraction written as
  // "+ 1 + ~vsew" (two's complement).
  val vlmul = vlmulReg
  val vsew = Cat(0.U(1.W), vsewReg)
  val veew = Cat(0.U(1.W), width)
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt

  // Collapse a multiplier code to 2 bits: b001 / b010 / b011 map to 1 / 2 / 3, anything
  // else to 0.
  def wholeRegCode(code: UInt): UInt = MuxLookup(code, 0.U(2.W))(Seq(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))
  val simple_lmul = wholeRegCode(vlmul)
  val simple_emul = wholeRegCode(vemul)

  // uop 0: integer-to-vector move (e64) of the scalar register operand into
  // VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  when(nf === 0.U) {
    // Non-segment indexed access: a per-uop lookup table turns {simple_emul, simple_lmul}
    // into the register offsets of the index operand (vs2) and of the data operand (vd).
    for (k <- 0 until MAX_VLMUL) {
      val offsetTable = indexedLSRegOffset(k)
      offsetTable.src := Cat(simple_emul, simple_lmul)
      val indexReg = Mux1H(UIntToOH(offsetTable.outOffsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(r => src2 + r.U))
      val dataReg = Mux1H(UIntToOH(offsetTable.outOffsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(r => dest + r.U))

      val uop = csBundle(k + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := indexReg
      uop.srcType(2) := SrcType.vp
      // Source 2 is the old value of the destination register.
      uop.lsrc(2) := dataReg
      uop.ldest := dataReg
      uop.uopIdx := k.U
      uop.vlsInstr := true.B
    }
  }.otherwise {
    // Segment indexed access (nf field non-zero; the field count is nf + 1).
    // Data side first. Each entry is (simple_lmul code, registers per field, largest nf
    // value handled); combinations outside this table leave csBundle untouched. The
    // conditions are mutually exclusive, so at most one layout is applied.
    val segmentShapes = Seq((0, 1, 7), (1, 2, 3), (2, 4, 1))
    for ((lmulCode, regsPerField, maxNf) <- segmentShapes; nfValue <- 1 to maxNf) {
      when(simple_lmul === lmulCode.U && nf === nfValue.U) {
        genCsBundle_SEGMENT_INDEXED_LOADSTORE(regsPerField, nfValue + 1)
      }
    }

    // Index side next; this overrides the provisional source 1 settings made above, so it
    // has to come after the data-side layout. simple_emul code c means 2^c index registers.
    for (emulCode <- 0 to 3) {
      when(simple_emul === emulCode.U) {
        genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1 << emulCode)
      }
    }

    // Stores never write a vector register; this override must stay last.
    when(isVstore) {
      for (k <- 0 until MAX_VLMUL) {
        csBundle(k + 1).vecWen := false.B
      }
    }
  }

  // For indexed segment accesses the first uop waits for older instructions and the last
  // uop blocks younger ones.
  csBundle.head.waitForward := isIxSegment
  csBundle(numOfUop - 1.U).blockBackward := isIxSegment
}
1870  }
1871
// Number of uops the rename stage takes this cycle: all-or-nothing, a full group of
// RenameWidth when the first rename slot is ready, otherwise zero.
val readyCounter = Mux(outReadys.head, RenameWidth.U, 0.U)

// True when all remaining uops of the current complex instruction can leave this cycle.
val thisAllOut = uopRes <= readyCounter

// Next-state logic. Cases not listed keep whatever stateNext / uopResNext were assigned
// before this point.
when(state === s_idle) {
  // Idle: start on a new instruction as soon as one arrives.
  when(inValid) {
    stateNext := s_active
    uopResNext := inUopInfo.numOfUop
  }
}.elsewhen(state === s_active) {
  when(!thisAllOut) {
    // Still draining: subtract what rename accepted this cycle.
    stateNext := s_active
    uopResNext := uopRes - readyCounter
  }.elsewhen(inValid) {
    // Finished, and the next instruction is already waiting: stay active.
    stateNext := s_active
    uopResNext := inUopInfo.numOfUop
  }.otherwise {
    stateNext := s_idle
    uopResNext := 0.U
  }
}

// A redirect discards the instruction in flight.
state := stateNext
uopRes := uopResNext
when(io.redirect) {
  state := s_idle
  uopRes := 0.U
}

// Uops sent this cycle: the smaller of "remaining" and "accepted by rename".
val complexNum = Mux(thisAllOut, uopRes, readyCounter)

fixedDecodedInst := csBundle

// when vstart is not zero, the last uop will modify vstart to zero
// therefore, blockback and flush pipe
fixedDecodedInst(numOfUop - 1.U).flushPipe := (vstartReg =/= 0.U) || latchedInst.flushPipe

// Output slot k carries the k-th not-yet-sent uop; numOfUop - uopRes is the index of the
// first uop that is still pending.
(0 until RenameWidth).foreach { k =>
  outValids(k) := k.U < complexNum
  outDecodedInsts(k) := fixedDecodedInst(k.U + numOfUop - uopRes)
}

outComplexNum := Mux(state === s_active, complexNum, 0.U)
// A new instruction can be accepted when idle, or when the current one finishes this cycle.
inReady := (state === s_idle) || ((state === s_active) && thisAllOut)


XSError(inValid && inUopInfo.numOfUop === 0.U,
  p"uop number ${inUopInfo.numOfUop} is illegal, cannot be zero")
1923//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1924//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1925//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1926//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1927//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1928//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1929//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1930//
1931//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1932//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1933//    0.U)
1934//  validToRename.zipWithIndex.foreach{
1935//    case(dst, i) =>
1936//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1937//      dst := MuxCase(false.B, Seq(
1938//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1939//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1940//      ).toSeq)
1941//  }
1942//
1943//  readyToIBuf.zipWithIndex.foreach {
1944//    case (dst, i) =>
1945//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1946//      dst := MuxCase(true.B, Seq(
1947//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1948//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1949//      ).toSeq)
1950//  }
1951//
1952//  io.deq.decodedInsts := decodedInsts
1953//  io.deq.complexNum := complexNum
1954//  io.deq.validToRename := validToRename
1955//  io.deq.readyToIBuf := readyToIBuf
1956}
1957