xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 9fabe3237d2f82aa0e850cd73e6175834346bed4)
1/***************************************************************************************
2  * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3  * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4  * Copyright (c) 2020-2021 Peng Cheng Laboratory
5  *
6  * XiangShan is licensed under Mulan PSL v2.
7  * You can use this software according to the terms and conditions of the Mulan PSL v2.
8  * You may obtain a copy of Mulan PSL v2 at:
9  *          http://license.coscl.org.cn/MulanPSL2
10  *
11  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14  *
15  * See the Mulan PSL v2 for more details.
16  ***************************************************************************************/
17
18package xiangshan.backend.decode
19
20import org.chipsalliance.cde.config.Parameters
21import chisel3._
22import chisel3.util._
23import freechips.rocketchip.rocket.Instructions
24import freechips.rocketchip.util.uintToBitPat
25import utils._
26import utility._
27import xiangshan.ExceptionNO.illegalInstr
28import xiangshan._
29import xiangshan.backend.fu.fpu.FPU
30import xiangshan.backend.fu.FuType
31import freechips.rocketchip.rocket.Instructions._
32import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
33import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
34import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul, Vl}
35import yunsuan.VpermType
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one uop of a non-segment indexed vector
  * load/store. For the uop numbered `uopIdx` it reports the register offsets to add
  * to vs2 and to vd, selected at run time by the 4-bit key on `src`.
  *
  * Key layout of `src`: bits [3:2] are matched against `emul`, bits [1:0] against
  * `lmul`, each enumerated over 0 until 4. Both values are used as shift amounts, so
  * they are presumably log2 encodings of the register-group sizes -- confirm against
  * the code that drives `src`.
  *
  * @param uopIdx index of the uop this table instance describes (a Scala Int fixed at
  *               elaboration time, not a hardware signal)
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair of one uop.
    *
    * The operation is enumerated over `1 << max(lmul, emul)` uops. The operand with the
    * larger group advances by one register per uop; the operand with the smaller group
    * advances once every `1 << |emul - lmul|` uops.
    *
    * @param lmul   shift amount selecting the vd group size
    * @param emul   shift amount selecting the vs2 group size
    * @param uopIdx index of the uop to look up
    * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` lies outside the
    *         enumerated uop range
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    if (lmul < emul) {
      // lmul < emul: the uop count depends on emul, vs2 advances on every uop
      val uopsPerVd = 1 << (emul - lmul)
      if (uopIdx >= 0 && uopIdx < (1 << emul)) (uopIdx, uopIdx / uopsPerVd) else (0, 0)
    } else {
      // lmul >= emul: the uop count depends on lmul, vd advances on every uop
      val uopsPerVs2 = 1 << (lmul - emul)
      if (uopIdx >= 0 && uopIdx < (1 << lmul)) (uopIdx / uopsPerVs2, uopIdx) else (0, 0)
    }
  }

  // One (emul, lmul, offsetVs2, offsetVd) row per key value, emul-major, for the
  // indexed load/store uop handled by this instance.
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Key = {emul, lmul}; value = {offsetVs2, offsetVd}. All 16 keys are listed, so the
  // all-zero default pattern is never the selected entry.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat(((emul << 2) | lmul).U(4.W)), BitPat(((offsetVs2 << 3) | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Elaboration-time constants mixed into the vector decode logic.
  */
trait VectorConstants {
  /** Number of per-register uop slots the split logic iterates over for one register group. */
  val MAX_VLMUL: Int = 8

  /**
    * Logical register index of the first temporary vector register.
    * Original note: "32~46 -> 15", i.e. presumably 15 temporaries at indices 32 to 46
    * -- TODO confirm against the rename table.
    */
  val VECTOR_TMP_REG_LMUL: Int = 32

  /** Original note: "in v0 regfile". NOTE(review): not referenced in the code visible here -- confirm use. */
  val VECTOR_COMPRESS: Int = 1

  /** Presumably the maximum uop count of an indexed load/store -- verify against its users. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * Port list of the complex-instruction decoder.
  *
  * Field order is kept as declared because it determines the bundle layout.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Redirect indication from the rest of the pipeline.
  val redirect = Input(Bool())
  // Custom CSR control settings.
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // Bypassed vtype value, applied to the uops produced for vset instructions.
  val vtypeBypass = Input(new VType)
  // Handshaked input: when the first instruction in the decode vector is a complex
  // instruction, it is passed in here together with its uop information.
  val in = Flipped(Decoupled(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // Handshaked outputs: one decoded-uop channel per rename slot.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, Decoupled(new DecodedInst))
  }
  // Count associated with the complex uops on `out` -- presumably how many are
  // presented this cycle; confirm against the driving logic.
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153  val vstartReg = latchedInst.vpu.vstart
154
155  //Type of uop Div
156  val typeOfSplit = latchedInst.uopSplitType
157  val src1Type = latchedInst.srcType(0)
158  val src1IsImm = src1Type === SrcType.imm
159  val src1IsFp = src1Type === SrcType.fp
160
161  val isVstore = FuType.isVStore(latchedInst.fuType)
162
163  // exception generator
164  val vecException = Module(new VecExceptionGen)
165  vecException.io.inst := latchedInst.instr
166  vecException.io.decodedInst := latchedInst
167  vecException.io.vtype := latchedInst.vpu.vtype
168  vecException.io.vstart := latchedInst.vpu.vstart
169  val illegalInst = vecException.io.illegalInst
170
171  numOfUop := latchedUopInfo.numOfUop
172  numOfWB := latchedUopInfo.numOfWB
173
174  //uops dispatch
175  val s_idle :: s_active :: Nil = Enum(2)
176  val state = RegInit(s_idle)
177  val stateNext = WireDefault(state)
178  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
179  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
180  val uopResNext = WireInit(uopRes)
181  val e64 = 3.U(2.W)
182  val isUsSegment = instFields.MOP === 0.U && ((nf =/= 0.U && instFields.LUMOP === 0.U) || instFields.LUMOP === "b10000".U)
183  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
184  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
185
186  //uop div up to maxUopSize
187  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
188  val fixedDecodedInst = Wire(Vec(maxUopSize, new DecodedInst))
189
190  csBundle.foreach { case dst =>
191    dst := latchedInst
192    dst.numUops := latchedUopInfo.numOfUop
193    dst.numWB := latchedUopInfo.numOfWB
194    dst.exceptionVec(ExceptionNO.EX_II) := latchedInst.exceptionVec(ExceptionNO.EX_II) || illegalInst
195    dst.firstUop := false.B
196    dst.lastUop := false.B
197    dst.vlsInstr := false.B
198  }
199
200  csBundle(0).firstUop := true.B
201  csBundle(numOfUop - 1.U).lastUop := true.B
202
203  // when vstart is not zero, the last uop will modify vstart to zero
204  // therefore, blockback and flush pipe
205  csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U
206  csBundle(0.U).flushPipe := vstartReg =/= 0.U
207
208  switch(typeOfSplit) {
209    is(UopSplitType.VSET) {
210      // In simple decoder, rfWen and vecWen are not set
211      when(isVsetSimple) {
212        // Default
213        // uop0 set rd, never flushPipe
214        csBundle(0).fuType := FuType.vsetiwi.U
215        csBundle(0).flushPipe := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
216        csBundle(0).blockBackward := false.B
217        csBundle(0).rfWen := true.B
218        // uop1 set vl, vsetvl will flushPipe
219        csBundle(1).ldest := Vl_IDX.U
220        csBundle(1).vecWen := false.B
221        csBundle(1).vlWen := true.B
222        csBundle(1).flushPipe := false.B
223        csBundle(1).blockBackward := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
224        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
225          // write nothing, uop0 is a nop instruction
226          csBundle(0).rfWen := false.B
227          csBundle(0).fpWen := false.B
228          csBundle(0).vecWen := false.B
229          csBundle(0).vlWen := false.B
230          csBundle(1).fuType := FuType.vsetfwf.U
231          csBundle(1).srcType(0) := SrcType.no
232          csBundle(1).srcType(2) := SrcType.no
233          csBundle(1).srcType(3) := SrcType.no
234          csBundle(1).srcType(4) := SrcType.vp
235          csBundle(1).lsrc(4) := Vl_IDX.U
236        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
237          // uop0: mv vtype gpr to vector region
238          csBundle(0).srcType(0) := SrcType.xp
239          csBundle(0).srcType(1) := SrcType.no
240          csBundle(0).lsrc(0) := src2
241          csBundle(0).lsrc(1) := 0.U
242          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
243          csBundle(0).fuType := FuType.i2v.U
244          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
245          csBundle(0).rfWen := false.B
246          csBundle(0).fpWen := false.B
247          csBundle(0).vecWen := true.B
248          csBundle(0).vlWen := false.B
249          // uop1: uvsetvcfg_vv
250          csBundle(1).fuType := FuType.vsetfwf.U
251          // vl
252          csBundle(1).srcType(0) := SrcType.no
253          csBundle(1).srcType(2) := SrcType.no
254          csBundle(1).srcType(3) := SrcType.no
255          csBundle(1).srcType(4) := SrcType.vp
256          csBundle(1).lsrc(4) := Vl_IDX.U
257          // vtype
258          csBundle(1).srcType(1) := SrcType.vp
259          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
260          csBundle(1).vecWen := false.B
261          csBundle(1).vlWen := true.B
262          csBundle(1).ldest := Vl_IDX.U
263        }.elsewhen(dest === 0.U) {
264          // write nothing, uop0 is a nop instruction
265          csBundle(0).rfWen := false.B
266          csBundle(0).fpWen := false.B
267          csBundle(0).vecWen := false.B
268          csBundle(0).vlWen := false.B
269        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) {
270          // because vsetvl may modified src2 when src2 == rd,
271          // we need to modify vd in second uop to avoid dependency
272          // uop0 set vl
273          csBundle(0).fuType := FuType.vsetiwf.U
274          csBundle(0).ldest := Vl_IDX.U
275          csBundle(0).rfWen := false.B
276          csBundle(0).vlWen := true.B
277          // uop1 set rd
278          csBundle(1).fuType := FuType.vsetiwi.U
279          csBundle(1).ldest := dest
280          csBundle(1).rfWen := true.B
281          csBundle(1).vlWen := false.B
282        }
283        // use bypass vtype from vtypeGen
284        csBundle(0).vpu.connectVType(io.vtypeBypass)
285        csBundle(1).vpu.connectVType(io.vtypeBypass)
286      }
287    }
288    is(UopSplitType.VEC_VVV) {
289      for (i <- 0 until MAX_VLMUL) {
290        csBundle(i).lsrc(0) := src1 + i.U
291        csBundle(i).lsrc(1) := src2 + i.U
292        csBundle(i).lsrc(2) := dest + i.U
293        csBundle(i).ldest := dest + i.U
294        csBundle(i).uopIdx := i.U
295      }
296    }
297    is(UopSplitType.VEC_VFV) {
298      /*
299      f to vector move
300       */
301      csBundle(0).srcType(0) := SrcType.fp
302      csBundle(0).srcType(1) := SrcType.imm
303      csBundle(0).srcType(2) := SrcType.imm
304      csBundle(0).lsrc(1) := 0.U
305      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
306      csBundle(0).fuType := FuType.f2v.U
307      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
308      csBundle(0).vecWen := true.B
309      csBundle(0).vpu.isReverse := false.B
310      /*
311      LMUL
312       */
313      for (i <- 0 until MAX_VLMUL) {
314        csBundle(i + 1).srcType(0) := SrcType.vp
315        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
316        csBundle(i + 1).lsrc(1) := src2 + i.U
317        csBundle(i + 1).lsrc(2) := dest + i.U
318        csBundle(i + 1).ldest := dest + i.U
319        csBundle(i + 1).uopIdx := i.U
320      }
321    }
322    is(UopSplitType.VEC_EXT2) {
323      for (i <- 0 until MAX_VLMUL / 2) {
324        csBundle(2 * i).lsrc(1) := src2 + i.U
325        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
326        csBundle(2 * i).ldest := dest + (2 * i).U
327        csBundle(2 * i).uopIdx := (2 * i).U
328        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
329        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
330        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
331        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
332      }
333    }
334    is(UopSplitType.VEC_EXT4) {
335      for (i <- 0 until MAX_VLMUL / 4) {
336        csBundle(4 * i).lsrc(1) := src2 + i.U
337        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
338        csBundle(4 * i).ldest := dest + (4 * i).U
339        csBundle(4 * i).uopIdx := (4 * i).U
340        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
341        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
342        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
343        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
344        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
345        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
346        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
347        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
348        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
349        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
350        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
351        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
352      }
353    }
354    is(UopSplitType.VEC_EXT8) {
355      for (i <- 0 until MAX_VLMUL) {
356        csBundle(i).lsrc(1) := src2
357        csBundle(i).lsrc(2) := dest + i.U
358        csBundle(i).ldest := dest + i.U
359        csBundle(i).uopIdx := i.U
360      }
361    }
362    is(UopSplitType.VEC_0XV) {
363      /*
364      i/f to vector move
365       */
366      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
367      csBundle(0).srcType(1) := SrcType.imm
368      csBundle(0).srcType(2) := SrcType.imm
369      csBundle(0).lsrc(1) := 0.U
370      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
371      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
372      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
373      csBundle(0).rfWen := false.B
374      csBundle(0).fpWen := false.B
375      csBundle(0).vecWen := true.B
376      /*
377      vmv.s.x
378       */
379      csBundle(1).srcType(0) := SrcType.vp
380      csBundle(1).srcType(1) := SrcType.imm
381      csBundle(1).srcType(2) := SrcType.vp
382      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
383      csBundle(1).lsrc(1) := 0.U
384      csBundle(1).lsrc(2) := dest
385      csBundle(1).ldest := dest
386      csBundle(1).rfWen := false.B
387      csBundle(1).fpWen := false.B
388      csBundle(1).vecWen := true.B
389      csBundle(1).uopIdx := 0.U
390    }
391    is(UopSplitType.VEC_VXV) {
392      /*
393      i to vector move
394       */
395      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
396      csBundle(0).srcType(1) := SrcType.imm
397      csBundle(0).srcType(2) := SrcType.imm
398      csBundle(0).lsrc(1) := 0.U
399      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
400      csBundle(0).fuType := FuType.i2v.U
401      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
402      csBundle(0).vecWen := true.B
403      csBundle(0).vpu.isReverse := false.B
404      /*
405      LMUL
406       */
407      for (i <- 0 until MAX_VLMUL) {
408        csBundle(i + 1).srcType(0) := SrcType.vp
409        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
410        csBundle(i + 1).lsrc(1) := src2 + i.U
411        csBundle(i + 1).lsrc(2) := dest + i.U
412        csBundle(i + 1).ldest := dest + i.U
413        csBundle(i + 1).uopIdx := i.U
414      }
415    }
416    is(UopSplitType.VEC_VVW) {
417      for (i <- 0 until MAX_VLMUL / 2) {
418        csBundle(2 * i).lsrc(0) := src1 + i.U
419        csBundle(2 * i).lsrc(1) := src2 + i.U
420        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
421        csBundle(2 * i).ldest := dest + (2 * i).U
422        csBundle(2 * i).uopIdx := (2 * i).U
423        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
424        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
425        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
426        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
427        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
428      }
429    }
430    is(UopSplitType.VEC_VFW) {
431      /*
432      f to vector move
433       */
434      csBundle(0).srcType(0) := SrcType.fp
435      csBundle(0).srcType(1) := SrcType.imm
436      csBundle(0).srcType(2) := SrcType.imm
437      csBundle(0).lsrc(1) := 0.U
438      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
439      csBundle(0).fuType := FuType.f2v.U
440      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
441      csBundle(0).rfWen := false.B
442      csBundle(0).fpWen := false.B
443      csBundle(0).vecWen := true.B
444
445      for (i <- 0 until MAX_VLMUL / 2) {
446        csBundle(2 * i + 1).srcType(0) := SrcType.vp
447        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
448        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
449        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
450        csBundle(2 * i + 1).ldest := dest + (2 * i).U
451        csBundle(2 * i + 1).uopIdx := (2 * i).U
452        csBundle(2 * i + 2).srcType(0) := SrcType.vp
453        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
454        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
455        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
456        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
457        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
458      }
459    }
460    is(UopSplitType.VEC_WVW) {
461      for (i <- 0 until MAX_VLMUL / 2) {
462        csBundle(2 * i).lsrc(0) := src1 + i.U
463        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
464        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
465        csBundle(2 * i).ldest := dest + (2 * i).U
466        csBundle(2 * i).uopIdx := (2 * i).U
467        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
468        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
469        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
470        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
471        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
472      }
473    }
474    is(UopSplitType.VEC_VXW) {
475      /*
476      i to vector move
477       */
478      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
479      csBundle(0).srcType(1) := SrcType.imm
480      csBundle(0).srcType(2) := SrcType.imm
481      csBundle(0).lsrc(1) := 0.U
482      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
483      csBundle(0).fuType := FuType.i2v.U
484      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
485      csBundle(0).vecWen := true.B
486
487      for (i <- 0 until MAX_VLMUL / 2) {
488        csBundle(2 * i + 1).srcType(0) := SrcType.vp
489        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
490        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
491        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
492        csBundle(2 * i + 1).ldest := dest + (2 * i).U
493        csBundle(2 * i + 1).uopIdx := (2 * i).U
494        csBundle(2 * i + 2).srcType(0) := SrcType.vp
495        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
496        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
497        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
498        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
499        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
500      }
501    }
502    is(UopSplitType.VEC_WXW) {
503      /*
504      i to vector move
505       */
506      csBundle(0).srcType(0) := SrcType.reg
507      csBundle(0).srcType(1) := SrcType.imm
508      csBundle(0).srcType(2) := SrcType.imm
509      csBundle(0).lsrc(1) := 0.U
510      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
511      csBundle(0).fuType := FuType.i2v.U
512      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
513      csBundle(0).vecWen := true.B
514
515      for (i <- 0 until MAX_VLMUL / 2) {
516        csBundle(2 * i + 1).srcType(0) := SrcType.vp
517        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
518        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
519        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
520        csBundle(2 * i + 1).ldest := dest + (2 * i).U
521        csBundle(2 * i + 1).uopIdx := (2 * i).U
522        csBundle(2 * i + 2).srcType(0) := SrcType.vp
523        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
524        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
525        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
526        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
527        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
528      }
529    }
530    is(UopSplitType.VEC_WVV) {
531      for (i <- 0 until MAX_VLMUL / 2) {
532
533        csBundle(2 * i).lsrc(0) := src1 + i.U
534        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
535        csBundle(2 * i).lsrc(2) := dest + i.U
536        csBundle(2 * i).ldest := dest + i.U
537        csBundle(2 * i).uopIdx := (2 * i).U
538        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
539        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
540        csBundle(2 * i + 1).lsrc(2) := dest + i.U
541        csBundle(2 * i + 1).ldest := dest + i.U
542        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
543      }
544    }
545    is(UopSplitType.VEC_WFW) {
546      /*
547      f to vector move
548       */
549      csBundle(0).srcType(0) := SrcType.fp
550      csBundle(0).srcType(1) := SrcType.imm
551      csBundle(0).srcType(2) := SrcType.imm
552      csBundle(0).lsrc(1) := 0.U
553      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
554      csBundle(0).fuType := FuType.f2v.U
555      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
556      csBundle(0).rfWen := false.B
557      csBundle(0).fpWen := false.B
558      csBundle(0).vecWen := true.B
559
560      for (i <- 0 until MAX_VLMUL / 2) {
561        csBundle(2 * i + 1).srcType(0) := SrcType.vp
562        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
563        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
564        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
565        csBundle(2 * i + 1).ldest := dest + (2 * i).U
566        csBundle(2 * i + 1).uopIdx := (2 * i).U
567        csBundle(2 * i + 2).srcType(0) := SrcType.vp
568        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
569        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
570        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
571        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
572        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
573      }
574    }
575    is(UopSplitType.VEC_WXV) {
576      /*
577      i to vector move
578       */
579      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
580      csBundle(0).srcType(1) := SrcType.imm
581      csBundle(0).srcType(2) := SrcType.imm
582      csBundle(0).lsrc(1) := 0.U
583      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
584      csBundle(0).fuType := FuType.i2v.U
585      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
586      csBundle(0).vecWen := true.B
587
588      for (i <- 0 until MAX_VLMUL / 2) {
589        csBundle(2 * i + 1).srcType(0) := SrcType.vp
590        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
591        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
592        csBundle(2 * i + 1).lsrc(2) := dest + i.U
593        csBundle(2 * i + 1).ldest := dest + i.U
594        csBundle(2 * i + 1).uopIdx := (2 * i).U
595        csBundle(2 * i + 2).srcType(0) := SrcType.vp
596        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
597        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
598        csBundle(2 * i + 2).lsrc(2) := dest + i.U
599        csBundle(2 * i + 2).ldest := dest + i.U
600        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
601      }
602    }
603    is(UopSplitType.VEC_VVM) {
604      csBundle(0).lsrc(2) := dest
605      csBundle(0).ldest := dest
606      csBundle(0).uopIdx := 0.U
607      for (i <- 1 until MAX_VLMUL) {
608        csBundle(i).lsrc(0) := src1 + i.U
609        csBundle(i).lsrc(1) := src2 + i.U
610        csBundle(i).lsrc(2) := dest
611        csBundle(i).ldest := dest
612        csBundle(i).uopIdx := i.U
613      }
614    }
615    is(UopSplitType.VEC_VFM) {
616      /*
617      f to vector move
618       */
619      csBundle(0).srcType(0) := SrcType.fp
620      csBundle(0).srcType(1) := SrcType.imm
621      csBundle(0).srcType(2) := SrcType.imm
622      csBundle(0).lsrc(1) := 0.U
623      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
624      csBundle(0).fuType := FuType.f2v.U
625      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
626      csBundle(0).rfWen := false.B
627      csBundle(0).fpWen := false.B
628      csBundle(0).vecWen := true.B
629      //LMUL
630      csBundle(1).srcType(0) := SrcType.vp
631      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
632      csBundle(1).lsrc(2) := dest
633      csBundle(1).ldest := dest
634      csBundle(1).uopIdx := 0.U
635      for (i <- 1 until MAX_VLMUL) {
636        csBundle(i + 1).srcType(0) := SrcType.vp
637        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
638        csBundle(i + 1).lsrc(1) := src2 + i.U
639        csBundle(i + 1).lsrc(2) := dest
640        csBundle(i + 1).ldest := dest
641        csBundle(i + 1).uopIdx := i.U
642      }
643      csBundle(numOfUop - 1.U).ldest := dest
644    }
645    is(UopSplitType.VEC_VXM) {
646      /*
647      i to vector move
648       */
649      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
650      csBundle(0).srcType(1) := SrcType.imm
651      csBundle(0).srcType(2) := SrcType.imm
652      csBundle(0).lsrc(1) := 0.U
653      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
654      csBundle(0).fuType := FuType.i2v.U
655      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
656      csBundle(0).vecWen := true.B
657      //LMUL
658      csBundle(1).srcType(0) := SrcType.vp
659      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
660      csBundle(1).lsrc(2) := dest
661      csBundle(1).ldest := dest
662      csBundle(1).uopIdx := 0.U
663      for (i <- 1 until MAX_VLMUL) {
664        csBundle(i + 1).srcType(0) := SrcType.vp
665        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
666        csBundle(i + 1).lsrc(1) := src2 + i.U
667        csBundle(i + 1).lsrc(2) := dest
668        csBundle(i + 1).ldest := dest
669        csBundle(i + 1).uopIdx := i.U
670      }
671      csBundle(numOfUop - 1.U).ldest := dest
672    }
673    is(UopSplitType.VEC_SLIDE1UP) {
674      /*
675      i to vector move
676       */
677      csBundle(0).srcType(0) := SrcType.reg
678      csBundle(0).srcType(1) := SrcType.imm
679      csBundle(0).srcType(2) := SrcType.imm
680      csBundle(0).lsrc(1) := 0.U
681      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
682      csBundle(0).fuType := FuType.i2v.U
683      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
684      csBundle(0).vecWen := true.B
685      //LMUL
686      csBundle(1).srcType(0) := SrcType.vp
687      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
688      csBundle(1).lsrc(2) := dest
689      csBundle(1).ldest := dest
690      csBundle(1).uopIdx := 0.U
691      for (i <- 1 until MAX_VLMUL) {
692        csBundle(i + 1).srcType(0) := SrcType.vp
693        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
694        csBundle(i + 1).lsrc(1) := src2 + i.U
695        csBundle(i + 1).lsrc(2) := dest + i.U
696        csBundle(i + 1).ldest := dest + i.U
697        csBundle(i + 1).uopIdx := i.U
698      }
699    }
700    is(UopSplitType.VEC_FSLIDE1UP) {
701      /*
702      f to vector move
703       */
704      csBundle(0).srcType(0) := SrcType.fp
705      csBundle(0).srcType(1) := SrcType.imm
706      csBundle(0).srcType(2) := SrcType.imm
707      csBundle(0).lsrc(1) := 0.U
708      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
709      csBundle(0).fuType := FuType.f2v.U
710      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
711      csBundle(0).rfWen := false.B
712      csBundle(0).fpWen := false.B
713      csBundle(0).vecWen := true.B
714      //LMUL
715      csBundle(1).srcType(0) := SrcType.vp
716      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
717      csBundle(1).lsrc(1) := src2
718      csBundle(1).lsrc(2) := dest
719      csBundle(1).ldest := dest
720      csBundle(1).uopIdx := 0.U
721      for (i <- 1 until MAX_VLMUL) {
722        csBundle(i + 1).srcType(0) := SrcType.vp
723        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
724        csBundle(i + 1).lsrc(1) := src2 + i.U
725        csBundle(i + 1).lsrc(2) := dest + i.U
726        csBundle(i + 1).ldest := dest + i.U
727        csBundle(i + 1).uopIdx := i.U
728      }
729    }
730    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
731      /*
732      i to vector move
733       */
734      csBundle(0).srcType(0) := SrcType.reg
735      csBundle(0).srcType(1) := SrcType.imm
736      csBundle(0).srcType(2) := SrcType.imm
737      csBundle(0).lsrc(1) := 0.U
738      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
739      csBundle(0).fuType := FuType.i2v.U
740      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
741      csBundle(0).vecWen := true.B
742      //LMUL
743      for (i <- 0 until MAX_VLMUL) {
744        csBundle(2 * i + 1).srcType(0) := SrcType.vp
745        csBundle(2 * i + 1).srcType(1) := SrcType.vp
746        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
747        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
748        csBundle(2 * i + 1).lsrc(2) := dest + i.U
749        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
750        csBundle(2 * i + 1).uopIdx := (2 * i).U
751        if (2 * i + 2 < MAX_VLMUL * 2) {
752          csBundle(2 * i + 2).srcType(0) := SrcType.vp
753          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
754          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
755          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
756          csBundle(2 * i + 2).ldest := dest + i.U
757          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
758        }
759      }
760      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
761      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
762      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
763    }
764    is(UopSplitType.VEC_FSLIDE1DOWN) {
765      /*
766      f to vector move
767       */
768      csBundle(0).srcType(0) := SrcType.fp
769      csBundle(0).srcType(1) := SrcType.imm
770      csBundle(0).srcType(2) := SrcType.imm
771      csBundle(0).lsrc(1) := 0.U
772      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
773      csBundle(0).fuType := FuType.f2v.U
774      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
775      csBundle(0).rfWen := false.B
776      csBundle(0).fpWen := false.B
777      csBundle(0).vecWen := true.B
778      //LMUL
779      for (i <- 0 until MAX_VLMUL) {
780        csBundle(2 * i + 1).srcType(0) := SrcType.vp
781        csBundle(2 * i + 1).srcType(1) := SrcType.vp
782        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
783        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
784        csBundle(2 * i + 1).lsrc(2) := dest + i.U
785        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
786        csBundle(2 * i + 1).uopIdx := (2 * i).U
787        if (2 * i + 2 < MAX_VLMUL * 2) {
788          csBundle(2 * i + 2).srcType(0) := SrcType.vp
789          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
790          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
791          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
792          csBundle(2 * i + 2).ldest := dest + i.U
793          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
794        }
795      }
796      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
797      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
798      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
799    }
800    is(UopSplitType.VEC_VRED) {
801      when(vlmulReg === "b001".U) {
802        csBundle(0).srcType(2) := SrcType.DC
803        csBundle(0).lsrc(0) := src2 + 1.U
804        csBundle(0).lsrc(1) := src2
805        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
806        csBundle(0).uopIdx := 0.U
807      }
808      when(vlmulReg === "b010".U) {
809        csBundle(0).srcType(2) := SrcType.DC
810        csBundle(0).lsrc(0) := src2 + 1.U
811        csBundle(0).lsrc(1) := src2
812        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
813        csBundle(0).uopIdx := 0.U
814
815        csBundle(1).srcType(2) := SrcType.DC
816        csBundle(1).lsrc(0) := src2 + 3.U
817        csBundle(1).lsrc(1) := src2 + 2.U
818        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
819        csBundle(1).uopIdx := 1.U
820
821        csBundle(2).srcType(2) := SrcType.DC
822        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
823        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
824        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
825        csBundle(2).uopIdx := 2.U
826      }
827      when(vlmulReg === "b011".U) {
828        for (i <- 0 until MAX_VLMUL) {
829          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
830            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
831            csBundle(i).lsrc(1) := src2 + (i * 2).U
832            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
833          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
834            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
835            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
836            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
837          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
838            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
839            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
840            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
841          }
842          csBundle(i).srcType(2) := SrcType.DC
843          csBundle(i).uopIdx := i.U
844        }
845      }
846      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
847        /*
848         * 2 <= vlmul <= 8
849         */
850        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
851        csBundle(numOfUop - 1.U).lsrc(0) := src1
852        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
853        csBundle(numOfUop - 1.U).lsrc(2) := dest
854        csBundle(numOfUop - 1.U).ldest := dest
855        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
856      }
857    }
858    is(UopSplitType.VEC_VFRED) {
859      val vlmul = vlmulReg
860      val vsew = vsewReg
861      when(vlmul === VLmul.m8){
862        for (i <- 0 until 4) {
863          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
864          csBundle(i).lsrc(1) := src2 + (i * 2).U
865          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
866          csBundle(i).uopIdx := i.U
867        }
868        for (i <- 4 until 6) {
869          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
870          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
871          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
872          csBundle(i).uopIdx := i.U
873        }
874        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
875        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
876        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
877        csBundle(6).uopIdx := 6.U
878        when(vsew === VSew.e64) {
879          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
880          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
881          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
882          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
883          csBundle(7).uopIdx := 7.U
884          csBundle(8).lsrc(0) := src1
885          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
886          csBundle(8).ldest := dest
887          csBundle(8).uopIdx := 8.U
888        }
889        when(vsew === VSew.e32) {
890          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
891          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
892          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
893          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
894          csBundle(7).uopIdx := 7.U
895          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
896          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
897          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
898          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
899          csBundle(8).uopIdx := 8.U
900          csBundle(9).lsrc(0) := src1
901          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
902          csBundle(9).ldest := dest
903          csBundle(9).uopIdx := 9.U
904        }
905        when(vsew === VSew.e16) {
906          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
907          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
908          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
909          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
910          csBundle(7).uopIdx := 7.U
911          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
912          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
913          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
914          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
915          csBundle(8).uopIdx := 8.U
916          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
917          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
918          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
919          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
920          csBundle(9).uopIdx := 9.U
921          csBundle(10).lsrc(0) := src1
922          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
923          csBundle(10).ldest := dest
924          csBundle(10).uopIdx := 10.U
925        }
926      }
927      when(vlmul === VLmul.m4) {
928        for (i <- 0 until 2) {
929          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
930          csBundle(i).lsrc(1) := src2 + (i * 2).U
931          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
932          csBundle(i).uopIdx := i.U
933        }
934        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
935        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
936        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
937        csBundle(2).uopIdx := 2.U
938        when(vsew === VSew.e64) {
939          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
940          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
941          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
942          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
943          csBundle(3).uopIdx := 3.U
944          csBundle(4).lsrc(0) := src1
945          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
946          csBundle(4).ldest := dest
947          csBundle(4).uopIdx := 4.U
948        }
949        when(vsew === VSew.e32) {
950          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
951          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
952          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
953          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
954          csBundle(3).uopIdx := 3.U
955          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
956          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
957          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
958          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
959          csBundle(4).uopIdx := 4.U
960          csBundle(5).lsrc(0) := src1
961          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
962          csBundle(5).ldest := dest
963          csBundle(5).uopIdx := 5.U
964        }
965        when(vsew === VSew.e16) {
966          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
967          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
968          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
969          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
970          csBundle(3).uopIdx := 3.U
971          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
972          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
973          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
974          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
975          csBundle(4).uopIdx := 4.U
976          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
977          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
978          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
979          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
980          csBundle(5).uopIdx := 5.U
981          csBundle(6).lsrc(0) := src1
982          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
983          csBundle(6).ldest := dest
984          csBundle(6).uopIdx := 6.U
985        }
986      }
987      when(vlmul === VLmul.m2) {
988        csBundle(0).lsrc(0) := src2 + 1.U
989        csBundle(0).lsrc(1) := src2 + 0.U
990        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
991        csBundle(0).uopIdx := 0.U
992        when(vsew === VSew.e64) {
993          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
994          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
995          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
996          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
997          csBundle(1).uopIdx := 1.U
998          csBundle(2).lsrc(0) := src1
999          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1000          csBundle(2).ldest := dest
1001          csBundle(2).uopIdx := 2.U
1002        }
1003        when(vsew === VSew.e32) {
1004          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1005          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1006          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1007          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
1008          csBundle(1).uopIdx := 1.U
1009          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1010          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1011          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1012          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1013          csBundle(2).uopIdx := 2.U
1014          csBundle(3).lsrc(0) := src1
1015          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1016          csBundle(3).ldest := dest
1017          csBundle(3).uopIdx := 3.U
1018        }
1019        when(vsew === VSew.e16) {
1020          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1021          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1022          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1023          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
1024          csBundle(1).uopIdx := 1.U
1025          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1026          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1027          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1028          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1029          csBundle(2).uopIdx := 2.U
1030          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
1031          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1032          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1033          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
1034          csBundle(3).uopIdx := 3.U
1035          csBundle(4).lsrc(0) := src1
1036          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1037          csBundle(4).ldest := dest
1038          csBundle(4).uopIdx := 4.U
1039        }
1040      }
1041      when(vlmul === VLmul.m1) {
1042        when(vsew === VSew.e64) {
1043          csBundle(0).lsrc(0) := src2
1044          csBundle(0).lsrc(1) := src2
1045          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1046          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1047          csBundle(0).uopIdx := 0.U
1048          csBundle(1).lsrc(0) := src1
1049          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1050          csBundle(1).ldest := dest
1051          csBundle(1).uopIdx := 1.U
1052        }
1053        when(vsew === VSew.e32) {
1054          csBundle(0).lsrc(0) := src2
1055          csBundle(0).lsrc(1) := src2
1056          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1057          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1058          csBundle(0).uopIdx := 0.U
1059          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1060          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1061          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1062          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1063          csBundle(1).uopIdx := 1.U
1064          csBundle(2).lsrc(0) := src1
1065          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1066          csBundle(2).ldest := dest
1067          csBundle(2).uopIdx := 2.U
1068        }
1069        when(vsew === VSew.e16) {
1070          csBundle(0).lsrc(0) := src2
1071          csBundle(0).lsrc(1) := src2
1072          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1073          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1074          csBundle(0).uopIdx := 0.U
1075          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1076          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1077          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1078          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1079          csBundle(1).uopIdx := 1.U
1080          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1081          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1082          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1083          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1084          csBundle(2).uopIdx := 2.U
1085          csBundle(3).lsrc(0) := src1
1086          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1087          csBundle(3).ldest := dest
1088          csBundle(3).uopIdx := 3.U
1089        }
1090      }
1091      when(vlmul === VLmul.mf2) {
1092        when(vsew === VSew.e32) {
1093          csBundle(0).lsrc(0) := src2
1094          csBundle(0).lsrc(1) := src2
1095          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1096          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1097          csBundle(0).uopIdx := 0.U
1098          csBundle(1).lsrc(0) := src1
1099          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1100          csBundle(1).ldest := dest
1101          csBundle(1).uopIdx := 1.U
1102        }
1103        when(vsew === VSew.e16) {
1104          csBundle(0).lsrc(0) := src2
1105          csBundle(0).lsrc(1) := src2
1106          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1107          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1108          csBundle(0).uopIdx := 0.U
1109          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1110          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1111          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1112          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1113          csBundle(1).uopIdx := 1.U
1114          csBundle(2).lsrc(0) := src1
1115          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1116          csBundle(2).ldest := dest
1117          csBundle(2).uopIdx := 2.U
1118        }
1119      }
1120      when(vlmul === VLmul.mf4) {
1121        when(vsew === VSew.e16) {
1122          csBundle(0).lsrc(0) := src2
1123          csBundle(0).lsrc(1) := src2
1124          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1125          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1126          csBundle(0).uopIdx := 0.U
1127          csBundle(1).lsrc(0) := src1
1128          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1129          csBundle(1).ldest := dest
1130          csBundle(1).uopIdx := 1.U
1131        }
1132      }
1133    }
1134
1135    is(UopSplitType.VEC_VFREDOSUM) {
1136      import yunsuan.VfaluType
1137      val vlmul = vlmulReg
1138      val vsew = vsewReg
1139      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1140      when(vlmul === VLmul.m8) {
1141        when(vsew === VSew.e64) {
1142          val vlmax = 16
1143          for (i <- 0 until vlmax) {
1144            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1145            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1146            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1147            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1149            csBundle(i).uopIdx := i.U
1150          }
1151        }
1152        when(vsew === VSew.e32) {
1153          val vlmax = 32
1154          for (i <- 0 until vlmax) {
1155            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1156            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1157            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1158            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1160            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1161            csBundle(i).uopIdx := i.U
1162          }
1163        }
1164        when(vsew === VSew.e16) {
1165          val vlmax = 64
1166          for (i <- 0 until vlmax) {
1167            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1168            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1169            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1170            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1172            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1173            csBundle(i).uopIdx := i.U
1174          }
1175        }
1176      }
1177      when(vlmul === VLmul.m4) {
1178        when(vsew === VSew.e64) {
1179          val vlmax = 8
1180          for (i <- 0 until vlmax) {
1181            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1182            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1183            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1184            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1185            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1186            csBundle(i).uopIdx := i.U
1187          }
1188        }
1189        when(vsew === VSew.e32) {
1190          val vlmax = 16
1191          for (i <- 0 until vlmax) {
1192            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1193            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1194            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1195            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1197            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1198            csBundle(i).uopIdx := i.U
1199          }
1200        }
1201        when(vsew === VSew.e16) {
1202          val vlmax = 32
1203          for (i <- 0 until vlmax) {
1204            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1205            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1206            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1207            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1208            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1209            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1210            csBundle(i).uopIdx := i.U
1211          }
1212        }
1213      }
1214      when(vlmul === VLmul.m2) {
1215        when(vsew === VSew.e64) {
1216          val vlmax = 4
1217          for (i <- 0 until vlmax) {
1218            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1219            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1220            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1223            csBundle(i).uopIdx := i.U
1224          }
1225        }
1226        when(vsew === VSew.e32) {
1227          val vlmax = 8
1228          for (i <- 0 until vlmax) {
1229            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1230            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1231            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1232            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1234            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1235            csBundle(i).uopIdx := i.U
1236          }
1237        }
1238        when(vsew === VSew.e16) {
1239          val vlmax = 16
1240          for (i <- 0 until vlmax) {
1241            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1242            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1243            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1244            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1245            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1246            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1247            csBundle(i).uopIdx := i.U
1248          }
1249        }
1250      }
1251      when(vlmul === VLmul.m1) {
1252        when(vsew === VSew.e64) {
1253          val vlmax = 2
1254          for (i <- 0 until vlmax) {
1255            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1256            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1257            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1258            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1260            csBundle(i).uopIdx := i.U
1261          }
1262        }
1263        when(vsew === VSew.e32) {
1264          val vlmax = 4
1265          for (i <- 0 until vlmax) {
1266            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1267            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1268            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1269            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1270            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1271            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1272            csBundle(i).uopIdx := i.U
1273          }
1274        }
1275        when(vsew === VSew.e16) {
1276          val vlmax = 8
1277          for (i <- 0 until vlmax) {
1278            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1279            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1280            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1281            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1282            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1283            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1284            csBundle(i).uopIdx := i.U
1285          }
1286        }
1287      }
1288      when(vlmul === VLmul.mf2) {
1289        when(vsew === VSew.e32) {
1290          val vlmax = 2
1291          for (i <- 0 until vlmax) {
1292            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1293            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1294            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1295            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1296            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1297            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1298            csBundle(i).uopIdx := i.U
1299          }
1300        }
1301        when(vsew === VSew.e16) {
1302          val vlmax = 4
1303          for (i <- 0 until vlmax) {
1304            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1305            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1306            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1307            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1308            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1309            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1310            csBundle(i).uopIdx := i.U
1311          }
1312        }
1313      }
1314      when(vlmul === VLmul.mf4) {
1315        when(vsew === VSew.e16) {
1316          val vlmax = 2
1317          for (i <- 0 until vlmax) {
1318            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1319            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1320            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1321            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1322            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1323            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1324            csBundle(i).uopIdx := i.U
1325          }
1326        }
1327      }
1328    }
1329
1330    is(UopSplitType.VEC_SLIDEUP) {
1331      // i to vector move
1332      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1333      csBundle(0).srcType(1) := SrcType.imm
1334      csBundle(0).srcType(2) := SrcType.imm
1335      csBundle(0).lsrc(1) := 0.U
1336      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1337      csBundle(0).fuType := FuType.i2v.U
1338      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1339      csBundle(0).vecWen := true.B
1340      // LMUL
1341      for (i <- 0 until MAX_VLMUL)
1342        for (j <- 0 to i) {
1343          val old_vd = if (j == 0) {
1344            dest + i.U
1345          } else (VECTOR_TMP_REG_LMUL + j).U
1346          val vd = if (j == i) {
1347            dest + i.U
1348          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1349          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1350          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1351          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1352          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1353          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1354          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1355        }
1356    }
1357
1358    is(UopSplitType.VEC_SLIDEDOWN) {
1359      // i to vector move
1360      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1361      csBundle(0).srcType(1) := SrcType.imm
1362      csBundle(0).srcType(2) := SrcType.imm
1363      csBundle(0).lsrc(1) := 0.U
1364      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1365      csBundle(0).fuType := FuType.i2v.U
1366      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1367      csBundle(0).vecWen := true.B
1368      // LMUL
1369      for (i <- 0 until MAX_VLMUL)
1370        for (j <- (0 to i).reverse) {
1371          when(i.U < lmul) {
1372            val old_vd = if (j == 0) {
1373              dest + lmul - 1.U - i.U
1374            } else (VECTOR_TMP_REG_LMUL + j).U
1375            val vd = if (j == i) {
1376              dest + lmul - 1.U - i.U
1377            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1378            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1379            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1380            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1381            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1382            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1383            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1384          }
1385        }
1386    }
1387
1388    is(UopSplitType.VEC_M0X) {
1389      // LMUL
1390      for (i <- 0 until MAX_VLMUL) {
1391        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1392        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1393        csBundle(i).srcType(0) := srcType0
1394        csBundle(i).srcType(1) := SrcType.vp
1395        csBundle(i).rfWen := false.B
1396        csBundle(i).fpWen := false.B
1397        csBundle(i).vecWen := true.B
1398        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1399        csBundle(i).lsrc(1) := src2
1400        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1401        csBundle(i).ldest := ldest
1402        csBundle(i).uopIdx := i.U
1403      }
1404      csBundle(numOfUop - 1.U).rfWen := Mux(dest === 0.U, false.B, true.B)
1405      csBundle(numOfUop - 1.U).fpWen := false.B
1406      csBundle(numOfUop - 1.U).vecWen := false.B
1407      csBundle(numOfUop - 1.U).ldest := dest
1408    }
1409
1410    is(UopSplitType.VEC_MVV) {
1411      // LMUL
1412      for (i <- 0 until MAX_VLMUL) {
1413        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1414        csBundle(i * 2 + 0).srcType(0) := srcType0
1415        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1416        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1417        csBundle(i * 2 + 0).lsrc(1) := src2
1418        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1419        csBundle(i * 2 + 0).ldest := dest + i.U
1420        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1421
1422        csBundle(i * 2 + 1).srcType(0) := srcType0
1423        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1424        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1425        csBundle(i * 2 + 1).lsrc(1) := src2
1426        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1427        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1428        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1429      }
1430    }
1431    is(UopSplitType.VEC_VWW) {
1432      for (i <- 0 until MAX_VLMUL*2) {
1433        when(i.U < lmul){
1434          csBundle(i).srcType(2) := SrcType.DC
1435          csBundle(i).lsrc(0) := src2 + i.U
1436          csBundle(i).lsrc(1) := src2 + i.U
1437          // csBundle(i).lsrc(2) := dest + (2 * i).U
1438          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1439          csBundle(i).uopIdx :=  i.U
1440        } otherwise {
1441          csBundle(i).srcType(2) := SrcType.DC
1442          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1443          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1444          // csBundle(i).lsrc(2) := dest + (2 * i).U
1445          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1446          csBundle(i).uopIdx := i.U
1447        }
1448        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1449        csBundle(numOfUop-1.U).lsrc(0) := src1
1450        csBundle(numOfUop-1.U).lsrc(2) := dest
1451        csBundle(numOfUop-1.U).ldest := dest
1452      }
1453    }
1454    is(UopSplitType.VEC_RGATHER) {
1455      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1456        for (i <- 0 until len)
1457          for (j <- 0 until len) {
1458            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1459            // csBundle(i * len + j).srcType(1) := SrcType.vp
1460            // csBundle(i * len + j).srcType(2) := SrcType.vp
1461            csBundle(i * len + j).lsrc(0) := src1 + i.U
1462            csBundle(i * len + j).lsrc(1) := src2 + j.U
1463            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1464            csBundle(i * len + j).lsrc(2) := vd_old
1465            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1466            csBundle(i * len + j).ldest := vd
1467            csBundle(i * len + j).uopIdx := (i * len + j).U
1468          }
1469      }
1470      switch(vlmulReg) {
1471        is("b001".U ){
1472          genCsBundle_VEC_RGATHER(2)
1473        }
1474        is("b010".U ){
1475          genCsBundle_VEC_RGATHER(4)
1476        }
1477        is("b011".U ){
1478          genCsBundle_VEC_RGATHER(8)
1479        }
1480      }
1481    }
1482    is(UopSplitType.VEC_RGATHER_VX) {
1483      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1484        for (i <- 0 until len)
1485          for (j <- 0 until len) {
1486            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1487            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1488            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1489            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1490            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1491            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1492            csBundle(i * len + j + 1).lsrc(2) := vd_old
1493            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1494            csBundle(i * len + j + 1).ldest := vd
1495            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1496          }
1497      }
1498      // i to vector move
1499      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1500      csBundle(0).srcType(1) := SrcType.imm
1501      csBundle(0).srcType(2) := SrcType.imm
1502      csBundle(0).lsrc(1) := 0.U
1503      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1504      csBundle(0).fuType := FuType.i2v.U
1505      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1506      csBundle(0).rfWen := false.B
1507      csBundle(0).fpWen := false.B
1508      csBundle(0).vecWen := true.B
1509      genCsBundle_RGATHER_VX(1)
1510      switch(vlmulReg) {
1511        is("b001".U ){
1512          genCsBundle_RGATHER_VX(2)
1513        }
1514        is("b010".U ){
1515          genCsBundle_RGATHER_VX(4)
1516        }
1517        is("b011".U ){
1518          genCsBundle_RGATHER_VX(8)
1519        }
1520      }
1521    }
1522    is(UopSplitType.VEC_RGATHEREI16) {
1523      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1524        for (i <- 0 until len)
1525          for (j <- 0 until len) {
1526            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1527            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1528            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1529            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1530            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1531            csBundle((i * len + j)*2+0).ldest := vd0
1532            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1533            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1534            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1535            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1536            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1537            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1538            csBundle((i * len + j)*2+1).ldest := vd1
1539            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1540          }
1541      }
1542      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1543        for (i <- 0 until len)
1544          for (j <- 0 until len) {
1545            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1546            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1547            csBundle(i * len + j).lsrc(0) := src1 + i.U
1548            csBundle(i * len + j).lsrc(1) := src2 + j.U
1549            csBundle(i * len + j).lsrc(2) := vd_old
1550            csBundle(i * len + j).ldest := vd
1551            csBundle(i * len + j).uopIdx := (i * len + j).U
1552          }
1553      }
1554      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1555        for (i <- 0 until len)
1556          for (j <- 0 until len) {
1557            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1558            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1559            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1560            csBundle(i * len + j).lsrc(1) := src2 + j.U
1561            csBundle(i * len + j).lsrc(2) := vd_old
1562            csBundle(i * len + j).ldest := vd
1563            csBundle(i * len + j).uopIdx := (i * len + j).U
1564          }
1565      }
1566      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1567        for (i <- 0 until len)
1568          for (j <- 0 until len) {
1569            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1570            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1571            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1572            csBundle(i * len + j).lsrc(1) := src2 + j.U
1573            csBundle(i * len + j).lsrc(2) := vd_old
1574            csBundle(i * len + j).ldest := vd
1575            csBundle(i * len + j).uopIdx := (i * len + j).U
1576          }
1577      }
1578      when(!vsewReg.orR){
1579        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1580      }.elsewhen(vsewReg === VSew.e32){
1581        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1582      }.elsewhen(vsewReg === VSew.e64){
1583        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1584      }.otherwise{
1585        genCsBundle_VEC_RGATHEREI16(1)
1586      }
1587      switch(vlmulReg) {
1588        is("b001".U) {
1589          when(!vsewReg.orR) {
1590            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1591          }.elsewhen(vsewReg === VSew.e32){
1592            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1593          }.elsewhen(vsewReg === VSew.e64){
1594            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1595          }.otherwise{
1596            genCsBundle_VEC_RGATHEREI16(2)
1597          }
1598        }
1599        is("b010".U) {
1600          when(!vsewReg.orR) {
1601            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1602          }.elsewhen(vsewReg === VSew.e32){
1603            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1604          }.elsewhen(vsewReg === VSew.e64){
1605            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1606          }.otherwise{
1607            genCsBundle_VEC_RGATHEREI16(4)
1608          }
1609        }
1610        is("b011".U) {
1611          when(vsewReg === VSew.e32){
1612            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1613          }.elsewhen(vsewReg === VSew.e64){
1614            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1615          }.otherwise{
1616            genCsBundle_VEC_RGATHEREI16(8)
1617          }
1618        }
1619      }
1620    }
1621    is(UopSplitType.VEC_COMPRESS) {
1622      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1623        for (i <- 0 until len) {
1624          val jlen = if (i == len-1) i+1 else i+2
1625          for (j <- 0 until jlen) {
1626            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1627            val vd = if(i==len-1) (dest + j.U) else {
1628              if (j == i+1) VECTOR_TMP_REG_LMUL.U  else (VECTOR_TMP_REG_LMUL + j + 1).U
1629            }
1630            csBundle(i*(i+3)/2 + j).vecWen := true.B
1631            csBundle(i*(i+3)/2 + j).v0Wen := false.B
1632            val src13Type = if (j == i+1) DontCare else SrcType.vp
1633            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1634            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1635            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1636            if (i == 0) {
1637              csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1638            } else {
1639              csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1640            }
1641            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1642            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1643            csBundle(i*(i+3)/2 + j).ldest := vd
1644            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1645          }
1646        }
1647      }
1648      switch(vlmulReg) {
1649        is("b001".U ){
1650          genCsBundle_VEC_COMPRESS(2)
1651        }
1652        is("b010".U ){
1653          genCsBundle_VEC_COMPRESS(4)
1654        }
1655        is("b011".U ){
1656          genCsBundle_VEC_COMPRESS(8)
1657        }
1658      }
1659    }
1660    is(UopSplitType.VEC_MVNR) {
1661      for (i <- 0 until MAX_VLMUL) {
1662        csBundle(i).lsrc(0) := src1 + i.U
1663        csBundle(i).lsrc(1) := src2 + i.U
1664        csBundle(i).lsrc(2) := dest + i.U
1665        csBundle(i).ldest := dest + i.U
1666        csBundle(i).uopIdx := i.U
1667      }
1668    }
1669    is(UopSplitType.VEC_US_LDST) {
1670      /*
1671      FMV.D.X
1672       */
1673      csBundle(0).srcType(0) := SrcType.reg
1674      csBundle(0).srcType(1) := SrcType.imm
1675      csBundle(0).lsrc(1) := 0.U
1676      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1677      csBundle(0).fuType := FuType.i2v.U
1678      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1679      csBundle(0).rfWen := false.B
1680      csBundle(0).fpWen := false.B
1681      csBundle(0).vecWen := true.B
1682      csBundle(0).vlsInstr := true.B
1683      //LMUL
1684      for (i <- 0 until MAX_VLMUL) {
1685        csBundle(i + 1).srcType(0) := SrcType.vp
1686        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1687        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1688        csBundle(i + 1).ldest := dest + i.U
1689        csBundle(i + 1).uopIdx := i.U
1690        csBundle(i + 1).vlsInstr := true.B
1691      }
1692      csBundle.head.waitForward := isUsSegment
1693      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1694    }
1695    is(UopSplitType.VEC_US_FF_LD) {
1696      csBundle(0).srcType(0) := SrcType.reg
1697      csBundle(0).srcType(1) := SrcType.imm
1698      csBundle(0).lsrc(1) := 0.U
1699      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1700      csBundle(0).fuType := FuType.i2v.U
1701      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1702      csBundle(0).rfWen := false.B
1703      csBundle(0).fpWen := false.B
1704      csBundle(0).vecWen := true.B
1705      csBundle(0).vlsInstr := true.B
1706      //LMUL
1707      for (i <- 0 until MAX_VLMUL) {
1708        csBundle(i + 1).srcType(0) := SrcType.vp
1709        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1710        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1711        csBundle(i + 1).ldest := dest + i.U
1712        csBundle(i + 1).uopIdx := i.U
1713        csBundle(i + 1).vlsInstr := true.B
1714      }
1715      csBundle.head.waitForward := isUsSegment
1716      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1717      // last uop read vl and write vl
1718      csBundle(numOfUop - 1.U).srcType(0) := SrcType.no
1719      csBundle(numOfUop - 1.U).srcType(1) := SrcType.no
1720      csBundle(numOfUop - 1.U).srcType(2) := SrcType.no
1721      csBundle(numOfUop - 1.U).srcType(3) := SrcType.no
1722      csBundle(numOfUop - 1.U).srcType(4) := SrcType.vp
1723      csBundle(numOfUop - 1.U).lsrc(4) := Vl_IDX.U
1724      // vtype
1725      csBundle(numOfUop - 1.U).vecWen := false.B
1726      csBundle(numOfUop - 1.U).vlWen := true.B
1727      csBundle(numOfUop - 1.U).ldest := Vl_IDX.U
1728    }
1729    is(UopSplitType.VEC_S_LDST) {
1730      /*
1731      FMV.D.X
1732       */
1733      csBundle(0).srcType(0) := SrcType.reg
1734      csBundle(0).srcType(1) := SrcType.imm
1735      csBundle(0).lsrc(1) := 0.U
1736      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1737      csBundle(0).fuType := FuType.i2v.U
1738      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1739      csBundle(0).rfWen := false.B
1740      csBundle(0).fpWen := false.B
1741      csBundle(0).vecWen := true.B
1742      csBundle(0).vlsInstr := true.B
1743
1744      csBundle(1).srcType(0) := SrcType.reg
1745      csBundle(1).srcType(1) := SrcType.imm
1746      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1747      csBundle(1).lsrc(1) := 0.U
1748      csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1749      csBundle(1).fuType := FuType.i2v.U
1750      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1751      csBundle(1).rfWen := false.B
1752      csBundle(1).fpWen := false.B
1753      csBundle(1).vecWen := true.B
1754      csBundle(1).vlsInstr := true.B
1755
1756      //LMUL
1757      for (i <- 0 until MAX_VLMUL) {
1758        csBundle(i + 2).srcType(0) := SrcType.vp
1759        csBundle(i + 2).srcType(1) := SrcType.vp
1760        csBundle(i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1761        csBundle(i + 2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1762        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1763        csBundle(i + 2).ldest := dest + i.U
1764        csBundle(i + 2).uopIdx := i.U
1765        csBundle(i + 2).vlsInstr := true.B
1766      }
1767      csBundle.head.waitForward := isSdSegment
1768      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1769    }
1770    is(UopSplitType.VEC_I_LDST) {
1771      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
1772        for (i <- 0 until MAX_VLMUL) {
1773          val vecWen = if (i < lmul * nf) true.B else false.B
1774          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
1775          csBundle(i + 1).srcType(0) := SrcType.vp
1776          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1777          csBundle(i + 1).srcType(1) := SrcType.no
1778          csBundle(i + 1).lsrc(1) := src2 + i.U
1779          csBundle(i + 1).srcType(2) := src2Type
1780          csBundle(i + 1).lsrc(2) := dest + i.U
1781          csBundle(i + 1).ldest := dest + i.U
1782          csBundle(i + 1).rfWen := false.B
1783          csBundle(i + 1).fpWen := false.B
1784          csBundle(i + 1).vecWen := vecWen
1785          csBundle(i + 1).uopIdx := i.U
1786          csBundle(i + 1).vlsInstr := true.B
1787        }
1788      }
1789      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
1790        for (i <- 0 until MAX_VLMUL) {
1791          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1792          csBundle(i + 1).srcType(1) := src1Type
1793          csBundle(i + 1).lsrc(1) := src2 + i.U
1794        }
1795      }
1796
1797      val vlmul = vlmulReg
1798      val vsew = Cat(0.U(1.W), vsewReg)
1799      val veew = Cat(0.U(1.W), width)
1800      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
1801      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Seq(
1802        "b001".U -> 1.U,
1803        "b010".U -> 2.U,
1804        "b011".U -> 3.U
1805      ))
1806      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Seq(
1807        "b001".U -> 1.U,
1808        "b010".U -> 2.U,
1809        "b011".U -> 3.U
1810      ))
1811      csBundle(0).srcType(0) := SrcType.reg
1812      csBundle(0).srcType(1) := SrcType.imm
1813      csBundle(0).lsrc(1) := 0.U
1814      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1815      csBundle(0).fuType := FuType.i2v.U
1816      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1817      csBundle(0).rfWen := false.B
1818      csBundle(0).fpWen := false.B
1819      csBundle(0).vecWen := true.B
1820      csBundle(0).vlsInstr := true.B
1821
1822      //LMUL
1823      when(nf === 0.U) {
1824        for (i <- 0 until MAX_VLMUL) {
1825          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1826          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1827          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1828          csBundle(i + 1).srcType(0) := SrcType.vp
1829          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1830          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1831          csBundle(i + 1).srcType(2) := SrcType.vp
1832          // lsrc2 is old vd
1833          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1834          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1835          csBundle(i + 1).uopIdx := i.U
1836          csBundle(i + 1).vlsInstr := true.B
1837        }
1838      }.otherwise{
1839        // nf > 1, is segment indexed load/store
1840        // gen src0, vd
1841        switch(simple_lmul) {
1842          is(0.U) {
1843            switch(nf) {
1844              is(1.U) {
1845                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
1846              }
1847              is(2.U) {
1848                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
1849              }
1850              is(3.U) {
1851                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
1852              }
1853              is(4.U) {
1854                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
1855              }
1856              is(5.U) {
1857                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
1858              }
1859              is(6.U) {
1860                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
1861              }
1862              is(7.U) {
1863                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
1864              }
1865            }
1866          }
1867          is(1.U) {
1868            switch(nf) {
1869              is(1.U) {
1870                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
1871              }
1872              is(2.U) {
1873                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
1874              }
1875              is(3.U) {
1876                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
1877              }
1878            }
1879          }
1880          is(2.U) {
1881            switch(nf) {
1882              is(1.U) {
1883                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
1884              }
1885            }
1886          }
1887        }
1888
1889        // gen src1
1890        switch(simple_emul) {
1891          is(0.U) {
1892            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
1893          }
1894          is(1.U) {
1895            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
1896          }
1897          is(2.U) {
1898            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
1899          }
1900          is(3.U) {
1901            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
1902          }
1903        }
1904
1905        // when is vstore instructions, not set vecwen
1906        when(isVstore) {
1907          for (i <- 0 until MAX_VLMUL) {
1908            csBundle(i + 1).vecWen := false.B
1909          }
1910        }
1911      }
1912      csBundle.head.waitForward := isIxSegment
1913      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1914    }
1915  }
1916
1917  //readyFromRename Counter
1918  val readyCounter = Mux(outReadys.head, RenameWidth.U, 0.U)
1919
1920  // The left uops of the complex inst in ComplexDecoder can be send out this cycle
1921  val thisAllOut = uopRes <= readyCounter
1922
1923  switch(state) {
1924    is(s_idle) {
1925      when (inValid) {
1926        stateNext := s_active
1927        uopResNext := inUopInfo.numOfUop
1928      }
1929    }
1930    is(s_active) {
1931      when (thisAllOut) {
1932        when (inValid) {
1933          stateNext := s_active
1934          uopResNext := inUopInfo.numOfUop
1935        }.otherwise {
1936          stateNext := s_idle
1937          uopResNext := 0.U
1938        }
1939      }.otherwise {
1940        stateNext := s_active
1941        uopResNext := uopRes - readyCounter
1942      }
1943    }
1944  }
1945
1946  state := Mux(io.redirect, s_idle, stateNext)
1947  uopRes := Mux(io.redirect, 0.U, uopResNext)
1948
1949  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1950
1951  fixedDecodedInst := csBundle
1952
1953  // when vstart is not zero, the last uop will modify vstart to zero
1954  // therefore, blockback and flush pipe
1955  fixedDecodedInst(numOfUop - 1.U).flushPipe := (vstartReg =/= 0.U) || latchedInst.flushPipe
1956
1957  for(i <- 0 until RenameWidth) {
1958    outValids(i) := complexNum > i.U
1959    outDecodedInsts(i) := fixedDecodedInst(i.U + numOfUop - uopRes)
1960  }
1961
1962  outComplexNum := Mux(state === s_active, complexNum, 0.U)
1963  inReady := state === s_idle || state === s_active && thisAllOut
1964
1965
1966  XSError(inValid && inUopInfo.numOfUop === 0.U,
1967    p"uop number ${inUopInfo.numOfUop} is illegal, cannot be zero")
1968//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1969//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1970//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1971//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1972//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1973//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1974//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1975//
1976//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1977//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1978//    0.U)
1979//  validToRename.zipWithIndex.foreach{
1980//    case(dst, i) =>
1981//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1982//      dst := MuxCase(false.B, Seq(
1983//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1984//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1985//      ).toSeq)
1986//  }
1987//
1988//  readyToIBuf.zipWithIndex.foreach {
1989//    case (dst, i) =>
1990//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1991//      dst := MuxCase(true.B, Seq(
1992//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1993//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1994//      ).toSeq)
1995//  }
1996//
1997//  io.deq.decodedInsts := decodedInsts
1998//  io.deq.complexNum := complexNum
1999//  io.deq.validToRename := validToRename
2000//  io.deq.readyToIBuf := readyToIBuf
2001}
2002