xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision b37ee2eea1a9a583b6e7cd5cd3645faa13d4f235)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Combinational lookup table for one uop of a (non-segment) indexed vector load/store.
  *
  * For the fixed, elaboration-time `uopIdx`, the table maps a 4-bit selector to the
  * register offsets that this uop should add to vs2 (the index register group) and
  * to vd (the data register group).
  *
  * Ports:
  *  - `src`          : selector, `{emul[1:0], lmul[1:0]}` (built as `emul << 2 | lmul`).
  *                     Both fields are used as exponents, i.e. the group size is
  *                     `1 << emul` / `1 << lmul` registers.
  *  - `outOffsetVs2` : bits [5:3] of the decoded entry, offset for vs2.
  *  - `outOffsetVd`  : bits [2:0] of the decoded entry, offset for vd.
  *
  * @param uopIdx index of the uop this table instance serves
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair of one uop.
    *
    * Only non-segment indexed load/store is considered. The number of uops is set by
    * the larger of the two register groups; the smaller group advances once every
    * `1 << |emul - lmul|` uops.
    *
    * @param lmul   exponent of the data register group size (group has `1 << lmul` registers)
    * @param emul   exponent of the index register group size (group has `1 << emul` registers)
    * @param uopIdx index of the uop
    * @return `(offsetVs2, offsetVd)`; `(0, 0)` when `uopIdx` lies outside the uop range
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    if (lmul < emul) {
      // lmul < emul: uop count follows emul, vs2 advances every uop
      val ratio = 1 << (emul - lmul)
      val inRange = uopIdx >= 0 && uopIdx < (1 << emul)
      if (inRange) (uopIdx, uopIdx / ratio) else (0, 0)
    } else {
      // lmul >= emul: uop count follows lmul, vd advances every uop
      val ratio = 1 << (lmul - emul)
      val inRange = uopIdx >= 0 && uopIdx < (1 << lmul)
      if (inRange) (uopIdx / ratio, uopIdx) else (0, 0)
    }
  }

  // One entry per (emul, lmul) combination, emul-major, for this instance's uopIdx.
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Minimised decoder: key = {emul, lmul}, value = {offsetVs2, offsetVd}; default is all zeros.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/** Numeric constants shared by the vector decode logic. */
trait VectorConstants {
  /** Largest number of per-register uops a register group is expanded into. */
  val MAX_VLMUL: Int = 8

  /** First logical index of the temporary vector registers (33~47 -> 15 registers). */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Located in the v0 regfile. */
  val VECTOR_COMPRESS: Int = 1

  /** Presumably the upper bound on uops of one indexed load/store -- confirm at use sites. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * IO bundle of [[DecodeUnitComp]], the decoder stage that splits a complex
  * instruction into several decoded uops.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Redirect indication from the pipeline (NOTE(review): exact flush semantics are
  // implemented outside this bundle -- confirm in DecodeUnitComp).
  val redirect = Input(Bool())
  // Custom CSR control signals.
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // Bypassed vtype; DecodeUnitComp connects it to the vpu fields of the uops of a vset instruction.
  val vtypeBypass = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in.
  // Payload: the result of the simple decoder plus the uop-split information
  // (uop count, write-back count, lmul) for that instruction.
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // Decoded uops produced from the complex instruction, one decoupled port per rename lane.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  // 3-bit count output (presumably the number of valid complex uops offered this
  // cycle -- confirm against the driver in DecodeUnitComp).
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153
154  //Type of uop Div
155  val typeOfSplit = latchedInst.uopSplitType
156  val src1Type = latchedInst.srcType(0)
157  val src1IsImm = src1Type === SrcType.imm
158  val src1IsFp = src1Type === SrcType.fp
159
160  val isVstore = FuType.isVStore(latchedInst.fuType)
161
162  numOfUop := latchedUopInfo.numOfUop
163  numOfWB := latchedUopInfo.numOfWB
164
165  //uops dispatch
166  val s_idle :: s_active :: Nil = Enum(2)
167  val state = RegInit(s_idle)
168  val stateNext = WireDefault(state)
169  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
170  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
171  val uopResNext = WireInit(uopRes)
172  val e64 = 3.U(2.W)
173  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
174  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
175  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
176
177  //uop div up to maxUopSize
178  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
179  csBundle.foreach { case dst =>
180    dst := latchedInst
181    dst.numUops := latchedUopInfo.numOfUop
182    dst.numWB := latchedUopInfo.numOfWB
183    dst.firstUop := false.B
184    dst.lastUop := false.B
185    dst.vlsInstr := false.B
186  }
187
188  csBundle(0).firstUop := true.B
189  csBundle(numOfUop - 1.U).lastUop := true.B
190
191  switch(typeOfSplit) {
192    is(UopSplitType.VSET) {
193      // In simple decoder, rfWen and vecWen are not set
194      when(isVsetSimple) {
195        // Default
196        // uop0 set rd, never flushPipe
197        csBundle(0).fuType := FuType.vsetiwi.U
198        csBundle(0).flushPipe := false.B
199        csBundle(0).rfWen := true.B
200        // uop1 set vl, vsetvl will flushPipe
201        csBundle(1).ldest := Vl_IDX.U
202        csBundle(1).vecWen := false.B
203        csBundle(1).vlWen := true.B
204        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
205          // write nothing, uop0 is a nop instruction
206          csBundle(0).rfWen := false.B
207          csBundle(0).fpWen := false.B
208          csBundle(0).vecWen := false.B
209          csBundle(0).vlWen := false.B
210          csBundle(1).fuType := FuType.vsetfwf.U
211          csBundle(1).srcType(0) := SrcType.no
212          csBundle(1).srcType(2) := SrcType.no
213          csBundle(1).srcType(3) := SrcType.no
214          csBundle(1).srcType(4) := SrcType.vp
215          csBundle(1).lsrc(4) := Vl_IDX.U
216        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
217          // uop0: mv vtype gpr to vector region
218          csBundle(0).srcType(0) := SrcType.xp
219          csBundle(0).srcType(1) := SrcType.no
220          csBundle(0).lsrc(0) := src2
221          csBundle(0).lsrc(1) := 0.U
222          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
223          csBundle(0).fuType := FuType.i2v.U
224          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
225          csBundle(0).rfWen := false.B
226          csBundle(0).fpWen := false.B
227          csBundle(0).vecWen := true.B
228          csBundle(0).vlWen := false.B
229          csBundle(0).flushPipe := false.B
230          // uop1: uvsetvcfg_vv
231          csBundle(1).fuType := FuType.vsetfwf.U
232          // vl
233          csBundle(1).srcType(0) := SrcType.no
234          csBundle(1).srcType(2) := SrcType.no
235          csBundle(1).srcType(3) := SrcType.no
236          csBundle(1).srcType(4) := SrcType.vp
237          csBundle(1).lsrc(4) := Vl_IDX.U
238          // vtype
239          csBundle(1).srcType(1) := SrcType.vp
240          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
241          csBundle(1).vecWen := false.B
242          csBundle(1).vlWen := true.B
243          csBundle(1).ldest := Vl_IDX.U
244        }.elsewhen(dest === 0.U) {
245          // write nothing, uop0 is a nop instruction
246          csBundle(0).rfWen := false.B
247          csBundle(0).fpWen := false.B
248          csBundle(0).vecWen := false.B
249          csBundle(0).vlWen := false.B
250        }
251        // use bypass vtype from vtypeGen
252        csBundle(0).vpu.connectVType(io.vtypeBypass)
253        csBundle(1).vpu.connectVType(io.vtypeBypass)
254      }
255    }
256    is(UopSplitType.VEC_VVV) {
257      for (i <- 0 until MAX_VLMUL) {
258        csBundle(i).lsrc(0) := src1 + i.U
259        csBundle(i).lsrc(1) := src2 + i.U
260        csBundle(i).lsrc(2) := dest + i.U
261        csBundle(i).ldest := dest + i.U
262        csBundle(i).uopIdx := i.U
263      }
264    }
265    is(UopSplitType.VEC_VFV) {
266      /*
267      f to vector move
268       */
269      csBundle(0).srcType(0) := SrcType.fp
270      csBundle(0).srcType(1) := SrcType.imm
271      csBundle(0).srcType(2) := SrcType.imm
272      csBundle(0).lsrc(1) := 0.U
273      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
274      csBundle(0).fuType := FuType.f2v.U
275      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
276      csBundle(0).vecWen := true.B
277      csBundle(0).vpu.isReverse := false.B
278      /*
279      LMUL
280       */
281      for (i <- 0 until MAX_VLMUL) {
282        csBundle(i + 1).srcType(0) := SrcType.vp
283        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
284        csBundle(i + 1).lsrc(1) := src2 + i.U
285        csBundle(i + 1).lsrc(2) := dest + i.U
286        csBundle(i + 1).ldest := dest + i.U
287        csBundle(i + 1).uopIdx := i.U
288      }
289    }
290    is(UopSplitType.VEC_EXT2) {
291      for (i <- 0 until MAX_VLMUL / 2) {
292        csBundle(2 * i).lsrc(1) := src2 + i.U
293        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
294        csBundle(2 * i).ldest := dest + (2 * i).U
295        csBundle(2 * i).uopIdx := (2 * i).U
296        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
297        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
298        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
299        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
300      }
301    }
302    is(UopSplitType.VEC_EXT4) {
303      for (i <- 0 until MAX_VLMUL / 4) {
304        csBundle(4 * i).lsrc(1) := src2 + i.U
305        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
306        csBundle(4 * i).ldest := dest + (4 * i).U
307        csBundle(4 * i).uopIdx := (4 * i).U
308        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
309        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
310        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
311        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
312        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
313        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
314        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
315        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
316        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
317        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
318        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
319        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
320      }
321    }
322    is(UopSplitType.VEC_EXT8) {
323      for (i <- 0 until MAX_VLMUL) {
324        csBundle(i).lsrc(1) := src2
325        csBundle(i).lsrc(2) := dest + i.U
326        csBundle(i).ldest := dest + i.U
327        csBundle(i).uopIdx := i.U
328      }
329    }
330    is(UopSplitType.VEC_0XV) {
331      /*
332      i/f to vector move
333       */
334      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
335      csBundle(0).srcType(1) := SrcType.imm
336      csBundle(0).srcType(2) := SrcType.imm
337      csBundle(0).lsrc(1) := 0.U
338      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
339      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
340      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
341      csBundle(0).rfWen := false.B
342      csBundle(0).fpWen := false.B
343      csBundle(0).vecWen := true.B
344      /*
345      vmv.s.x
346       */
347      csBundle(1).srcType(0) := SrcType.vp
348      csBundle(1).srcType(1) := SrcType.imm
349      csBundle(1).srcType(2) := SrcType.vp
350      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
351      csBundle(1).lsrc(1) := 0.U
352      csBundle(1).lsrc(2) := dest
353      csBundle(1).ldest := dest
354      csBundle(1).rfWen := false.B
355      csBundle(1).fpWen := false.B
356      csBundle(1).vecWen := true.B
357      csBundle(1).uopIdx := 0.U
358    }
359    is(UopSplitType.VEC_VXV) {
360      /*
361      i to vector move
362       */
363      csBundle(0).srcType(0) := SrcType.reg
364      csBundle(0).srcType(1) := SrcType.imm
365      csBundle(0).srcType(2) := SrcType.imm
366      csBundle(0).lsrc(1) := 0.U
367      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
368      csBundle(0).fuType := FuType.i2v.U
369      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
370      csBundle(0).vecWen := true.B
371      csBundle(0).vpu.isReverse := false.B
372      /*
373      LMUL
374       */
375      for (i <- 0 until MAX_VLMUL) {
376        csBundle(i + 1).srcType(0) := SrcType.vp
377        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
378        csBundle(i + 1).lsrc(1) := src2 + i.U
379        csBundle(i + 1).lsrc(2) := dest + i.U
380        csBundle(i + 1).ldest := dest + i.U
381        csBundle(i + 1).uopIdx := i.U
382      }
383    }
384    is(UopSplitType.VEC_VVW) {
385      for (i <- 0 until MAX_VLMUL / 2) {
386        csBundle(2 * i).lsrc(0) := src1 + i.U
387        csBundle(2 * i).lsrc(1) := src2 + i.U
388        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
389        csBundle(2 * i).ldest := dest + (2 * i).U
390        csBundle(2 * i).uopIdx := (2 * i).U
391        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
392        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
393        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
394        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
395        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
396      }
397    }
398    is(UopSplitType.VEC_VFW) {
399      /*
400      f to vector move
401       */
402      csBundle(0).srcType(0) := SrcType.fp
403      csBundle(0).srcType(1) := SrcType.imm
404      csBundle(0).srcType(2) := SrcType.imm
405      csBundle(0).lsrc(1) := 0.U
406      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
407      csBundle(0).fuType := FuType.f2v.U
408      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
409      csBundle(0).rfWen := false.B
410      csBundle(0).fpWen := false.B
411      csBundle(0).vecWen := true.B
412
413      for (i <- 0 until MAX_VLMUL / 2) {
414        csBundle(2 * i + 1).srcType(0) := SrcType.vp
415        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
416        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
417        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
418        csBundle(2 * i + 1).ldest := dest + (2 * i).U
419        csBundle(2 * i + 1).uopIdx := (2 * i).U
420        csBundle(2 * i + 2).srcType(0) := SrcType.vp
421        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
422        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
423        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
424        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
425        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
426      }
427    }
428    is(UopSplitType.VEC_WVW) {
429      for (i <- 0 until MAX_VLMUL / 2) {
430        csBundle(2 * i).lsrc(0) := src1 + i.U
431        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
432        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
433        csBundle(2 * i).ldest := dest + (2 * i).U
434        csBundle(2 * i).uopIdx := (2 * i).U
435        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
436        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
437        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
438        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
439        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
440      }
441    }
442    is(UopSplitType.VEC_VXW) {
443      /*
444      i to vector move
445       */
446      csBundle(0).srcType(0) := SrcType.reg
447      csBundle(0).srcType(1) := SrcType.imm
448      csBundle(0).srcType(2) := SrcType.imm
449      csBundle(0).lsrc(1) := 0.U
450      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
451      csBundle(0).fuType := FuType.i2v.U
452      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
453      csBundle(0).vecWen := true.B
454
455      for (i <- 0 until MAX_VLMUL / 2) {
456        csBundle(2 * i + 1).srcType(0) := SrcType.vp
457        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
458        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
459        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
460        csBundle(2 * i + 1).ldest := dest + (2 * i).U
461        csBundle(2 * i + 1).uopIdx := (2 * i).U
462        csBundle(2 * i + 2).srcType(0) := SrcType.vp
463        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
464        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
465        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
466        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
467        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
468      }
469    }
470    is(UopSplitType.VEC_WXW) {
471      /*
472      i to vector move
473       */
474      csBundle(0).srcType(0) := SrcType.reg
475      csBundle(0).srcType(1) := SrcType.imm
476      csBundle(0).srcType(2) := SrcType.imm
477      csBundle(0).lsrc(1) := 0.U
478      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
479      csBundle(0).fuType := FuType.i2v.U
480      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
481      csBundle(0).vecWen := true.B
482
483      for (i <- 0 until MAX_VLMUL / 2) {
484        csBundle(2 * i + 1).srcType(0) := SrcType.vp
485        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
486        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
487        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
488        csBundle(2 * i + 1).ldest := dest + (2 * i).U
489        csBundle(2 * i + 1).uopIdx := (2 * i).U
490        csBundle(2 * i + 2).srcType(0) := SrcType.vp
491        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
492        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
493        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
494        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
495        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
496      }
497    }
498    is(UopSplitType.VEC_WVV) {
499      for (i <- 0 until MAX_VLMUL / 2) {
500
501        csBundle(2 * i).lsrc(0) := src1 + i.U
502        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
503        csBundle(2 * i).lsrc(2) := dest + i.U
504        csBundle(2 * i).ldest := dest + i.U
505        csBundle(2 * i).uopIdx := (2 * i).U
506        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
507        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
508        csBundle(2 * i + 1).lsrc(2) := dest + i.U
509        csBundle(2 * i + 1).ldest := dest + i.U
510        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
511      }
512    }
513    is(UopSplitType.VEC_WFW) {
514      /*
515      f to vector move
516       */
517      csBundle(0).srcType(0) := SrcType.fp
518      csBundle(0).srcType(1) := SrcType.imm
519      csBundle(0).srcType(2) := SrcType.imm
520      csBundle(0).lsrc(1) := 0.U
521      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
522      csBundle(0).fuType := FuType.f2v.U
523      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
524      csBundle(0).rfWen := false.B
525      csBundle(0).fpWen := false.B
526      csBundle(0).vecWen := true.B
527
528      for (i <- 0 until MAX_VLMUL / 2) {
529        csBundle(2 * i + 1).srcType(0) := SrcType.vp
530        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
531        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
532        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
533        csBundle(2 * i + 1).ldest := dest + (2 * i).U
534        csBundle(2 * i + 1).uopIdx := (2 * i).U
535        csBundle(2 * i + 2).srcType(0) := SrcType.vp
536        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
537        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
538        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
539        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
540        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
541      }
542    }
543    is(UopSplitType.VEC_WXV) {
544      /*
545      i to vector move
546       */
547      csBundle(0).srcType(0) := SrcType.reg
548      csBundle(0).srcType(1) := SrcType.imm
549      csBundle(0).srcType(2) := SrcType.imm
550      csBundle(0).lsrc(1) := 0.U
551      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
552      csBundle(0).fuType := FuType.i2v.U
553      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
554      csBundle(0).vecWen := true.B
555
556      for (i <- 0 until MAX_VLMUL / 2) {
557        csBundle(2 * i + 1).srcType(0) := SrcType.vp
558        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
559        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
560        csBundle(2 * i + 1).lsrc(2) := dest + i.U
561        csBundle(2 * i + 1).ldest := dest + i.U
562        csBundle(2 * i + 1).uopIdx := (2 * i).U
563        csBundle(2 * i + 2).srcType(0) := SrcType.vp
564        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
565        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
566        csBundle(2 * i + 2).lsrc(2) := dest + i.U
567        csBundle(2 * i + 2).ldest := dest + i.U
568        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
569      }
570    }
571    is(UopSplitType.VEC_VVM) {
572      csBundle(0).lsrc(2) := dest
573      csBundle(0).ldest := dest
574      csBundle(0).uopIdx := 0.U
575      for (i <- 1 until MAX_VLMUL) {
576        csBundle(i).lsrc(0) := src1 + i.U
577        csBundle(i).lsrc(1) := src2 + i.U
578        csBundle(i).lsrc(2) := dest
579        csBundle(i).ldest := dest
580        csBundle(i).uopIdx := i.U
581      }
582    }
583    is(UopSplitType.VEC_VFM) {
584      /*
585      f to vector move
586       */
587      csBundle(0).srcType(0) := SrcType.fp
588      csBundle(0).srcType(1) := SrcType.imm
589      csBundle(0).srcType(2) := SrcType.imm
590      csBundle(0).lsrc(1) := 0.U
591      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
592      csBundle(0).fuType := FuType.f2v.U
593      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
594      csBundle(0).rfWen := false.B
595      csBundle(0).fpWen := false.B
596      csBundle(0).vecWen := true.B
597      //LMUL
598      csBundle(1).srcType(0) := SrcType.vp
599      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
600      csBundle(1).lsrc(2) := dest
601      csBundle(1).ldest := dest
602      csBundle(1).uopIdx := 0.U
603      for (i <- 1 until MAX_VLMUL) {
604        csBundle(i + 1).srcType(0) := SrcType.vp
605        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
606        csBundle(i + 1).lsrc(1) := src2 + i.U
607        csBundle(i + 1).lsrc(2) := dest
608        csBundle(i + 1).ldest := dest
609        csBundle(i + 1).uopIdx := i.U
610      }
611      csBundle(numOfUop - 1.U).ldest := dest
612    }
613    is(UopSplitType.VEC_VXM) {
614      /*
615      i to vector move
616       */
617      csBundle(0).srcType(0) := SrcType.reg
618      csBundle(0).srcType(1) := SrcType.imm
619      csBundle(0).srcType(2) := SrcType.imm
620      csBundle(0).lsrc(1) := 0.U
621      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
622      csBundle(0).fuType := FuType.i2v.U
623      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
624      csBundle(0).vecWen := true.B
625      //LMUL
626      csBundle(1).srcType(0) := SrcType.vp
627      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
628      csBundle(1).lsrc(2) := dest
629      csBundle(1).ldest := dest
630      csBundle(1).uopIdx := 0.U
631      for (i <- 1 until MAX_VLMUL) {
632        csBundle(i + 1).srcType(0) := SrcType.vp
633        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
634        csBundle(i + 1).lsrc(1) := src2 + i.U
635        csBundle(i + 1).lsrc(2) := dest
636        csBundle(i + 1).ldest := dest
637        csBundle(i + 1).uopIdx := i.U
638      }
639      csBundle(numOfUop - 1.U).ldest := dest
640    }
641    is(UopSplitType.VEC_SLIDE1UP) {
642      /*
643      i to vector move
644       */
645      csBundle(0).srcType(0) := SrcType.reg
646      csBundle(0).srcType(1) := SrcType.imm
647      csBundle(0).srcType(2) := SrcType.imm
648      csBundle(0).lsrc(1) := 0.U
649      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
650      csBundle(0).fuType := FuType.i2v.U
651      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
652      csBundle(0).vecWen := true.B
653      //LMUL
654      csBundle(1).srcType(0) := SrcType.vp
655      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
656      csBundle(1).lsrc(2) := dest
657      csBundle(1).ldest := dest
658      csBundle(1).uopIdx := 0.U
659      for (i <- 1 until MAX_VLMUL) {
660        csBundle(i + 1).srcType(0) := SrcType.vp
661        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
662        csBundle(i + 1).lsrc(1) := src2 + i.U
663        csBundle(i + 1).lsrc(2) := dest + i.U
664        csBundle(i + 1).ldest := dest + i.U
665        csBundle(i + 1).uopIdx := i.U
666      }
667    }
668    is(UopSplitType.VEC_FSLIDE1UP) {
669      /*
670      f to vector move
671       */
672      csBundle(0).srcType(0) := SrcType.fp
673      csBundle(0).srcType(1) := SrcType.imm
674      csBundle(0).srcType(2) := SrcType.imm
675      csBundle(0).lsrc(1) := 0.U
676      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
677      csBundle(0).fuType := FuType.f2v.U
678      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
679      csBundle(0).rfWen := false.B
680      csBundle(0).fpWen := false.B
681      csBundle(0).vecWen := true.B
682      //LMUL
683      csBundle(1).srcType(0) := SrcType.vp
684      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
685      csBundle(1).lsrc(1) := src2
686      csBundle(1).lsrc(2) := dest
687      csBundle(1).ldest := dest
688      csBundle(1).uopIdx := 0.U
689      for (i <- 1 until MAX_VLMUL) {
690        csBundle(i + 1).srcType(0) := SrcType.vp
691        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
692        csBundle(i + 1).lsrc(1) := src2 + i.U
693        csBundle(i + 1).lsrc(2) := dest + i.U
694        csBundle(i + 1).ldest := dest + i.U
695        csBundle(i + 1).uopIdx := i.U
696      }
697    }
698    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
699      /*
700      i to vector move
701       */
702      csBundle(0).srcType(0) := SrcType.reg
703      csBundle(0).srcType(1) := SrcType.imm
704      csBundle(0).srcType(2) := SrcType.imm
705      csBundle(0).lsrc(1) := 0.U
706      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
707      csBundle(0).fuType := FuType.i2v.U
708      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
709      csBundle(0).vecWen := true.B
710      //LMUL
711      for (i <- 0 until MAX_VLMUL) {
712        csBundle(2 * i + 1).srcType(0) := SrcType.vp
713        csBundle(2 * i + 1).srcType(1) := SrcType.vp
714        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
715        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
716        csBundle(2 * i + 1).lsrc(2) := dest + i.U
717        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
718        csBundle(2 * i + 1).uopIdx := (2 * i).U
719        if (2 * i + 2 < MAX_VLMUL * 2) {
720          csBundle(2 * i + 2).srcType(0) := SrcType.vp
721          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
722          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
723          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
724          csBundle(2 * i + 2).ldest := dest + i.U
725          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
726        }
727      }
728      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
729      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
730      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
731    }
732    is(UopSplitType.VEC_FSLIDE1DOWN) {
733      /*
734      f to vector move
735       */
736      csBundle(0).srcType(0) := SrcType.fp
737      csBundle(0).srcType(1) := SrcType.imm
738      csBundle(0).srcType(2) := SrcType.imm
739      csBundle(0).lsrc(1) := 0.U
740      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
741      csBundle(0).fuType := FuType.f2v.U
742      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
743      csBundle(0).rfWen := false.B
744      csBundle(0).fpWen := false.B
745      csBundle(0).vecWen := true.B
746      //LMUL
747      for (i <- 0 until MAX_VLMUL) {
748        csBundle(2 * i + 1).srcType(0) := SrcType.vp
749        csBundle(2 * i + 1).srcType(1) := SrcType.vp
750        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
751        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
752        csBundle(2 * i + 1).lsrc(2) := dest + i.U
753        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
754        csBundle(2 * i + 1).uopIdx := (2 * i).U
755        if (2 * i + 2 < MAX_VLMUL * 2) {
756          csBundle(2 * i + 2).srcType(0) := SrcType.vp
757          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
758          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
759          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
760          csBundle(2 * i + 2).ldest := dest + i.U
761          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
762        }
763      }
764      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
765      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
766      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
767    }
768    is(UopSplitType.VEC_VRED) {
769      when(vlmulReg === "b001".U) {
770        csBundle(0).srcType(2) := SrcType.DC
771        csBundle(0).lsrc(0) := src2 + 1.U
772        csBundle(0).lsrc(1) := src2
773        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
774        csBundle(0).uopIdx := 0.U
775      }
776      when(vlmulReg === "b010".U) {
777        csBundle(0).srcType(2) := SrcType.DC
778        csBundle(0).lsrc(0) := src2 + 1.U
779        csBundle(0).lsrc(1) := src2
780        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
781        csBundle(0).uopIdx := 0.U
782
783        csBundle(1).srcType(2) := SrcType.DC
784        csBundle(1).lsrc(0) := src2 + 3.U
785        csBundle(1).lsrc(1) := src2 + 2.U
786        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
787        csBundle(1).uopIdx := 1.U
788
789        csBundle(2).srcType(2) := SrcType.DC
790        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
791        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
792        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
793        csBundle(2).uopIdx := 2.U
794      }
795      when(vlmulReg === "b011".U) {
796        for (i <- 0 until MAX_VLMUL) {
797          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
798            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
799            csBundle(i).lsrc(1) := src2 + (i * 2).U
800            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
801          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
802            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
803            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
804            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
805          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
806            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
807            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
808            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
809          }
810          csBundle(i).srcType(2) := SrcType.DC
811          csBundle(i).uopIdx := i.U
812        }
813      }
814      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
815        /*
816         * 2 <= vlmul <= 8
817         */
818        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
819        csBundle(numOfUop - 1.U).lsrc(0) := src1
820        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
821        csBundle(numOfUop - 1.U).lsrc(2) := dest
822        csBundle(numOfUop - 1.U).ldest := dest
823        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
824      }
825    }
826    is(UopSplitType.VEC_VFRED) {
827      val vlmul = vlmulReg
828      val vsew = vsewReg
829      when(vlmul === VLmul.m8){
830        for (i <- 0 until 4) {
831          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
832          csBundle(i).lsrc(1) := src2 + (i * 2).U
833          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
834          csBundle(i).uopIdx := i.U
835        }
836        for (i <- 4 until 6) {
837          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
838          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
839          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
840          csBundle(i).uopIdx := i.U
841        }
842        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
843        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
844        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
845        csBundle(6).uopIdx := 6.U
846        when(vsew === VSew.e64) {
847          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
848          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
849          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
850          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
851          csBundle(7).uopIdx := 7.U
852          csBundle(8).lsrc(0) := src1
853          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
854          csBundle(8).ldest := dest
855          csBundle(8).uopIdx := 8.U
856        }
857        when(vsew === VSew.e32) {
858          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
859          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
860          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
861          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
862          csBundle(7).uopIdx := 7.U
863          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
864          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
865          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
866          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
867          csBundle(8).uopIdx := 8.U
868          csBundle(9).lsrc(0) := src1
869          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
870          csBundle(9).ldest := dest
871          csBundle(9).uopIdx := 9.U
872        }
873        when(vsew === VSew.e16) {
874          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
875          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
876          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
877          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
878          csBundle(7).uopIdx := 7.U
879          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
880          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
881          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
882          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
883          csBundle(8).uopIdx := 8.U
884          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
885          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
886          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
887          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
888          csBundle(9).uopIdx := 9.U
889          csBundle(10).lsrc(0) := src1
890          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
891          csBundle(10).ldest := dest
892          csBundle(10).uopIdx := 10.U
893        }
894      }
895      when(vlmul === VLmul.m4) {
896        for (i <- 0 until 2) {
897          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
898          csBundle(i).lsrc(1) := src2 + (i * 2).U
899          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
900          csBundle(i).uopIdx := i.U
901        }
902        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
903        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
904        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
905        csBundle(2).uopIdx := 2.U
906        when(vsew === VSew.e64) {
907          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
908          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
909          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
910          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
911          csBundle(3).uopIdx := 3.U
912          csBundle(4).lsrc(0) := src1
913          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
914          csBundle(4).ldest := dest
915          csBundle(4).uopIdx := 4.U
916        }
917        when(vsew === VSew.e32) {
918          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
919          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
920          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
921          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
922          csBundle(3).uopIdx := 3.U
923          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
924          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
925          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
926          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
927          csBundle(4).uopIdx := 4.U
928          csBundle(5).lsrc(0) := src1
929          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
930          csBundle(5).ldest := dest
931          csBundle(5).uopIdx := 5.U
932        }
933        when(vsew === VSew.e16) {
934          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
935          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
936          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
937          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
938          csBundle(3).uopIdx := 3.U
939          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
940          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
941          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
942          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
943          csBundle(4).uopIdx := 4.U
944          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
945          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
946          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
947          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
948          csBundle(5).uopIdx := 5.U
949          csBundle(6).lsrc(0) := src1
950          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
951          csBundle(6).ldest := dest
952          csBundle(6).uopIdx := 6.U
953        }
954      }
955      when(vlmul === VLmul.m2) {
956        csBundle(0).lsrc(0) := src2 + 1.U
957        csBundle(0).lsrc(1) := src2 + 0.U
958        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
959        csBundle(0).uopIdx := 0.U
960        when(vsew === VSew.e64) {
961          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
962          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
963          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
964          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
965          csBundle(1).uopIdx := 1.U
966          csBundle(2).lsrc(0) := src1
967          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
968          csBundle(2).ldest := dest
969          csBundle(2).uopIdx := 2.U
970        }
971        when(vsew === VSew.e32) {
972          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
973          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
974          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
975          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
976          csBundle(1).uopIdx := 1.U
977          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
978          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
979          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
980          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
981          csBundle(2).uopIdx := 2.U
982          csBundle(3).lsrc(0) := src1
983          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
984          csBundle(3).ldest := dest
985          csBundle(3).uopIdx := 3.U
986        }
987        when(vsew === VSew.e16) {
988          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
989          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
990          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
991          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
992          csBundle(1).uopIdx := 1.U
993          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
994          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
995          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
996          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
997          csBundle(2).uopIdx := 2.U
998          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
999          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1000          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1001          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
1002          csBundle(3).uopIdx := 3.U
1003          csBundle(4).lsrc(0) := src1
1004          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1005          csBundle(4).ldest := dest
1006          csBundle(4).uopIdx := 4.U
1007        }
1008      }
1009      when(vlmul === VLmul.m1) {
1010        when(vsew === VSew.e64) {
1011          csBundle(0).lsrc(0) := src2
1012          csBundle(0).lsrc(1) := src2
1013          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1014          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1015          csBundle(0).uopIdx := 0.U
1016          csBundle(1).lsrc(0) := src1
1017          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1018          csBundle(1).ldest := dest
1019          csBundle(1).uopIdx := 1.U
1020        }
1021        when(vsew === VSew.e32) {
1022          csBundle(0).lsrc(0) := src2
1023          csBundle(0).lsrc(1) := src2
1024          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1025          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1026          csBundle(0).uopIdx := 0.U
1027          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1028          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1029          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1030          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1031          csBundle(1).uopIdx := 1.U
1032          csBundle(2).lsrc(0) := src1
1033          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1034          csBundle(2).ldest := dest
1035          csBundle(2).uopIdx := 2.U
1036        }
1037        when(vsew === VSew.e16) {
1038          csBundle(0).lsrc(0) := src2
1039          csBundle(0).lsrc(1) := src2
1040          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1041          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1042          csBundle(0).uopIdx := 0.U
1043          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1044          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1045          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1046          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1047          csBundle(1).uopIdx := 1.U
1048          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1049          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1050          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1051          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1052          csBundle(2).uopIdx := 2.U
1053          csBundle(3).lsrc(0) := src1
1054          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1055          csBundle(3).ldest := dest
1056          csBundle(3).uopIdx := 3.U
1057        }
1058      }
1059      when(vlmul === VLmul.mf2) {
1060        when(vsew === VSew.e32) {
1061          csBundle(0).lsrc(0) := src2
1062          csBundle(0).lsrc(1) := src2
1063          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1064          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1065          csBundle(0).uopIdx := 0.U
1066          csBundle(1).lsrc(0) := src1
1067          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1068          csBundle(1).ldest := dest
1069          csBundle(1).uopIdx := 1.U
1070        }
1071        when(vsew === VSew.e16) {
1072          csBundle(0).lsrc(0) := src2
1073          csBundle(0).lsrc(1) := src2
1074          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1075          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1076          csBundle(0).uopIdx := 0.U
1077          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1078          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1079          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1080          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1081          csBundle(1).uopIdx := 1.U
1082          csBundle(2).lsrc(0) := src1
1083          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1084          csBundle(2).ldest := dest
1085          csBundle(2).uopIdx := 2.U
1086        }
1087      }
1088      when(vlmul === VLmul.mf4) {
1089        when(vsew === VSew.e16) {
1090          csBundle(0).lsrc(0) := src2
1091          csBundle(0).lsrc(1) := src2
1092          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1093          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1094          csBundle(0).uopIdx := 0.U
1095          csBundle(1).lsrc(0) := src1
1096          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1097          csBundle(1).ldest := dest
1098          csBundle(1).uopIdx := 1.U
1099        }
1100      }
1101    }
1102
1103    is(UopSplitType.VEC_VFREDOSUM) {
1104      import yunsuan.VfaluType
1105      val vlmul = vlmulReg
1106      val vsew = vsewReg
1107      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1108      when(vlmul === VLmul.m8) {
1109        when(vsew === VSew.e64) {
1110          val vlmax = 16
1111          for (i <- 0 until vlmax) {
1112            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1113            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1114            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1115            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1116            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1117            csBundle(i).uopIdx := i.U
1118          }
1119        }
1120        when(vsew === VSew.e32) {
1121          val vlmax = 32
1122          for (i <- 0 until vlmax) {
1123            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1124            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1125            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1126            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1127            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1128            csBundle(i).uopIdx := i.U
1129          }
1130        }
1131        when(vsew === VSew.e16) {
1132          val vlmax = 64
1133          for (i <- 0 until vlmax) {
1134            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1135            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1137            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1138            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1139            csBundle(i).uopIdx := i.U
1140          }
1141        }
1142      }
1143      when(vlmul === VLmul.m4) {
1144        when(vsew === VSew.e64) {
1145          val vlmax = 8
1146          for (i <- 0 until vlmax) {
1147            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1149            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1150            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1151            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1152            csBundle(i).uopIdx := i.U
1153          }
1154        }
1155        when(vsew === VSew.e32) {
1156          val vlmax = 16
1157          for (i <- 0 until vlmax) {
1158            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1161            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1162            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1163            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1164            csBundle(i).uopIdx := i.U
1165          }
1166        }
1167        when(vsew === VSew.e16) {
1168          val vlmax = 32
1169          for (i <- 0 until vlmax) {
1170            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1172            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1173            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1174            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1175            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1176            csBundle(i).uopIdx := i.U
1177          }
1178        }
1179      }
1180      when(vlmul === VLmul.m2) {
1181        when(vsew === VSew.e64) {
1182          val vlmax = 4
1183          for (i <- 0 until vlmax) {
1184            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1185            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1186            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1187            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1188            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1189            csBundle(i).uopIdx := i.U
1190          }
1191        }
1192        when(vsew === VSew.e32) {
1193          val vlmax = 8
1194          for (i <- 0 until vlmax) {
1195            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1197            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1198            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1199            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1200            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1201            csBundle(i).uopIdx := i.U
1202          }
1203        }
1204        when(vsew === VSew.e16) {
1205          val vlmax = 16
1206          for (i <- 0 until vlmax) {
1207            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1208            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1209            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1211            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1212            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1213            csBundle(i).uopIdx := i.U
1214          }
1215        }
1216      }
1217      when(vlmul === VLmul.m1) {
1218        when(vsew === VSew.e64) {
1219          val vlmax = 2
1220          for (i <- 0 until vlmax) {
1221            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1223            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1224            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1225            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1226            csBundle(i).uopIdx := i.U
1227          }
1228        }
1229        when(vsew === VSew.e32) {
1230          val vlmax = 4
1231          for (i <- 0 until vlmax) {
1232            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1235            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1236            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1237            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1238            csBundle(i).uopIdx := i.U
1239          }
1240        }
1241        when(vsew === VSew.e16) {
1242          val vlmax = 8
1243          for (i <- 0 until vlmax) {
1244            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1245            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1246            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1248            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1249            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1250            csBundle(i).uopIdx := i.U
1251          }
1252        }
1253      }
1254      when(vlmul === VLmul.mf2) {
1255        when(vsew === VSew.e32) {
1256          val vlmax = 2
1257          for (i <- 0 until vlmax) {
1258            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1260            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1261            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1262            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1263            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1264            csBundle(i).uopIdx := i.U
1265          }
1266        }
1267        when(vsew === VSew.e16) {
1268          val vlmax = 4
1269          for (i <- 0 until vlmax) {
1270            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1271            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1272            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1273            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1274            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1275            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1276            csBundle(i).uopIdx := i.U
1277          }
1278        }
1279      }
1280      when(vlmul === VLmul.mf4) {
1281        when(vsew === VSew.e16) {
1282          val vlmax = 2
1283          for (i <- 0 until vlmax) {
1284            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1285            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1286            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1287            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1288            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1289            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1290            csBundle(i).uopIdx := i.U
1291          }
1292        }
1293      }
1294    }
1295
1296    is(UopSplitType.VEC_SLIDEUP) {
1297      // i to vector move
1298      csBundle(0).srcType(0) := SrcType.reg
1299      csBundle(0).srcType(1) := SrcType.imm
1300      csBundle(0).srcType(2) := SrcType.imm
1301      csBundle(0).lsrc(1) := 0.U
1302      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1303      csBundle(0).fuType := FuType.i2v.U
1304      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1305      csBundle(0).vecWen := true.B
1306      // LMUL
1307      for (i <- 0 until MAX_VLMUL)
1308        for (j <- 0 to i) {
1309          val old_vd = if (j == 0) {
1310            dest + i.U
1311          } else (VECTOR_TMP_REG_LMUL + j).U
1312          val vd = if (j == i) {
1313            dest + i.U
1314          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1315          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1316          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1317          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1318          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1319          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1320          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1321        }
1322    }
1323
1324    is(UopSplitType.VEC_SLIDEDOWN) {
1325      // i to vector move
1326      csBundle(0).srcType(0) := SrcType.reg
1327      csBundle(0).srcType(1) := SrcType.imm
1328      csBundle(0).srcType(2) := SrcType.imm
1329      csBundle(0).lsrc(1) := 0.U
1330      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1331      csBundle(0).fuType := FuType.i2v.U
1332      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1333      csBundle(0).vecWen := true.B
1334      // LMUL
1335      for (i <- 0 until MAX_VLMUL)
1336        for (j <- (0 to i).reverse) {
1337          when(i.U < lmul) {
1338            val old_vd = if (j == 0) {
1339              dest + lmul - 1.U - i.U
1340            } else (VECTOR_TMP_REG_LMUL + j).U
1341            val vd = if (j == i) {
1342              dest + lmul - 1.U - i.U
1343            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1344            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1345            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1346            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1347            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1348            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1349            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1350          }
1351        }
1352    }
1353
1354    is(UopSplitType.VEC_M0X) {
1355      // LMUL
1356      for (i <- 0 until MAX_VLMUL) {
1357        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1358        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1359        csBundle(i).srcType(0) := srcType0
1360        csBundle(i).srcType(1) := SrcType.vp
1361        csBundle(i).rfWen := false.B
1362        csBundle(i).fpWen := false.B
1363        csBundle(i).vecWen := true.B
1364        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1365        csBundle(i).lsrc(1) := src2
1366        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1367        csBundle(i).ldest := ldest
1368        csBundle(i).uopIdx := i.U
1369      }
1370      csBundle(lmul - 1.U).rfWen := true.B
1371      csBundle(lmul - 1.U).fpWen := false.B
1372      csBundle(lmul - 1.U).vecWen := false.B
1373      csBundle(lmul - 1.U).ldest := dest
1374    }
1375
1376    is(UopSplitType.VEC_MVV) {
1377      // LMUL
1378      for (i <- 0 until MAX_VLMUL) {
1379        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1380        csBundle(i * 2 + 0).srcType(0) := srcType0
1381        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1382        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1383        csBundle(i * 2 + 0).lsrc(1) := src2
1384        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1385        csBundle(i * 2 + 0).ldest := dest + i.U
1386        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1387
1388        csBundle(i * 2 + 1).srcType(0) := srcType0
1389        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1390        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1391        csBundle(i * 2 + 1).lsrc(1) := src2
1392        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1393        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1394        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1395      }
1396    }
1397
1398    is(UopSplitType.VEC_M0X_VFIRST) {
1399      // LMUL
1400      csBundle(0).rfWen := true.B
1401      csBundle(0).fpWen := false.B
1402      csBundle(0).vecWen := false.B
1403      csBundle(0).ldest := dest
1404    }
1405    is(UopSplitType.VEC_VWW) {
1406      for (i <- 0 until MAX_VLMUL*2) {
1407        when(i.U < lmul){
1408          csBundle(i).srcType(2) := SrcType.DC
1409          csBundle(i).lsrc(0) := src2 + i.U
1410          csBundle(i).lsrc(1) := src2 + i.U
1411          // csBundle(i).lsrc(2) := dest + (2 * i).U
1412          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1413          csBundle(i).uopIdx :=  i.U
1414        } otherwise {
1415          csBundle(i).srcType(2) := SrcType.DC
1416          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1417          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1418          // csBundle(i).lsrc(2) := dest + (2 * i).U
1419          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1420          csBundle(i).uopIdx := i.U
1421        }
1422        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1423        csBundle(numOfUop-1.U).lsrc(0) := src1
1424        csBundle(numOfUop-1.U).lsrc(2) := dest
1425        csBundle(numOfUop-1.U).ldest := dest
1426      }
1427    }
1428    is(UopSplitType.VEC_RGATHER) {
1429      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1430        for (i <- 0 until len)
1431          for (j <- 0 until len) {
1432            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1433            // csBundle(i * len + j).srcType(1) := SrcType.vp
1434            // csBundle(i * len + j).srcType(2) := SrcType.vp
1435            csBundle(i * len + j).lsrc(0) := src1 + i.U
1436            csBundle(i * len + j).lsrc(1) := src2 + j.U
1437            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1438            csBundle(i * len + j).lsrc(2) := vd_old
1439            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1440            csBundle(i * len + j).ldest := vd
1441            csBundle(i * len + j).uopIdx := (i * len + j).U
1442          }
1443      }
1444      switch(vlmulReg) {
1445        is("b001".U ){
1446          genCsBundle_VEC_RGATHER(2)
1447        }
1448        is("b010".U ){
1449          genCsBundle_VEC_RGATHER(4)
1450        }
1451        is("b011".U ){
1452          genCsBundle_VEC_RGATHER(8)
1453        }
1454      }
1455    }
1456    is(UopSplitType.VEC_RGATHER_VX) {
1457      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1458        for (i <- 0 until len)
1459          for (j <- 0 until len) {
1460            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1461            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1462            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1463            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1464            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1465            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1466            csBundle(i * len + j + 1).lsrc(2) := vd_old
1467            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1468            csBundle(i * len + j + 1).ldest := vd
1469            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1470          }
1471      }
1472      // i to vector move
1473      csBundle(0).srcType(0) := SrcType.reg
1474      csBundle(0).srcType(1) := SrcType.imm
1475      csBundle(0).srcType(2) := SrcType.imm
1476      csBundle(0).lsrc(1) := 0.U
1477      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1478      csBundle(0).fuType := FuType.i2v.U
1479      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1480      csBundle(0).rfWen := false.B
1481      csBundle(0).fpWen := false.B
1482      csBundle(0).vecWen := true.B
1483      genCsBundle_RGATHER_VX(1)
1484      switch(vlmulReg) {
1485        is("b001".U ){
1486          genCsBundle_RGATHER_VX(2)
1487        }
1488        is("b010".U ){
1489          genCsBundle_RGATHER_VX(4)
1490        }
1491        is("b011".U ){
1492          genCsBundle_RGATHER_VX(8)
1493        }
1494      }
1495    }
1496    is(UopSplitType.VEC_RGATHEREI16) {
1497      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1498        for (i <- 0 until len)
1499          for (j <- 0 until len) {
1500            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1501            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1502            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1503            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1504            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1505            csBundle((i * len + j)*2+0).ldest := vd0
1506            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1507            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1508            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1509            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1510            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1511            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1512            csBundle((i * len + j)*2+1).ldest := vd1
1513            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1514          }
1515      }
1516      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1517        for (i <- 0 until len)
1518          for (j <- 0 until len) {
1519            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1520            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1521            csBundle(i * len + j).lsrc(0) := src1 + i.U
1522            csBundle(i * len + j).lsrc(1) := src2 + j.U
1523            csBundle(i * len + j).lsrc(2) := vd_old
1524            csBundle(i * len + j).ldest := vd
1525            csBundle(i * len + j).uopIdx := (i * len + j).U
1526          }
1527      }
1528      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1529        for (i <- 0 until len)
1530          for (j <- 0 until len) {
1531            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1532            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1533            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1534            csBundle(i * len + j).lsrc(1) := src2 + j.U
1535            csBundle(i * len + j).lsrc(2) := vd_old
1536            csBundle(i * len + j).ldest := vd
1537            csBundle(i * len + j).uopIdx := (i * len + j).U
1538          }
1539      }
1540      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1541        for (i <- 0 until len)
1542          for (j <- 0 until len) {
1543            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1544            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1545            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1546            csBundle(i * len + j).lsrc(1) := src2 + j.U
1547            csBundle(i * len + j).lsrc(2) := vd_old
1548            csBundle(i * len + j).ldest := vd
1549            csBundle(i * len + j).uopIdx := (i * len + j).U
1550          }
1551      }
1552      when(!vsewReg.orR){
1553        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1554      }.elsewhen(vsewReg === VSew.e32){
1555        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1556      }.elsewhen(vsewReg === VSew.e64){
1557        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1558      }.otherwise{
1559        genCsBundle_VEC_RGATHEREI16(1)
1560      }
1561      switch(vlmulReg) {
1562        is("b001".U) {
1563          when(!vsewReg.orR) {
1564            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1565          }.elsewhen(vsewReg === VSew.e32){
1566            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1567          }.elsewhen(vsewReg === VSew.e64){
1568            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1569          }.otherwise{
1570            genCsBundle_VEC_RGATHEREI16(2)
1571          }
1572        }
1573        is("b010".U) {
1574          when(!vsewReg.orR) {
1575            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1576          }.elsewhen(vsewReg === VSew.e32){
1577            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1578          }.elsewhen(vsewReg === VSew.e64){
1579            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1580          }.otherwise{
1581            genCsBundle_VEC_RGATHEREI16(4)
1582          }
1583        }
1584        is("b011".U) {
1585          when(vsewReg === VSew.e32){
1586            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1587          }.elsewhen(vsewReg === VSew.e64){
1588            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1589          }.otherwise{
1590            genCsBundle_VEC_RGATHEREI16(8)
1591          }
1592        }
1593      }
1594    }
1595    is(UopSplitType.VEC_COMPRESS) {
1596      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1597        for (i <- 0 until len) {
1598          val jlen = if (i == len-1) i+1 else i+2
1599          for (j <- 0 until jlen) {
1600            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1601            val vd = if(i==len-1) (dest + j.U) else {
1602              if (j == i+1) VECTOR_TMP_REG_LMUL.U  else (VECTOR_TMP_REG_LMUL + j + 1).U
1603            }
1604            csBundle(i*(i+3)/2 + j).vecWen := true.B
1605            csBundle(i*(i+3)/2 + j).v0Wen := false.B
1606            val src13Type = if (j == i+1) DontCare else SrcType.vp
1607            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1608            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1609            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1610            if (i == 0) {
1611              csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1612            } else {
1613              csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1614            }
1615            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1616            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1617            csBundle(i*(i+3)/2 + j).ldest := vd
1618            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1619          }
1620        }
1621      }
1622      switch(vlmulReg) {
1623        is("b001".U ){
1624          genCsBundle_VEC_COMPRESS(2)
1625        }
1626        is("b010".U ){
1627          genCsBundle_VEC_COMPRESS(4)
1628        }
1629        is("b011".U ){
1630          genCsBundle_VEC_COMPRESS(8)
1631        }
1632      }
1633    }
1634    is(UopSplitType.VEC_MVNR) {
1635      for (i <- 0 until MAX_VLMUL) {
1636        csBundle(i).lsrc(0) := src1 + i.U
1637        csBundle(i).lsrc(1) := src2 + i.U
1638        csBundle(i).lsrc(2) := dest + i.U
1639        csBundle(i).ldest := dest + i.U
1640        csBundle(i).uopIdx := i.U
1641      }
1642    }
1643    is(UopSplitType.VEC_US_LDST) {
1644      /*
1645      FMV.D.X
1646       */
1647      csBundle(0).srcType(0) := SrcType.reg
1648      csBundle(0).srcType(1) := SrcType.imm
1649      csBundle(0).lsrc(1) := 0.U
1650      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1651      csBundle(0).fuType := FuType.i2v.U
1652      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1653      csBundle(0).rfWen := false.B
1654      csBundle(0).fpWen := false.B
1655      csBundle(0).vecWen := true.B
1656      csBundle(0).vlsInstr := true.B
1657      //LMUL
1658      for (i <- 0 until MAX_VLMUL) {
1659        csBundle(i + 1).srcType(0) := SrcType.vp
1660        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1661        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1662        csBundle(i + 1).ldest := dest + i.U
1663        csBundle(i + 1).uopIdx := i.U
1664        csBundle(i + 1).vlsInstr := true.B
1665      }
1666      csBundle.head.waitForward := isUsSegment
1667      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1668    }
1669    is(UopSplitType.VEC_S_LDST) {
1670      /*
1671      FMV.D.X
1672       */
1673      csBundle(0).srcType(0) := SrcType.reg
1674      csBundle(0).srcType(1) := SrcType.imm
1675      csBundle(0).lsrc(1) := 0.U
1676      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1677      csBundle(0).fuType := FuType.i2v.U
1678      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1679      csBundle(0).rfWen := false.B
1680      csBundle(0).fpWen := false.B
1681      csBundle(0).vecWen := true.B
1682      csBundle(0).vlsInstr := true.B
1683
1684      csBundle(1).srcType(0) := SrcType.reg
1685      csBundle(1).srcType(1) := SrcType.imm
1686      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1687      csBundle(1).lsrc(1) := 0.U
1688      csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1689      csBundle(1).fuType := FuType.i2v.U
1690      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1691      csBundle(1).rfWen := false.B
1692      csBundle(1).fpWen := false.B
1693      csBundle(1).vecWen := true.B
1694      csBundle(1).vlsInstr := true.B
1695
1696      //LMUL
1697      for (i <- 0 until MAX_VLMUL) {
1698        csBundle(i + 2).srcType(0) := SrcType.vp
1699        csBundle(i + 2).srcType(1) := SrcType.vp
1700        csBundle(i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1701        csBundle(i + 2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1702        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1703        csBundle(i + 2).ldest := dest + i.U
1704        csBundle(i + 2).uopIdx := i.U
1705        csBundle(i + 2).vlsInstr := true.B
1706      }
1707      csBundle.head.waitForward := isSdSegment
1708      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1709    }
1710    is(UopSplitType.VEC_I_LDST) {
        // Indexed vector load/store split. Uop 0 is an integer-to-vector move into
        // VECTOR_TMP_REG_LMUL; uops 1 .. MAX_VLMUL are the memory uops, all reading that
        // temp register as lsrc(0). Statement order matters throughout this block:
        // later connections override earlier ones (Chisel last-connect semantics).

          // Segment case, step 1: drives every field of memory uop i + 1. Only the first
          // lmul * nf uops read the old destination (srcType(2) = vp) and write the vector
          // register file. srcType(1) is set to `no` here and is overridden afterwards by
          // genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1.
          // `lmul` and `nf` are elaboration-time register counts / field counts.
1711      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
1712        for (i <- 0 until MAX_VLMUL) {
1713          val vecWen = if (i < lmul * nf) true.B else false.B
1714          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
1715          csBundle(i + 1).srcType(0) := SrcType.vp
1716          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1717          csBundle(i + 1).srcType(1) := SrcType.no
1718          csBundle(i + 1).lsrc(1) := src2 + i.U
1719          csBundle(i + 1).srcType(2) := src2Type
1720          csBundle(i + 1).lsrc(2) := dest + i.U
1721          csBundle(i + 1).ldest := dest + i.U
1722          csBundle(i + 1).rfWen := false.B
1723          csBundle(i + 1).fpWen := false.B
1724          csBundle(i + 1).vecWen := vecWen
1725          csBundle(i + 1).uopIdx := i.U
1726          csBundle(i + 1).vlsInstr := true.B
1727        }
1728      }
          // Segment case, step 2: only the first `emul` memory uops read an index register
          // (srcType(1) = vp, lsrc(1) = src2 + i); the rest get srcType(1) = no.
1729      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
1730        for (i <- 0 until MAX_VLMUL) {
1731          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1732          csBundle(i + 1).srcType(1) := src1Type
1733          csBundle(i + 1).lsrc(1) := src2 + i.U
1734        }
1735      }
1736
1737      val vlmul = vlmulReg
1738      val vsew = Cat(0.U(1.W), vsewReg)
1739      val veew = Cat(0.U(1.W), width)
          // `~vsew + 1.U` is -vsew in modular arithmetic, so this is veew + vlmul - vsew.
1740      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
          // Encodings b001 / b010 / b011 map to 1 / 2 / 3; every other encoding maps to 0.
1741      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
1742        "b001".U -> 1.U,
1743        "b010".U -> 2.U,
1744        "b011".U -> 3.U
1745      ))
1746      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
1747        "b001".U -> 1.U,
1748        "b010".U -> 2.U,
1749        "b011".U -> 3.U
1750      ))
          // Uop 0: integer-to-vector move (FuType.i2v with e64) into VECTOR_TMP_REG_LMUL.
1751      csBundle(0).srcType(0) := SrcType.reg
1752      csBundle(0).srcType(1) := SrcType.imm
1753      csBundle(0).lsrc(1) := 0.U
1754      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1755      csBundle(0).fuType := FuType.i2v.U
1756      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1757      csBundle(0).rfWen := false.B
1758      csBundle(0).fpWen := false.B
1759      csBundle(0).vecWen := true.B
1760      csBundle(0).vlsInstr := true.B
1761
1762      // Memory uops. nf === 0: non-segment indexed access.
1763      when(nf === 0.U) {
1764        for (i <- 0 until MAX_VLMUL) {
              // indexedLSRegOffset(i) is defined outside this block (presumably an
              // indexedLSUopTable instance for uop i - confirm). It is driven with
              // Cat(simple_emul, simple_lmul) and returns the register offsets to use
              // for vs2 and vd.
1765          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1766          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1767          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1768          csBundle(i + 1).srcType(0) := SrcType.vp
1769          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
              // One-hot select of src2 + offsetVs2.
1770          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1771          csBundle(i + 1).srcType(2) := SrcType.vp
1772          // lsrc(2) is the old vd: dest + offsetVd, same register as ldest.
1773          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1774          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1775          csBundle(i + 1).uopIdx := i.U
1776          csBundle(i + 1).vlsInstr := true.B
1777        }
1778      }.otherwise{
1779        // nf =/= 0: segment indexed load/store. The calls below pass nf + 1 as the field count.
1780        // Step 1: src0, old vd and vd, selected by (simple_lmul, nf).
            // No case is listed for simple_lmul === 3.U or for the nf values missing under
            // simple_lmul 1 and 2; the uops keep their prior connections in those cases.
1781        switch(simple_lmul) {
1782          is(0.U) {
1783            switch(nf) {
1784              is(1.U) {
1785                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
1786              }
1787              is(2.U) {
1788                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
1789              }
1790              is(3.U) {
1791                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
1792              }
1793              is(4.U) {
1794                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
1795              }
1796              is(5.U) {
1797                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
1798              }
1799              is(6.U) {
1800                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
1801              }
1802              is(7.U) {
1803                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
1804              }
1805            }
1806          }
1807          is(1.U) {
1808            switch(nf) {
1809              is(1.U) {
1810                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
1811              }
1812              is(2.U) {
1813                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
1814              }
1815              is(3.U) {
1816                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
1817              }
1818            }
1819          }
1820          is(2.U) {
1821            switch(nf) {
1822              is(1.U) {
1823                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
1824              }
1825            }
1826          }
1827        }
1828
1829        // Step 2: index source (src1). Must stay after step 1, whose srcType(1) it overrides.
1830        switch(simple_emul) {
1831          is(0.U) {
1832            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
1833          }
1834          is(1.U) {
1835            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
1836          }
1837          is(2.U) {
1838            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
1839          }
1840          is(3.U) {
1841            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
1842          }
1843        }
1844
1845        // Vector stores never write the vector register file; overrides vecWen from step 1.
1846        when(isVstore) {
1847          for (i <- 0 until MAX_VLMUL) {
1848            csBundle(i + 1).vecWen := false.B
1849          }
1850        }
1851      }
          // For indexed segment accesses the first uop waits forward and the last uop
          // blocks backward.
1852      csBundle.head.waitForward := isIxSegment
1853      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1854    }
1855  }
1856
1857  //readyFromRename Counter
1858  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1859
1860  // The left uops of the complex inst in ComplexDecoder can be send out this cycle
1861  val thisAllOut = uopRes <= readyCounter
1862
1863  switch(state) {
1864    is(s_idle) {
1865      when (inValid) {
1866        stateNext := s_active
1867        uopResNext := inUopInfo.numOfUop
1868      }
1869    }
1870    is(s_active) {
1871      when (thisAllOut) {
1872        when (inValid) {
1873          stateNext := s_active
1874          uopResNext := inUopInfo.numOfUop
1875        }.otherwise {
1876          stateNext := s_idle
1877          uopResNext := 0.U
1878        }
1879      }.otherwise {
1880        stateNext := s_active
1881        uopResNext := uopRes - readyCounter
1882      }
1883    }
1884  }
1885
1886  state := Mux(io.redirect, s_idle, stateNext)
1887  uopRes := Mux(io.redirect, 0.U, uopResNext)
1888
1889  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1890
1891  for(i <- 0 until RenameWidth) {
1892    outValids(i) := complexNum > i.U
1893    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1894  }
1895
1896  outComplexNum := Mux(state === s_active, complexNum, 0.U)
1897  inReady := state === s_idle || state === s_active && thisAllOut
1898
1899//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1900//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1901//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1902//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1903//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1904//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1905//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1906//
1907//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1908//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1909//    0.U)
1910//  validToRename.zipWithIndex.foreach{
1911//    case(dst, i) =>
1912//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1913//      dst := MuxCase(false.B, Seq(
1914//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1915//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1916//      ).toSeq)
1917//  }
1918//
1919//  readyToIBuf.zipWithIndex.foreach {
1920//    case (dst, i) =>
1921//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1922//      dst := MuxCase(true.B, Seq(
1923//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1924//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1925//      ).toSeq)
1926//  }
1927//
1928//  io.deq.decodedInsts := decodedInsts
1929//  io.deq.complexNum := complexNum
1930//  io.deq.validToRename := validToRename
1931//  io.deq.readyToIBuf := readyToIBuf
1932}
1933