// xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 762f2b3971380059b4d5a794e7f45bcf222155ae)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Constant lookup table for one uop of a non-segment indexed vector load/store.
  *
  * For the fixed `uopIdx` given at construction, the module maps every (emul, lmul)
  * combination to the pair of offsets exposed on `outOffsetVs2` / `outOffsetVd`.
  * The table is built at elaboration time and minimized with QMC.
  *
  * Ports:
  *  - `src`: 4-bit key laid out as `{emul[1:0], lmul[1:0]}`; both fields are used as
  *    log2 exponents in the range 0..3.
  *  - `outOffsetVs2`: 3-bit vs2 offset of this uop.
  *  - `outOffsetVd`: 3-bit vd offset of this uop.
  *
  * @param uopIdx index of the uop served by this table instance
  */
class indexedLSUopTable(uopIdx: Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair of uop `uopIdx`.
    *
    * Only non-segment indexed load/store is considered. `lmul` and `emul` are log2
    * exponents: the instruction is split into `1 << max(lmul, emul)` uops. The side with
    * the larger exponent advances by one on every uop, the other side advances once every
    * `1 << |emul - lmul|` uops.
    *
    * @param lmul   log2 exponent used for the vd side
    * @param emul   log2 exponent used for the vs2 side
    * @param uopIdx index of the uop within the split instruction
    * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` lies outside the uop range
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul: Int, emul: Int, uopIdx: Int): (Int, Int) = {
    if (lmul < emul) {
      // lmul < emul: the uop count follows emul; vs2 steps every uop, vd every `ratio` uops
      val ratio = 1 << (emul - lmul)
      if (uopIdx >= 0 && uopIdx < (1 << emul)) (uopIdx, uopIdx / ratio) else (0, 0)
    } else {
      // lmul >= emul: the uop count follows lmul; vd steps every uop, vs2 every `ratio` uops
      val ratio = 1 << (lmul - emul)
      if (uopIdx >= 0 && uopIdx < (1 << lmul)) (uopIdx / ratio, uopIdx) else (0, 0)
    }
  }

  // Indexed load/store table rows, one per (emul, lmul) pair in emul-major order:
  // (emul, lmul, offsetVs2, offsetVd)
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Key is {emul, lmul}; value is {offsetVs2, offsetVd}. All 16 keys are listed, so the
  // default pattern is never selected.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat(((emul << 2) | lmul).U(4.W)), BitPat(((offsetVs2 << 3) | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Constants shared by the logic that splits vector instructions into uops.
  */
trait VectorConstants {
  // Largest register-group count iterated over when splitting an instruction (LMUL = 8).
  val MAX_VLMUL: Int = 8
  // Logical index of the first temporary vector register written by helper uops.
  // Original note: 33~47 -> 15 (indices 33 to 47, i.e. 15 registers).
  val VECTOR_TMP_REG_LMUL: Int = 33
  // Original note: "in v0 regfile". Not referenced in the visible code -- TODO confirm meaning.
  val VECTOR_COMPRESS: Int = 1
  // Presumably the maximum uop count of an indexed load/store -- TODO confirm;
  // not referenced in the visible code.
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * IO bundle of [[DecodeUnitComp]], the decoder stage that expands one complex
  * instruction into a sequence of uops.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Redirect indication. NOTE(review): presumably aborts the instruction being expanded;
  // its use is outside the visible code -- confirm in DecodeUnitComp.
  val redirect = Input(Bool())
  // Custom CSR control inputs. NOTE(review): not referenced in the visible code.
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // Bypassed vtype (from vtypeGen); connected to the vpu fields of the two vset uops.
  val vtypeBypass = Input(new VType)
  // Complex instruction to expand. It is passed in when the first instruction of the
  // decode vector is a complex one, and is latched when this port fires.
  val in = Flipped(DecoupledIO(new Bundle {
    // Result of the simple decoder for this instruction.
    val simpleDecodedInst = new DecodedInst
    // Pre-computed split information (numOfUop, numOfWB and lmul are read from it).
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    // One decoupled slot per rename lane carrying the generated uops.
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  // NOTE(review): presumably the number of uops offered on `out` this cycle -- confirm.
  // Three bits can represent at most 7; check this against RenameWidth.
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153
  // uop split type: selects which decomposition the switch below applies
155  val typeOfSplit = latchedInst.uopSplitType
156  val src1Type = latchedInst.srcType(0)
157  val src1IsImm = src1Type === SrcType.imm
158  val src1IsFp = src1Type === SrcType.fp
159
160  val isVstore = FuType.isVStore(latchedInst.fuType)
161
162  numOfUop := latchedUopInfo.numOfUop
163  numOfWB := latchedUopInfo.numOfWB
164
165  //uops dispatch
166  val s_idle :: s_active :: Nil = Enum(2)
167  val state = RegInit(s_idle)
168  val stateNext = WireDefault(state)
169  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
170  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
171  val uopResNext = WireInit(uopRes)
172  val e64 = 3.U(2.W)
173  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
174  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
175  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
176
  // split result: up to maxUopSize uops, each defaulting to the latched instruction
178  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
179  csBundle.foreach { case dst =>
180    dst := latchedInst
181    dst.numUops := latchedUopInfo.numOfUop
182    dst.numWB := latchedUopInfo.numOfWB
183    dst.firstUop := false.B
184    dst.lastUop := false.B
185    dst.vlsInstr := false.B
186  }
187
188  csBundle(0).firstUop := true.B
189  csBundle(numOfUop - 1.U).lastUop := true.B
190
191  switch(typeOfSplit) {
192    is(UopSplitType.VSET) {
193      // In simple decoder, rfWen and vecWen are not set
194      when(isVsetSimple) {
195        // Default
196        // uop0 set rd, never flushPipe
197        csBundle(0).fuType := FuType.vsetiwi.U
198        csBundle(0).flushPipe := false.B
199        csBundle(0).blockBackward := false.B
200        csBundle(0).rfWen := true.B
201        // uop1 set vl, vsetvl will flushPipe
202        csBundle(1).ldest := Vl_IDX.U
203        csBundle(1).vecWen := false.B
204        csBundle(1).vlWen := true.B
205        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
206          // write nothing, uop0 is a nop instruction
207          csBundle(0).rfWen := false.B
208          csBundle(0).fpWen := false.B
209          csBundle(0).vecWen := false.B
210          csBundle(0).vlWen := false.B
211          csBundle(1).fuType := FuType.vsetfwf.U
212          csBundle(1).srcType(0) := SrcType.no
213          csBundle(1).srcType(2) := SrcType.no
214          csBundle(1).srcType(3) := SrcType.no
215          csBundle(1).srcType(4) := SrcType.vp
216          csBundle(1).lsrc(4) := Vl_IDX.U
217        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
218          // uop0: mv vtype gpr to vector region
219          csBundle(0).srcType(0) := SrcType.xp
220          csBundle(0).srcType(1) := SrcType.no
221          csBundle(0).lsrc(0) := src2
222          csBundle(0).lsrc(1) := 0.U
223          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
224          csBundle(0).fuType := FuType.i2v.U
225          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
226          csBundle(0).rfWen := false.B
227          csBundle(0).fpWen := false.B
228          csBundle(0).vecWen := true.B
229          csBundle(0).vlWen := false.B
230          // uop1: uvsetvcfg_vv
231          csBundle(1).fuType := FuType.vsetfwf.U
232          // vl
233          csBundle(1).srcType(0) := SrcType.no
234          csBundle(1).srcType(2) := SrcType.no
235          csBundle(1).srcType(3) := SrcType.no
236          csBundle(1).srcType(4) := SrcType.vp
237          csBundle(1).lsrc(4) := Vl_IDX.U
238          // vtype
239          csBundle(1).srcType(1) := SrcType.vp
240          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
241          csBundle(1).vecWen := false.B
242          csBundle(1).vlWen := true.B
243          csBundle(1).ldest := Vl_IDX.U
244        }.elsewhen(dest === 0.U) {
245          // write nothing, uop0 is a nop instruction
246          csBundle(0).rfWen := false.B
247          csBundle(0).fpWen := false.B
248          csBundle(0).vecWen := false.B
249          csBundle(0).vlWen := false.B
250        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) {
          // because vsetvl may modify src2 when src2 == rd,
          // we need to modify vd in the second uop to avoid the dependency
253          // uop0 set vl
254          csBundle(0).fuType := FuType.vsetiwf.U
255          csBundle(0).ldest := Vl_IDX.U
256          csBundle(0).rfWen := false.B
257          csBundle(0).vlWen := true.B
258          // uop1 set rd
259          csBundle(1).fuType := FuType.vsetiwi.U
260          csBundle(1).ldest := dest
261          csBundle(1).rfWen := true.B
262          csBundle(1).vlWen := false.B
263        }
264        // use bypass vtype from vtypeGen
265        csBundle(0).vpu.connectVType(io.vtypeBypass)
266        csBundle(1).vpu.connectVType(io.vtypeBypass)
267      }
268    }
269    is(UopSplitType.VEC_VVV) {
270      for (i <- 0 until MAX_VLMUL) {
271        csBundle(i).lsrc(0) := src1 + i.U
272        csBundle(i).lsrc(1) := src2 + i.U
273        csBundle(i).lsrc(2) := dest + i.U
274        csBundle(i).ldest := dest + i.U
275        csBundle(i).uopIdx := i.U
276      }
277    }
278    is(UopSplitType.VEC_VFV) {
279      /*
280      f to vector move
281       */
282      csBundle(0).srcType(0) := SrcType.fp
283      csBundle(0).srcType(1) := SrcType.imm
284      csBundle(0).srcType(2) := SrcType.imm
285      csBundle(0).lsrc(1) := 0.U
286      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
287      csBundle(0).fuType := FuType.f2v.U
288      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
289      csBundle(0).vecWen := true.B
290      csBundle(0).vpu.isReverse := false.B
291      /*
292      LMUL
293       */
294      for (i <- 0 until MAX_VLMUL) {
295        csBundle(i + 1).srcType(0) := SrcType.vp
296        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
297        csBundle(i + 1).lsrc(1) := src2 + i.U
298        csBundle(i + 1).lsrc(2) := dest + i.U
299        csBundle(i + 1).ldest := dest + i.U
300        csBundle(i + 1).uopIdx := i.U
301      }
302    }
303    is(UopSplitType.VEC_EXT2) {
304      for (i <- 0 until MAX_VLMUL / 2) {
305        csBundle(2 * i).lsrc(1) := src2 + i.U
306        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
307        csBundle(2 * i).ldest := dest + (2 * i).U
308        csBundle(2 * i).uopIdx := (2 * i).U
309        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
310        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
311        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
312        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
313      }
314    }
315    is(UopSplitType.VEC_EXT4) {
316      for (i <- 0 until MAX_VLMUL / 4) {
317        csBundle(4 * i).lsrc(1) := src2 + i.U
318        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
319        csBundle(4 * i).ldest := dest + (4 * i).U
320        csBundle(4 * i).uopIdx := (4 * i).U
321        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
322        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
323        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
324        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
325        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
326        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
327        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
328        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
329        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
330        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
331        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
332        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
333      }
334    }
335    is(UopSplitType.VEC_EXT8) {
336      for (i <- 0 until MAX_VLMUL) {
337        csBundle(i).lsrc(1) := src2
338        csBundle(i).lsrc(2) := dest + i.U
339        csBundle(i).ldest := dest + i.U
340        csBundle(i).uopIdx := i.U
341      }
342    }
343    is(UopSplitType.VEC_0XV) {
344      /*
345      i/f to vector move
346       */
347      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
348      csBundle(0).srcType(1) := SrcType.imm
349      csBundle(0).srcType(2) := SrcType.imm
350      csBundle(0).lsrc(1) := 0.U
351      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
352      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
353      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
354      csBundle(0).rfWen := false.B
355      csBundle(0).fpWen := false.B
356      csBundle(0).vecWen := true.B
357      /*
358      vmv.s.x
359       */
360      csBundle(1).srcType(0) := SrcType.vp
361      csBundle(1).srcType(1) := SrcType.imm
362      csBundle(1).srcType(2) := SrcType.vp
363      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
364      csBundle(1).lsrc(1) := 0.U
365      csBundle(1).lsrc(2) := dest
366      csBundle(1).ldest := dest
367      csBundle(1).rfWen := false.B
368      csBundle(1).fpWen := false.B
369      csBundle(1).vecWen := true.B
370      csBundle(1).uopIdx := 0.U
371    }
372    is(UopSplitType.VEC_VXV) {
373      /*
374      i to vector move
375       */
376      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
377      csBundle(0).srcType(1) := SrcType.imm
378      csBundle(0).srcType(2) := SrcType.imm
379      csBundle(0).lsrc(1) := 0.U
380      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
381      csBundle(0).fuType := FuType.i2v.U
382      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
383      csBundle(0).vecWen := true.B
384      csBundle(0).vpu.isReverse := false.B
385      /*
386      LMUL
387       */
388      for (i <- 0 until MAX_VLMUL) {
389        csBundle(i + 1).srcType(0) := SrcType.vp
390        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
391        csBundle(i + 1).lsrc(1) := src2 + i.U
392        csBundle(i + 1).lsrc(2) := dest + i.U
393        csBundle(i + 1).ldest := dest + i.U
394        csBundle(i + 1).uopIdx := i.U
395      }
396    }
397    is(UopSplitType.VEC_VVW) {
398      for (i <- 0 until MAX_VLMUL / 2) {
399        csBundle(2 * i).lsrc(0) := src1 + i.U
400        csBundle(2 * i).lsrc(1) := src2 + i.U
401        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
402        csBundle(2 * i).ldest := dest + (2 * i).U
403        csBundle(2 * i).uopIdx := (2 * i).U
404        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
405        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
406        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
407        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
408        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
409      }
410    }
411    is(UopSplitType.VEC_VFW) {
412      /*
413      f to vector move
414       */
415      csBundle(0).srcType(0) := SrcType.fp
416      csBundle(0).srcType(1) := SrcType.imm
417      csBundle(0).srcType(2) := SrcType.imm
418      csBundle(0).lsrc(1) := 0.U
419      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
420      csBundle(0).fuType := FuType.f2v.U
421      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
422      csBundle(0).rfWen := false.B
423      csBundle(0).fpWen := false.B
424      csBundle(0).vecWen := true.B
425
426      for (i <- 0 until MAX_VLMUL / 2) {
427        csBundle(2 * i + 1).srcType(0) := SrcType.vp
428        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
429        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
430        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
431        csBundle(2 * i + 1).ldest := dest + (2 * i).U
432        csBundle(2 * i + 1).uopIdx := (2 * i).U
433        csBundle(2 * i + 2).srcType(0) := SrcType.vp
434        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
435        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
436        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
437        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
438        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
439      }
440    }
441    is(UopSplitType.VEC_WVW) {
442      for (i <- 0 until MAX_VLMUL / 2) {
443        csBundle(2 * i).lsrc(0) := src1 + i.U
444        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
445        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
446        csBundle(2 * i).ldest := dest + (2 * i).U
447        csBundle(2 * i).uopIdx := (2 * i).U
448        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
449        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
450        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
451        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
452        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
453      }
454    }
455    is(UopSplitType.VEC_VXW) {
456      /*
457      i to vector move
458       */
459      csBundle(0).srcType(0) := SrcType.reg
460      csBundle(0).srcType(1) := SrcType.imm
461      csBundle(0).srcType(2) := SrcType.imm
462      csBundle(0).lsrc(1) := 0.U
463      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
464      csBundle(0).fuType := FuType.i2v.U
465      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
466      csBundle(0).vecWen := true.B
467
468      for (i <- 0 until MAX_VLMUL / 2) {
469        csBundle(2 * i + 1).srcType(0) := SrcType.vp
470        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
471        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
472        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
473        csBundle(2 * i + 1).ldest := dest + (2 * i).U
474        csBundle(2 * i + 1).uopIdx := (2 * i).U
475        csBundle(2 * i + 2).srcType(0) := SrcType.vp
476        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
477        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
478        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
479        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
480        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
481      }
482    }
483    is(UopSplitType.VEC_WXW) {
484      /*
485      i to vector move
486       */
487      csBundle(0).srcType(0) := SrcType.reg
488      csBundle(0).srcType(1) := SrcType.imm
489      csBundle(0).srcType(2) := SrcType.imm
490      csBundle(0).lsrc(1) := 0.U
491      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
492      csBundle(0).fuType := FuType.i2v.U
493      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
494      csBundle(0).vecWen := true.B
495
496      for (i <- 0 until MAX_VLMUL / 2) {
497        csBundle(2 * i + 1).srcType(0) := SrcType.vp
498        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
499        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
500        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
501        csBundle(2 * i + 1).ldest := dest + (2 * i).U
502        csBundle(2 * i + 1).uopIdx := (2 * i).U
503        csBundle(2 * i + 2).srcType(0) := SrcType.vp
504        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
505        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
506        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
507        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
508        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
509      }
510    }
511    is(UopSplitType.VEC_WVV) {
512      for (i <- 0 until MAX_VLMUL / 2) {
513
514        csBundle(2 * i).lsrc(0) := src1 + i.U
515        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
516        csBundle(2 * i).lsrc(2) := dest + i.U
517        csBundle(2 * i).ldest := dest + i.U
518        csBundle(2 * i).uopIdx := (2 * i).U
519        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
520        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
521        csBundle(2 * i + 1).lsrc(2) := dest + i.U
522        csBundle(2 * i + 1).ldest := dest + i.U
523        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
524      }
525    }
526    is(UopSplitType.VEC_WFW) {
527      /*
528      f to vector move
529       */
530      csBundle(0).srcType(0) := SrcType.fp
531      csBundle(0).srcType(1) := SrcType.imm
532      csBundle(0).srcType(2) := SrcType.imm
533      csBundle(0).lsrc(1) := 0.U
534      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
535      csBundle(0).fuType := FuType.f2v.U
536      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
537      csBundle(0).rfWen := false.B
538      csBundle(0).fpWen := false.B
539      csBundle(0).vecWen := true.B
540
541      for (i <- 0 until MAX_VLMUL / 2) {
542        csBundle(2 * i + 1).srcType(0) := SrcType.vp
543        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
544        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
545        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
546        csBundle(2 * i + 1).ldest := dest + (2 * i).U
547        csBundle(2 * i + 1).uopIdx := (2 * i).U
548        csBundle(2 * i + 2).srcType(0) := SrcType.vp
549        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
550        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
551        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
552        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
553        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
554      }
555    }
556    is(UopSplitType.VEC_WXV) {
557      /*
558      i to vector move
559       */
560      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
561      csBundle(0).srcType(1) := SrcType.imm
562      csBundle(0).srcType(2) := SrcType.imm
563      csBundle(0).lsrc(1) := 0.U
564      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
565      csBundle(0).fuType := FuType.i2v.U
566      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
567      csBundle(0).vecWen := true.B
568
569      for (i <- 0 until MAX_VLMUL / 2) {
570        csBundle(2 * i + 1).srcType(0) := SrcType.vp
571        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
572        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
573        csBundle(2 * i + 1).lsrc(2) := dest + i.U
574        csBundle(2 * i + 1).ldest := dest + i.U
575        csBundle(2 * i + 1).uopIdx := (2 * i).U
576        csBundle(2 * i + 2).srcType(0) := SrcType.vp
577        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
578        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
579        csBundle(2 * i + 2).lsrc(2) := dest + i.U
580        csBundle(2 * i + 2).ldest := dest + i.U
581        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
582      }
583    }
584    is(UopSplitType.VEC_VVM) {
585      csBundle(0).lsrc(2) := dest
586      csBundle(0).ldest := dest
587      csBundle(0).uopIdx := 0.U
588      for (i <- 1 until MAX_VLMUL) {
589        csBundle(i).lsrc(0) := src1 + i.U
590        csBundle(i).lsrc(1) := src2 + i.U
591        csBundle(i).lsrc(2) := dest
592        csBundle(i).ldest := dest
593        csBundle(i).uopIdx := i.U
594      }
595    }
596    is(UopSplitType.VEC_VFM) {
597      /*
598      f to vector move
599       */
600      csBundle(0).srcType(0) := SrcType.fp
601      csBundle(0).srcType(1) := SrcType.imm
602      csBundle(0).srcType(2) := SrcType.imm
603      csBundle(0).lsrc(1) := 0.U
604      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
605      csBundle(0).fuType := FuType.f2v.U
606      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
607      csBundle(0).rfWen := false.B
608      csBundle(0).fpWen := false.B
609      csBundle(0).vecWen := true.B
610      //LMUL
611      csBundle(1).srcType(0) := SrcType.vp
612      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
613      csBundle(1).lsrc(2) := dest
614      csBundle(1).ldest := dest
615      csBundle(1).uopIdx := 0.U
616      for (i <- 1 until MAX_VLMUL) {
617        csBundle(i + 1).srcType(0) := SrcType.vp
618        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
619        csBundle(i + 1).lsrc(1) := src2 + i.U
620        csBundle(i + 1).lsrc(2) := dest
621        csBundle(i + 1).ldest := dest
622        csBundle(i + 1).uopIdx := i.U
623      }
624      csBundle(numOfUop - 1.U).ldest := dest
625    }
626    is(UopSplitType.VEC_VXM) {
627      /*
628      i to vector move
629       */
630      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
631      csBundle(0).srcType(1) := SrcType.imm
632      csBundle(0).srcType(2) := SrcType.imm
633      csBundle(0).lsrc(1) := 0.U
634      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
635      csBundle(0).fuType := FuType.i2v.U
636      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
637      csBundle(0).vecWen := true.B
638      //LMUL
639      csBundle(1).srcType(0) := SrcType.vp
640      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
641      csBundle(1).lsrc(2) := dest
642      csBundle(1).ldest := dest
643      csBundle(1).uopIdx := 0.U
644      for (i <- 1 until MAX_VLMUL) {
645        csBundle(i + 1).srcType(0) := SrcType.vp
646        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
647        csBundle(i + 1).lsrc(1) := src2 + i.U
648        csBundle(i + 1).lsrc(2) := dest
649        csBundle(i + 1).ldest := dest
650        csBundle(i + 1).uopIdx := i.U
651      }
652      csBundle(numOfUop - 1.U).ldest := dest
653    }
654    is(UopSplitType.VEC_SLIDE1UP) {
655      /*
656      i to vector move
657       */
658      csBundle(0).srcType(0) := SrcType.reg
659      csBundle(0).srcType(1) := SrcType.imm
660      csBundle(0).srcType(2) := SrcType.imm
661      csBundle(0).lsrc(1) := 0.U
662      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
663      csBundle(0).fuType := FuType.i2v.U
664      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
665      csBundle(0).vecWen := true.B
666      //LMUL
667      csBundle(1).srcType(0) := SrcType.vp
668      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
669      csBundle(1).lsrc(2) := dest
670      csBundle(1).ldest := dest
671      csBundle(1).uopIdx := 0.U
672      for (i <- 1 until MAX_VLMUL) {
673        csBundle(i + 1).srcType(0) := SrcType.vp
674        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
675        csBundle(i + 1).lsrc(1) := src2 + i.U
676        csBundle(i + 1).lsrc(2) := dest + i.U
677        csBundle(i + 1).ldest := dest + i.U
678        csBundle(i + 1).uopIdx := i.U
679      }
680    }
681    is(UopSplitType.VEC_FSLIDE1UP) {
682      /*
683      f to vector move
684       */
685      csBundle(0).srcType(0) := SrcType.fp
686      csBundle(0).srcType(1) := SrcType.imm
687      csBundle(0).srcType(2) := SrcType.imm
688      csBundle(0).lsrc(1) := 0.U
689      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
690      csBundle(0).fuType := FuType.f2v.U
691      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
692      csBundle(0).rfWen := false.B
693      csBundle(0).fpWen := false.B
694      csBundle(0).vecWen := true.B
695      //LMUL
696      csBundle(1).srcType(0) := SrcType.vp
697      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
698      csBundle(1).lsrc(1) := src2
699      csBundle(1).lsrc(2) := dest
700      csBundle(1).ldest := dest
701      csBundle(1).uopIdx := 0.U
702      for (i <- 1 until MAX_VLMUL) {
703        csBundle(i + 1).srcType(0) := SrcType.vp
704        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
705        csBundle(i + 1).lsrc(1) := src2 + i.U
706        csBundle(i + 1).lsrc(2) := dest + i.U
707        csBundle(i + 1).ldest := dest + i.U
708        csBundle(i + 1).uopIdx := i.U
709      }
710    }
711    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
712      /*
713      i to vector move
714       */
715      csBundle(0).srcType(0) := SrcType.reg
716      csBundle(0).srcType(1) := SrcType.imm
717      csBundle(0).srcType(2) := SrcType.imm
718      csBundle(0).lsrc(1) := 0.U
719      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
720      csBundle(0).fuType := FuType.i2v.U
721      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
722      csBundle(0).vecWen := true.B
723      //LMUL
724      for (i <- 0 until MAX_VLMUL) {
725        csBundle(2 * i + 1).srcType(0) := SrcType.vp
726        csBundle(2 * i + 1).srcType(1) := SrcType.vp
727        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
728        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
729        csBundle(2 * i + 1).lsrc(2) := dest + i.U
730        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
731        csBundle(2 * i + 1).uopIdx := (2 * i).U
732        if (2 * i + 2 < MAX_VLMUL * 2) {
733          csBundle(2 * i + 2).srcType(0) := SrcType.vp
734          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
735          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
736          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
737          csBundle(2 * i + 2).ldest := dest + i.U
738          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
739        }
740      }
741      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
742      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
743      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
744    }
is(UopSplitType.VEC_FSLIDE1DOWN) {
  // Uop 0 is the "f to vector move": it takes a floating-point source, is routed
  // to the f2v function unit and writes the temporary register VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.fp
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.f2v.U
  moveUop.fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
  // Only the vector write-enable is raised for this uop.
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B

  // LMUL: each register index owns an odd-numbered uop and, when it still fits
  // below MAX_VLMUL * 2, the even-numbered uop that follows it.
  (0 until MAX_VLMUL).foreach { reg =>
    val oddIdx = 2 * reg + 1
    val evenIdx = oddIdx + 1

    // Odd uop: reads src2 + reg + 1, src2 + reg and dest + reg, and writes the
    // second temporary register.
    val oddUop = csBundle(oddIdx)
    oddUop.srcType(0) := SrcType.vp
    oddUop.srcType(1) := SrcType.vp
    oddUop.lsrc(0) := src2 + (reg + 1).U
    oddUop.lsrc(1) := src2 + reg.U
    oddUop.lsrc(2) := dest + reg.U
    oddUop.ldest := VECTOR_TMP_REG_LMUL.U + 1.U
    oddUop.uopIdx := (oddIdx - 1).U

    if (evenIdx < MAX_VLMUL * 2) {
      // Even uop: reads both temporaries and writes dest + reg.
      // lsrc(1) is intentionally not driven here (don't care).
      val evenUop = csBundle(evenIdx)
      evenUop.srcType(0) := SrcType.vp
      evenUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      evenUop.lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
      evenUop.ldest := dest + reg.U
      evenUop.uopIdx := (evenIdx - 1).U
    }
  }

  // These connections come last so that they override whatever the loop above
  // assigned to the uop at index numOfUop - 1.
  val finalUop = csBundle(numOfUop - 1.U)
  finalUop.srcType(0) := SrcType.vp
  finalUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
  finalUop.ldest := dest + lmul - 1.U
}
is(UopSplitType.VEC_VRED) {
  // Literal register number VECTOR_TMP_REG_LMUL + offset.
  def tmpReg(offset: Int): UInt = (VECTOR_TMP_REG_LMUL + offset).U

  // Programs uop `idx` to read `srcA`/`srcB` and write temporary register `idx`;
  // its third source is marked don't-care.
  def pairUop(idx: Int, srcA: UInt, srcB: UInt): Unit = {
    val uop = csBundle(idx)
    uop.srcType(2) := SrcType.DC
    uop.lsrc(0) := srcA
    uop.lsrc(1) := srcB
    uop.ldest := tmpReg(idx)
    uop.uopIdx := idx.U
  }

  // vlmul encoding b001: a single pair uop over src2 + 1 and src2.
  when(vlmulReg === "b001".U) {
    pairUop(0, src2 + 1.U, src2)
  }

  // vlmul encoding b010: two pair uops over the sources, one over the temporaries.
  when(vlmulReg === "b010".U) {
    pairUop(0, src2 + 1.U, src2)
    pairUop(1, src2 + 3.U, src2 + 2.U)
    pairUop(2, tmpReg(1), tmpReg(0))
  }

  // vlmul encoding b011: three levels of pair uops selected by the uop index.
  when(vlmulReg === "b011".U) {
    val firstLevelEnd = MAX_VLMUL - MAX_VLMUL / 2
    val secondLevelEnd = MAX_VLMUL - MAX_VLMUL / 4
    val thirdLevelEnd = MAX_VLMUL - MAX_VLMUL / 8
    (0 until MAX_VLMUL).foreach { idx =>
      val uop = csBundle(idx)
      // Every uop in range gets a don't-care third source and its own index,
      // including the ones that match none of the level branches below.
      uop.srcType(2) := SrcType.DC
      uop.uopIdx := idx.U
      if (idx < firstLevelEnd) {
        // Level 1: adjacent source registers.
        uop.lsrc(0) := src2 + (idx * 2 + 1).U
        uop.lsrc(1) := src2 + (idx * 2).U
        uop.ldest := tmpReg(idx)
      } else if (idx < secondLevelEnd) {
        // Level 2: adjacent temporaries produced by level 1.
        val pairBase = (idx - MAX_VLMUL / 2) * 2
        uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + pairBase + 1).U
        uop.lsrc(1) := (VECTOR_TMP_REG_LMUL + pairBase).U
        uop.ldest := tmpReg(idx)
      } else if (idx < thirdLevelEnd) {
        // Level 3: hard-wired to uop 6 reading temporaries 5 and 4.
        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
      }
    }
  }

  // Per the original note this covers 2 <= vlmul <= 8: bit 2 of vlmulReg is clear
  // and the low two bits are not both zero. It must stay after the blocks above,
  // because it re-drives fields of the uop at index numOfUop - 1.
  val lmulTwoToEight = vlmulReg(2) === 0.U && vlmulReg(1, 0).orR
  when(lmulTwoToEight) {
    val finalUop = csBundle(numOfUop - 1.U)
    finalUop.srcType(2) := SrcType.vp
    finalUop.lsrc(0) := src1
    finalUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
    finalUop.lsrc(2) := dest
    finalUop.ldest := dest
    finalUop.uopIdx := numOfUop - 1.U
  }
}
is(UopSplitType.VEC_VFRED) {
  // The uop sequence for every (vlmul, vsew) pair has the same three phases:
  //   1. pair uops that combine adjacent registers of the src2 group into
  //      temporaries (only when the group has more than one register),
  //   2. fold uops that read one register on both sources and raise exactly one
  //      isFoldTo1_N flag,
  //   3. a final uop that combines src1 with the last temporary into dest.
  // The helpers below emit these phases; the table at the end lists which folds
  // each (vlmul, vsew) pair uses. Pairs missing from the table drive nothing.
  val vlmul = vlmulReg
  val vsew = vsewReg

  // Literal register number VECTOR_TMP_REG_LMUL + offset.
  def tmpReg(offset: Int): UInt = (VECTOR_TMP_REG_LMUL + offset).U

  // Drives both vector sources, the destination and the uop index of uop `idx`.
  def program(idx: Int, srcA: UInt, srcB: UInt, dst: UInt): Unit = {
    val uop = csBundle(idx)
    uop.lsrc(0) := srcA
    uop.lsrc(1) := srcB
    uop.ldest := dst
    uop.uopIdx := idx.U
  }

  // Raises the isFoldTo1_<ratio> flag of uop `idx`.
  def markFold(idx: Int, ratio: Int): Unit = ratio match {
    case 2 => csBundle(idx).vpu.fpu.isFoldTo1_2 := true.B
    case 4 => csBundle(idx).vpu.fpu.isFoldTo1_4 := true.B
    case 8 => csBundle(idx).vpu.fpu.isFoldTo1_8 := true.B
    case _ => require(false, s"unsupported fold ratio $ratio")
  }

  // Phase 1. Uops [0, numRegs / 2) pair up src2 registers; uops
  // [numRegs / 2, numRegs - 1) pair up the temporaries written so far.
  // Uop n always writes temporary n. Returns the number of uops emitted.
  def emitTree(numRegs: Int): Int = {
    val half = numRegs / 2
    for (n <- 0 until half) {
      program(n, src2 + (n * 2 + 1).U, src2 + (n * 2).U, tmpReg(n))
    }
    for (n <- half until numRegs - 1) {
      val pairBase = (n - half) * 2
      program(n, tmpReg(pairBase + 1), tmpReg(pairBase), tmpReg(n))
    }
    numRegs - 1
  }

  // Phases 2 and 3, starting at uop `first`. The first fold reads `firstSrc`;
  // each later fold reads the temporary written by the uop before it.
  def emitFolds(first: Int, firstSrc: UInt, ratios: Seq[Int]): Unit = {
    ratios.zipWithIndex.foreach { case (ratio, k) =>
      val idx = first + k
      val src = if (k == 0) firstSrc else tmpReg(idx - 1)
      program(idx, src, src, tmpReg(idx))
      markFold(idx, ratio)
    }
    val last = first + ratios.length
    program(last, src1, tmpReg(last - 1), dest)
  }

  // Fold ratios per vsew, for three classes of vlmul.
  val wholeRegFolds = Seq(VSew.e64 -> Seq(2), VSew.e32 -> Seq(2, 4), VSew.e16 -> Seq(2, 4, 8))
  val halfRegFolds = Seq(VSew.e32 -> Seq(4), VSew.e16 -> Seq(4, 8))
  val quarterRegFolds = Seq(VSew.e16 -> Seq(8))

  // (vlmul encoding, number of src2 registers fed to the pair tree, fold table)
  val plans = Seq(
    (VLmul.m8, 8, wholeRegFolds),
    (VLmul.m4, 4, wholeRegFolds),
    (VLmul.m2, 2, wholeRegFolds),
    (VLmul.m1, 1, wholeRegFolds),
    (VLmul.mf2, 1, halfRegFolds),
    (VLmul.mf4, 1, quarterRegFolds)
  )

  for ((lmulEnc, numRegs, foldTable) <- plans) {
    when(vlmul === lmulEnc) {
      // The pair tree depends only on vlmul, so it sits outside the vsew checks.
      val treeUops = emitTree(numRegs)
      // With no tree the first fold reads src2 directly.
      val firstSrc = if (treeUops == 0) src2 else tmpReg(treeUops - 1)
      for ((sewEnc, ratios) <- foldTable) {
        when(vsew === sewEnc) {
          emitFolds(treeUops, firstSrc, ratios)
        }
      }
    }
  }
}
1115
is(UopSplitType.VEC_VFREDOSUM) {
  // Every (vlmul, vsew) pair emits a chain of `vlmax` uops that differ only in
  // three parameters: the chain length, the group size (how many consecutive
  // uops share one src2 register) and whether the widening opcode is handled.
  // One helper emits the chain; the table at the end supplies the parameters.
  import yunsuan.VfaluType
  val vlmul = vlmulReg
  val vsew = vsewReg
  val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

  // Selects the isFoldTo1_<ratio> flag of uop `idx`.
  def foldFlag(idx: Int, ratio: Int) = ratio match {
    case 2 => csBundle(idx).vpu.fpu.isFoldTo1_2
    case 4 => csBundle(idx).vpu.fpu.isFoldTo1_4
    case 8 => csBundle(idx).vpu.fpu.isFoldTo1_8
    case _ => throw new IllegalArgumentException(s"unsupported fold ratio $ratio")
  }

  // Emits uops 0 until vlmax.
  //  - Uop 0 reads src1 on lsrc(0); every later uop reads the temporary register.
  //  - The first uop of each group reads src2 + group number on lsrc(1)/lsrc(2).
  //  - The last uop writes dest; all others write the temporary register.
  //  - When `widenable`, the second uop of a group takes lsrc(2) from the src2
  //    register if isWiden is set, and the fold flag one step below `group` is
  //    used for the widening opcode.
  def emitChain(vlmax: Int, group: Int, widenable: Boolean): Unit = {
    for (i <- 0 until vlmax) {
      val uop = csBundle(i)
      val groupStart = i % group == 0
      val isLast = i == vlmax - 1
      def tmpReg: UInt = VECTOR_TMP_REG_LMUL.U
      def groupReg: UInt = src2 + (i / group).U

      uop.lsrc(0) := (if (i == 0) src1 else tmpReg)
      uop.lsrc(1) := (if (groupStart) groupReg else tmpReg)
      uop.lsrc(2) := (
        if (groupStart) groupReg
        else if (isLast) dest
        else if (widenable && i % group == 1) Mux(isWiden, groupReg, tmpReg)
        else tmpReg
      )
      uop.ldest := (if (isLast) dest else tmpReg)

      // Every uop except the first of a group is a fold uop.
      val folded = (!groupStart).B
      if (widenable) {
        foldFlag(i, group / 2) := isWiden && folded
        foldFlag(i, group) := !isWiden && folded
      } else {
        foldFlag(i, group) := folded
      }
      uop.uopIdx := i.U
    }
  }

  // vlmul encoding -> Seq of (vsew encoding, group size, vlmax, widenable)
  val plans = Seq(
    VLmul.m8 -> Seq((VSew.e64, 2, 16, false), (VSew.e32, 4, 32, false), (VSew.e16, 8, 64, false)),
    VLmul.m4 -> Seq((VSew.e64, 2, 8, false), (VSew.e32, 4, 16, true), (VSew.e16, 8, 32, true)),
    VLmul.m2 -> Seq((VSew.e64, 2, 4, false), (VSew.e32, 4, 8, true), (VSew.e16, 8, 16, true)),
    VLmul.m1 -> Seq((VSew.e64, 2, 2, false), (VSew.e32, 4, 4, true), (VSew.e16, 8, 8, true)),
    VLmul.mf2 -> Seq((VSew.e32, 4, 2, true), (VSew.e16, 8, 4, true)),
    VLmul.mf4 -> Seq((VSew.e16, 8, 2, true))
  )

  for ((lmulEnc, sewPlans) <- plans) {
    when(vlmul === lmulEnc) {
      for ((sewEnc, group, vlmax, widenable) <- sewPlans) {
        when(vsew === sewEnc) {
          emitChain(vlmax, group, widenable)
        }
      }
    }
  }
}
1308
is(UopSplitType.VEC_SLIDEUP) {
  // Uop 0 is the "i to vector move": its first source is an immediate or an
  // integer register depending on src1IsImm, it is routed to the i2v function
  // unit and it writes the temporary register VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // LMUL: destination register i is built by a chain of i + 1 uops (j = 0..i),
  // laid out triangularly after uop 0.
  for {
    i <- 0 until MAX_VLMUL
    j <- 0 to i
  } {
    val slot = i * (i + 1) / 2 + j + 1
    // The chain starts from dest + i and ends in dest + i; the links in between
    // are passed through temporaries numbered by j.
    val chainIn = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
    val chainOut = if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U

    val uop = csBundle(slot)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + j.U
    uop.lsrc(2) := chainIn
    uop.ldest := chainOut
    // uopIdx numbering starts at 0 for the uop right after the move uop.
    uop.uopIdx := (slot - 1).U
  }
}
1336
is(UopSplitType.VEC_SLIDEDOWN) {
  // Uop 0 is the "i to vector move": its first source is an immediate or an
  // integer register depending on src1IsImm, it is routed to the i2v function
  // unit and it writes the temporary register VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // LMUL: chains are placed backwards from the end of the uop sequence, so the
  // uop slots are addressed relative to numOfUop. Chain i is only active while
  // i < lmul.
  for (i <- 0 until MAX_VLMUL) {
    when(i.U < lmul) {
      val chainBase = i * (i + 1) / 2 + i
      // j runs from i down to 0, matching the order in which slots are filled.
      for (j <- i to 0 by -1) {
        val fromEnd = chainBase - j + 1
        // The chain starts from and ends in dest + lmul - 1 - i; the links in
        // between are passed through temporaries numbered by j.
        val chainIn = if (j == 0) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j).U
        val chainOut = if (j == i) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j + 1).U

        val uop = csBundle(numOfUop - fromEnd.U)
        uop.srcType(0) := SrcType.vp
        uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        uop.lsrc(1) := src2 + lmul - 1.U - j.U
        uop.lsrc(2) := chainIn
        uop.ldest := chainOut
        // uopIdx is one less than the slot because the move uop is not counted.
        uop.uopIdx := numOfUop - (fromEnd + 1).U
      }
    }
  }
}
1366
is(UopSplitType.VEC_M0X) {
  // LMUL: uop i reads the temporary written by uop i - 1 together with src2 and
  // writes temporary i. Uop 0 has no predecessor, so its first source is
  // marked don't-care. lsrc(2) is intentionally not driven (don't care).
  (0 until MAX_VLMUL).foreach { i =>
    val uop = csBundle(i)
    uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    uop.lsrc(1) := src2
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    // Intermediate uops write the vector register file only.
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
  }

  // The uop at index numOfUop - 1 is re-driven after the loop: it writes dest
  // through the rfWen port (suppressed when dest is 0) instead of a vector
  // temporary.
  val finalUop = csBundle(numOfUop - 1.U)
  finalUop.rfWen := (dest =/= 0.U)
  finalUop.fpWen := false.B
  finalUop.vecWen := false.B
  finalUop.ldest := dest
}
1388
is(UopSplitType.VEC_MVV) {
  // Two uops are generated per slot i. Both read scratch register
  // VECTOR_TMP_REG_LMUL + i - 1 (lsrc(0)) and src2 (lsrc(1)):
  //   uop 2i     writes dest + i and also reads it back through lsrc(2);
  //   uop 2i + 1 writes scratch register VECTOR_TMP_REG_LMUL + i
  //              (its lsrc(2) is intentionally not driven here).
  for (slot <- 0 until MAX_VLMUL) {
    // Slot 0 has no previous scratch register, so the operand type is don't-care.
    val prevScratchType = if (slot == 0) SrcType.DC else SrcType.vp
    val resultUop = csBundle(2 * slot)
    val scratchUop = csBundle(2 * slot + 1)

    // Fields common to both uops of the pair.
    Seq(resultUop, scratchUop).zipWithIndex.foreach { case (uop, k) =>
      uop.srcType(0) := prevScratchType
      uop.srcType(1) := SrcType.vp
      uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + slot - 1).U
      uop.lsrc(1) := src2
      uop.uopIdx := (2 * slot + k).U
    }

    resultUop.lsrc(2) := dest + slot.U
    resultUop.ldest := dest + slot.U
    scratchUop.ldest := (VECTOR_TMP_REG_LMUL + slot).U
  }
}
is(UopSplitType.VEC_VWW) {
  // Uop i always writes scratch register VECTOR_TMP_REG_LMUL + i with a
  // don't-care third source; only the two source registers depend on whether
  // i is below `lmul`:
  //   i <  lmul : both sources are src2 + i;
  //   i >= lmul : the sources are the scratch pair
  //               VECTOR_TMP_REG_LMUL + 2 * (i - lmul) (+ 1 for lsrc(0)),
  //               i.e. results produced by two earlier uops.
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    uop.srcType(2) := SrcType.DC
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    when(i.U < lmul) {
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    }.otherwise {
      // Cat(x, 0.U(1.W)) is x * 2: the even register of the scratch pair.
      val pairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
      uop.lsrc(0) := pairBase + 1.U
      uop.lsrc(1) := pairBase
    }
  }

  // The uop at index numOfUop - 1 takes src1 as its first source and uses
  // `dest` both as third source and as destination. These connections were
  // previously repeated inside the loop on every iteration; they do not depend
  // on the loop variable, and under last-connect semantics only the final copy
  // is effective, so emitting them once after the loop is equivalent.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.srcType(2) := SrcType.vp
  lastUop.lsrc(0) := src1
  lastUop.lsrc(2) := dest
  lastUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // Emits len * len uops. For destination register dest + i, one uop is
  // generated per source register src2 + j; all of them use src1 + i as
  // lsrc(0). The uops of one row are chained through lsrc(2)/ldest: the first
  // reads dest + i, intermediate results go through scratch registers
  // VECTOR_TMP_REG_LMUL + j, and the last one writes dest + i.
  // srcType fields are not touched by this helper.
  def genCsBundle_VEC_RGATHER(len: Int): Unit = {
    for {
      i <- 0 until len
      j <- 0 until len
    } {
      val uop = csBundle(i * len + j)
      val startsRow = j == 0
      val endsRow = j == len - 1
      uop.lsrc(0) := src1 + i.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (startsRow) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (endsRow) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := (i * len + j).U
    }
  }

  // Only the vlmulReg encodings b001/b010/b011 (2, 4 and 8 registers) trigger
  // the expansion; for any other encoding nothing is driven here.
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (lmulEnc, regs) =>
    when(vlmulReg === lmulEnc.U) {
      genCsBundle_VEC_RGATHER(regs)
    }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  // Emits len * len gather uops into csBundle(1 ...); entry 0 is reserved for
  // the scalar/immediate-to-vector move configured below. Every gather uop
  // reads the moved value from VECTOR_TMP_REG_LMUL (lsrc(0)) and src2 + j
  // (lsrc(1)). Within a row i the uops are chained through lsrc(2)/ldest:
  // the first reads dest + i, intermediate results use scratch registers
  // VECTOR_TMP_REG_LMUL + j + 1, and the last one writes dest + i.
  // Note that uopIdx is numbered from 0 although the bundle index starts at 1.
  def genCsBundle_RGATHER_VX(len: Int): Unit = {
    for {
      i <- 0 until len
      j <- 0 until len
    } {
      val gatherUop = csBundle(i * len + j + 1)
      gatherUop.srcType(0) := SrcType.vp
      gatherUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      gatherUop.lsrc(1) := src2 + j.U
      gatherUop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      gatherUop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
      gatherUop.uopIdx := (i * len + j).U
    }
  }

  // Uop 0: move the integer register or immediate operand into the scratch
  // vector register VECTOR_TMP_REG_LMUL using the i2v function unit.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B

  // Default expansion for a single register; it must be emitted before the
  // vlmulReg-specific expansions so that those override it when they apply.
  genCsBundle_RGATHER_VX(1)
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (lmulEnc, regs) =>
    when(vlmulReg === lmulEnc.U) {
      genCsBundle_RGATHER_VX(regs)
    }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  // vsewReg == 0 variant: two uops per (i, j) pair, using index registers
  // src1 + 2i and src1 + 2i + 1 (presumably because the 16-bit indices occupy
  // twice as many registers as the 8-bit data -- confirm against the ISA spec).
  // The pair is chained through scratch register VECTOR_TMP_REG_LMUL + 2j;
  // a row starts by reading dest + i and ends by writing dest + i.
  def genCsBundle_VEC_RGATHEREI16_SEW8(len: Int): Unit = {
    for {
      i <- 0 until len
      j <- 0 until len
    } {
      val pairIdx = (i * len + j) * 2
      val firstUop = csBundle(pairIdx)
      val secondUop = csBundle(pairIdx + 1)

      firstUop.lsrc(0) := src1 + (i * 2).U
      firstUop.lsrc(1) := src2 + j.U
      firstUop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
      firstUop.ldest := (VECTOR_TMP_REG_LMUL + j * 2).U
      firstUop.uopIdx := pairIdx.U

      secondUop.lsrc(0) := src1 + (i * 2 + 1).U
      secondUop.lsrc(1) := src2 + j.U
      secondUop.lsrc(2) := (VECTOR_TMP_REG_LMUL + j * 2).U
      secondUop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
      secondUop.uopIdx := (pairIdx + 1).U
    }
  }

  // Shared body of the three one-uop-per-(i, j) variants below. They differ
  // only in how many destination registers share one index register:
  // lsrc(0) is src1 + i / destRegsPerIndexReg.
  def genOneUopPerPair(len: Int, destRegsPerIndexReg: Int): Unit = {
    for {
      i <- 0 until len
      j <- 0 until len
    } {
      val uop = csBundle(i * len + j)
      uop.lsrc(0) := src1 + (i / destRegsPerIndexReg).U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := (i * len + j).U
    }
  }

  // Fallback variant: index register src1 + i for destination register i.
  def genCsBundle_VEC_RGATHEREI16(len: Int): Unit =
    genOneUopPerPair(len, 1)

  // VSew.e32 variant: index register src1 + i / 2.
  def genCsBundle_VEC_RGATHEREI16_SEW32(len: Int): Unit =
    genOneUopPerPair(len, 2)

  // VSew.e64 variant: index register src1 + i / 4.
  def genCsBundle_VEC_RGATHEREI16_SEW64(len: Int): Unit =
    genOneUopPerPair(len, 4)

  // Picks the variant matching vsewReg for a group of `len` registers.
  def genForCurrentSew(len: Int): Unit = {
    when(!vsewReg.orR) {
      genCsBundle_VEC_RGATHEREI16_SEW8(len)
    }.elsewhen(vsewReg === VSew.e32) {
      genCsBundle_VEC_RGATHEREI16_SEW32(len)
    }.elsewhen(vsewReg === VSew.e64) {
      genCsBundle_VEC_RGATHEREI16_SEW64(len)
    }.otherwise {
      genCsBundle_VEC_RGATHEREI16(len)
    }
  }

  // Default: single-register expansion. Emitted first so the vlmulReg-specific
  // expansions below override it when they apply.
  genForCurrentSew(1)

  switch(vlmulReg) {
    is("b001".U) {
      genForCurrentSew(2)
    }
    is("b010".U) {
      genForCurrentSew(4)
    }
    is("b011".U) {
      // No SEW8 variant is generated for this encoding; vsewReg == 0 ends up
      // in the fallback variant together with every other remaining value.
      when(vsewReg === VSew.e32) {
        genCsBundle_VEC_RGATHEREI16_SEW32(8)
      }.elsewhen(vsewReg === VSew.e64) {
        genCsBundle_VEC_RGATHEREI16_SEW64(8)
      }.otherwise {
        genCsBundle_VEC_RGATHEREI16(8)
      }
    }
  }
}
is(UopSplitType.VEC_COMPRESS) {
  // Expands the instruction into `len` rounds. Round i handles source register
  // src2 + i and consists of i + 2 uops (i + 1 in the last round), so its
  // first uop sits at bundle index i * (i + 3) / 2.
  //   - lsrc(0) is src1 in round 0 and VECTOR_TMP_REG_LMUL in later rounds.
  //   - Uop j of a round reads dest + i when j == i, otherwise scratch
  //     register VECTOR_TMP_REG_LMUL + j + 1, as its third source.
  //   - In the last round uop j writes dest + j. In earlier rounds uop j
  //     writes scratch register VECTOR_TMP_REG_LMUL + j + 1, except the extra
  //     uop j == i + 1, which writes VECTOR_TMP_REG_LMUL itself (presumably
  //     the operand consumed as lsrc(0) by the next round) and has
  //     srcType(0)/srcType(2) left as DontCare.
  def genCsBundle_VEC_COMPRESS(len: Int): Unit = {
    (0 until len).foreach { i =>
      val isLastRound = i == len - 1
      val roundBase = i * (i + 3) / 2
      val uopsInRound = if (isLastRound) i + 1 else i + 2

      (0 until uopsInRound).foreach { j =>
        val uop = csBundle(roundBase + j)
        val isExtraUop = j == i + 1

        val thirdSrcReg = if (i == j) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
        val destReg =
          if (isLastRound) dest + j.U
          else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
          else (VECTOR_TMP_REG_LMUL + j + 1).U
        val src13Type = if (isExtraUop) DontCare else SrcType.vp
        val firstSrcReg = if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U

        uop.vecWen := true.B
        uop.v0Wen := false.B
        uop.srcType(0) := src13Type
        uop.srcType(1) := SrcType.vp
        uop.srcType(2) := src13Type
        uop.lsrc(0) := firstSrcReg
        uop.lsrc(1) := src2 + i.U
        uop.lsrc(2) := thirdSrcReg
        uop.ldest := destReg
        uop.uopIdx := (roundBase + j).U
      }
    }
  }

  // Only the vlmulReg encodings b001/b010/b011 (2, 4 and 8 registers) trigger
  // the expansion; for any other encoding nothing is driven here.
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (lmulEnc, regs) =>
    when(vlmulReg === lmulEnc.U) {
      genCsBundle_VEC_COMPRESS(regs)
    }
  }
}
is(UopSplitType.VEC_MVNR) {
  // One uop per register of the group: uop k uses register offset k for all
  // three sources and for the destination, and its uopIdx is k.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k)
    val regOffset = k.U
    uop.lsrc(0) := src1 + regOffset
    uop.lsrc(1) := src2 + regOffset
    uop.lsrc(2) := dest + regOffset
    uop.ldest := dest + regOffset
    uop.uopIdx := regOffset
  }
}
is(UopSplitType.VEC_US_LDST) {
  // Uop 0: move the integer register operand into scratch vector register
  // VECTOR_TMP_REG_LMUL through the i2v function unit with a 64-bit element
  // width (labelled "FMV.D.X" by the original author).
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  // Uops 1 .. MAX_VLMUL: one per data register. Each reads the moved value
  // from the scratch register and uses dest + k both as old destination
  // (lsrc(2)) and as destination. uopIdx counts from 0, i.e. it does not
  // include the move uop.
  (0 until MAX_VLMUL).foreach { k =>
    val memUop = csBundle(k + 1)
    memUop.srcType(0) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(2) := dest + k.U
    memUop.ldest := dest + k.U
    memUop.uopIdx := k.U
    memUop.vlsInstr := true.B
  }

  // When isUsSegment is set, the first uop is flagged waitForward and the uop
  // at index numOfUop - 1 is flagged blockBackward.
  csBundle.head.waitForward := isUsSegment
  csBundle(numOfUop - 1.U).blockBackward := isUsSegment
}
is(UopSplitType.VEC_S_LDST) {
  // Uops 0 and 1 are scalar-to-vector moves through the i2v function unit with
  // a 64-bit element width (labelled "FMV.D.X" by the original author).
  // Uop 0 writes scratch register VECTOR_TMP_REG_LMUL, uop 1 writes
  // VECTOR_TMP_REG_LMUL + 1.
  val moveUops = Seq(csBundle(0), csBundle(1))
  moveUops.zipWithIndex.foreach { case (uop, k) =>
    uop.srcType(0) := SrcType.reg
    uop.srcType(1) := SrcType.imm
    uop.lsrc(1) := 0.U
    uop.ldest := (VECTOR_TMP_REG_LMUL + k).U
    uop.fuType := FuType.i2v.U
    uop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
    uop.vlsInstr := true.B
  }
  // Only the second move has its source register set explicitly: it takes
  // the instruction's second source register.
  csBundle(1).lsrc(0) := latchedInst.lsrc(1)

  // Uops 2 .. MAX_VLMUL + 1: one per data register. Each reads both scratch
  // registers and uses dest + k both as old destination (lsrc(2)) and as
  // destination. uopIdx counts from 0, i.e. it does not include the moves.
  (0 until MAX_VLMUL).foreach { k =>
    val memUop = csBundle(k + 2)
    memUop.srcType(0) := SrcType.vp
    memUop.srcType(1) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
    memUop.lsrc(2) := dest + k.U
    memUop.ldest := dest + k.U
    memUop.uopIdx := k.U
    memUop.vlsInstr := true.B
  }

  // When isSdSegment is set, the first uop is flagged waitForward and the uop
  // at index numOfUop - 1 is flagged blockBackward.
  csBundle.head.waitForward := isSdSegment
  csBundle(numOfUop - 1.U).blockBackward := isSdSegment
}
is(UopSplitType.VEC_I_LDST) {
  // Segment case, data side: configures uops 1 .. MAX_VLMUL. Uop i + 1 reads
  // the scratch register as lsrc(0) and uses dest + i as third source and
  // destination. Only the first lmul * nf uops get a vp third source and
  // vecWen; the remaining ones get SrcType.no and vecWen = false.
  // srcType(1)/lsrc(1) receive placeholder values here and are overridden by
  // genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1 afterwards.
  def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul: Int, nf: Int): Unit = {
    val dataRegs = lmul * nf
    (0 until MAX_VLMUL).foreach { i =>
      val uop = csBundle(i + 1)
      val usesDataReg = i < dataRegs
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.srcType(1) := SrcType.no
      uop.lsrc(1) := src2 + i.U
      uop.srcType(2) := (if (usesDataReg) SrcType.vp else SrcType.no)
      uop.lsrc(2) := dest + i.U
      uop.ldest := dest + i.U
      uop.rfWen := false.B
      uop.fpWen := false.B
      uop.vecWen := usesDataReg.B
      uop.uopIdx := i.U
      uop.vlsInstr := true.B
    }
  }

  // Segment case, index side: the first `emul` uops read index register
  // src2 + i as a vp source, the remaining ones have no second source.
  def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul: Int): Unit = {
    (0 until MAX_VLMUL).foreach { i =>
      val uop = csBundle(i + 1)
      uop.srcType(1) := (if (i < emul) SrcType.vp else SrcType.no)
      uop.lsrc(1) := src2 + i.U
    }
  }

  val lmulEnc = vlmulReg
  val sewEnc = Cat(0.U(1.W), vsewReg)
  val eewEnc = Cat(0.U(1.W), width)
  // eew + 1 + lmul + ~sew equals eew + lmul - sew in two's complement.
  val emulEnc: UInt = eewEnc.asUInt + 1.U + lmulEnc.asUInt + ~sewEnc.asUInt
  // Encodings b001/b010/b011 map to 1/2/3; every other encoding maps to 0.
  val lmulLog2 = MuxLookup(lmulEnc, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))
  val emulLog2 = MuxLookup(emulEnc, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  // Uop 0: move the integer register operand into scratch vector register
  // VECTOR_TMP_REG_LMUL through the i2v function unit (64-bit element width).
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  when(nf === 0.U) {
    // Non-segment indexed access: the per-uop register offsets for the index
    // register (vs2) and the data register (vd) come from the
    // indexedLSRegOffset lookup modules, keyed by {emulLog2, lmulLog2}.
    (0 until MAX_VLMUL).foreach { i =>
      val offsetLookup = indexedLSRegOffset(i)
      offsetLookup.src := Cat(emulLog2, lmulLog2)
      val vs2Select = UIntToOH(offsetLookup.outOffsetVs2, MAX_VLMUL)
      val vdSelect = UIntToOH(offsetLookup.outOffsetVd, MAX_VLMUL)
      val indexReg = Mux1H(vs2Select, (0 until MAX_VLMUL).map(j => src2 + j.U))
      val dataReg = Mux1H(vdSelect, (0 until MAX_VLMUL).map(j => dest + j.U))

      val uop = csBundle(i + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := indexReg
      uop.srcType(2) := SrcType.vp
      // lsrc(2) is the old value of the destination register.
      uop.lsrc(2) := dataReg
      uop.ldest := dataReg
      uop.uopIdx := i.U
      uop.vlsInstr := true.B
    }
  }.otherwise {
    // nf field is non-zero: segment indexed load/store.
    // Data side (src0, vd). Supported combinations are exactly those where
    // (registers per group) * (nf + 1) does not exceed 8:
    //   1 register  with nf = 1..7, 2 registers with nf = 1..3,
    //   4 registers with nf = 1.
    for {
      lmulExp <- 0 to 2
      nfEnc <- 1 to 7
      groupRegs = 1 << lmulExp
      fields = nfEnc + 1
      if groupRegs * fields <= 8
    } {
      when(lmulLog2 === lmulExp.U && nf === nfEnc.U) {
        genCsBundle_SEGMENT_INDEXED_LOADSTORE(groupRegs, fields)
      }
    }

    // Index side (src1). Must follow the data-side connections because it
    // overrides their placeholder srcType(1).
    (0 to 3).foreach { emulExp =>
      when(emulLog2 === emulExp.U) {
        genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1 << emulExp)
      }
    }

    // Vector stores never write a vector register.
    when(isVstore) {
      (0 until MAX_VLMUL).foreach { i =>
        csBundle(i + 1).vecWen := false.B
      }
    }
  }

  // When isIxSegment is set, the first uop is flagged waitForward and the uop
  // at index numOfUop - 1 is flagged blockBackward.
  csBundle.head.waitForward := isIxSegment
  csBundle(numOfUop - 1.U).blockBackward := isIxSegment
}
1860  }
1861
// Counter derived from the rename-side ready signals: each not-ready port is
// paired with its index and handed to PriorityMuxDefault, with RenameWidth as
// the default (presumably yielding the index of the first not-ready port,
// i.e. the number of leading ready ports -- confirm against PriorityMuxDefault).
val readyCounter = {
  val portBlocked = outReadys.map(ready => !ready)
  val portIndices = (0 until RenameWidth).map(_.U)
  PriorityMuxDefault(portBlocked.zip(portIndices), RenameWidth.U)
}

// All remaining uops of the current complex instruction can be sent out this cycle.
val thisAllOut = uopRes <= readyCounter

// Next-state logic. Branches that are not listed leave stateNext/uopResNext
// at whatever was assigned to them earlier in the module.
switch(state) {
  is(s_idle) {
    // A new instruction arrives: load its uop count.
    when(inValid) {
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }
  }
  is(s_active) {
    when(!thisAllOut) {
      // Some uops are left over: keep going with the reduced count.
      stateNext := s_active
      uopResNext := uopRes - readyCounter
    }.elsewhen(inValid) {
      // Finished this cycle and the next instruction is already waiting.
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }.otherwise {
      // Finished this cycle with nothing waiting.
      stateNext := s_idle
      uopResNext := 0.U
    }
  }
}

// A redirect clears the state machine regardless of the computed next state.
state := Mux(io.redirect, s_idle, stateNext)
uopRes := Mux(io.redirect, 0.U, uopResNext)

// Number of uops issued this cycle: the smaller of uopRes and readyCounter.
val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)

for (i <- 0 until RenameWidth) {
  // Output port i carries the i-th not-yet-issued uop; indices beyond the
  // bundle are clamped to the last entry.
  val uopIndex = i.U + numOfUop - uopRes
  outValids(i) := i.U < complexNum
  outDecodedInsts(i) := Mux(uopIndex < maxUopSize.U, csBundle(uopIndex), csBundle(maxUopSize - 1))
}

outComplexNum := Mux(state === s_active, complexNum, 0.U)
// A new instruction is accepted when idle, or when the current one finishes this cycle.
inReady := (state === s_idle) || ((state === s_active) && thisAllOut)
1903
1904//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1905//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1906//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1907//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1908//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1909//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1910//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1911//
1912//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1913//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1914//    0.U)
1915//  validToRename.zipWithIndex.foreach{
1916//    case(dst, i) =>
1917//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1918//      dst := MuxCase(false.B, Seq(
1919//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1920//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1921//      ).toSeq)
1922//  }
1923//
1924//  readyToIBuf.zipWithIndex.foreach {
1925//    case (dst, i) =>
1926//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1927//      dst := MuxCase(true.B, Seq(
1928//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1929//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1930//      ).toSeq)
1931//  }
1932//
1933//  io.deq.decodedInsts := decodedInsts
1934//  io.deq.complexNum := complexNum
1935//  io.deq.validToRename := validToRename
1936//  io.deq.readyToIBuf := readyToIBuf
1937}
1938