xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision f7063a43ab34da917ba6c670d21871314340c550)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for a single uop slot of a vector indexed load/store.
  *
  * One instance is built per `uopIdx`. For that fixed index, every (emul, lmul, nf)
  * combination is evaluated in Scala and the results are handed to the QMC minimizer,
  * so the generated hardware is a small block of combinational logic.
  *
  * Ports:
  *  - `src`               packed as {emul[1:0], lmul[1:0], nf[2:0]}; `emul` and `lmul` are
  *                        log2 register-group sizes (0..3) and `nf` is NFIELDS - 1
  *  - `outOffsetVs2`      register offset to add to vs2 for this uop
  *  - `outOffsetVd`       register offset to add to vd for this uop
  *  - `outIsFirstUopInVd` high when this uop is the first one touching its vd register
  *
  * NOTE(review): `emul` is presumably the EMUL of the index (vs2) operand and `lmul` the
  * LMUL of the data (vd) operand -- confirm against the code that drives `src`.
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes the register offsets of one uop of an indexed load/store.
    *
    * Each field is split into `1 << max(lmul, emul)` uops, and fields are laid out back to back.
    *
    * @param lmul    log2 of the data register-group size
    * @param emul    log2 of the index register-group size
    * @param nfields number of fields (NFIELDS, i.e. nf + 1)
    * @param uopIdx  index of the uop within the instruction
    * @return (offsetVs2, offsetVd, isFirstUopInVd), with isFirstUopInVd encoded as 0/1;
    *         (0, 0, 1) when the uop does not exist for this combination or the
    *         combination needs more than 8 data registers
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val regsPerField = 1 << lmul
    val uopsPerField = 1 << lmul.max(emul)
    val field = uopIdx / uopsPerField
    val i = uopIdx % uopsPerField
    // The bound applies to the number of data registers (1 << lmul), not to the log2 value:
    // all fields together must fit in 8 registers, which also keeps offsetVd within 3 bits.
    val exists = uopIdx >= 0 && field < nfields && regsPerField * nfields <= 8
    if (!exists) {
      (0, 0, 1)
    } else if (lmul < emul) {
      // lmul < emul: uop count depends on emul * nf, several uops share one vd register
      val uopsPerVd = 1 << (emul - lmul)
      (i, i / uopsPerVd + field * regsPerField, if (i % uopsPerVd == 0) 1 else 0)
    } else {
      // lmul >= emul: uop count depends on lmul * nf, several uops share one vs2 register
      val uopsPerVs2 = 1 << (lmul - emul)
      (i / uopsPerVs2, i + field * regsPerField, 1)
    }
  }

  // indexed load/store: one row per (emul, lmul, nf) for this module's uopIdx
  val combVemulNf: Seq[(Int, Int, Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf <- 0 until 8
    } yield {
      val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
      (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
    }

  // Key:   {emul[1:0], lmul[1:0], nf[2:0]}
  // Value: {isFirstUopInVd, offsetVs2[2:0], offsetVd[2:0]}
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      val key = (emul << 5) | (lmul << 3) | nf
      val value = (isFirstUopInVd << 6) | (offsetVs2 << 3) | offsetVd
      (BitPat(key.U(7.W)), BitPat(value.U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}
86
/** Sizing constants and temporary-register numbers shared by the vector uop-splitting logic. */
trait VectorConstants {
  /** Largest register-group size; used as the loop bound when expanding per-register uops. */
  val MAX_VLMUL: Int = 8

  /** Logical register number of the temporary used for moves through the FP register file. */
  val FP_TMP_REG_MV: Int = 32

  /** First logical register number of the vector temporaries: 33~47, i.e. 15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Number of uop slots provided for one indexed load/store instruction. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
93
/**
  * Port list of the complex-instruction decoder.
  *
  * One already simply-decoded instruction enters through `in`; up to `RenameWidth`
  * decoded uops leave through `out.complexDecodedInsts` each cycle.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Redirect indication (NOTE(review): presumably aborts an in-progress split -- confirm in the module body)
  val redirect = Input(Bool())
  // Custom CSR control signals
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // Bypassed vtype value
  val vtypeBypass = Input(new VType)

  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(Decoupled(new Bundle {
    // Result of the simple decoder for the instruction to be split
    val simpleDecodedInst = new DecodedInst
    // Uop information accompanying that instruction
    val uopInfo = new UopInfo
  }))

  val out = new Bundle {
    // One decoupled slot per rename lane
    val complexDecodedInsts = Vec(RenameWidth, Decoupled(new DecodedInst))
  }

  // 3-bit count reported alongside the output slots
  val complexNum = Output(UInt(3.W))
}
108
109/**
110  * @author zly
111  */
112class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
113  val io = IO(new DecodeUnitCompIO)
114
115  // alias
116  private val inReady = io.in.ready
117  private val inValid = io.in.valid
118  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
119  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
120  private val inUopInfo = io.in.bits.uopInfo
121  private val outValids = io.out.complexDecodedInsts.map(_.valid)
122  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
123  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
124  private val outComplexNum = io.complexNum
125
126  val maxUopSize = MaxUopSize
127  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
128    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
129      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
130    }.elsewhen(inInstFields.RS1 === 0.U) {
131      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
132    }
133  }
134
135  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
136  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
137  //input bits
138  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
139
140  val src1 = Cat(0.U(1.W), instFields.RS1)
141  val src2 = Cat(0.U(1.W), instFields.RS2)
142  val dest = Cat(0.U(1.W), instFields.RD)
143
144  val nf    = instFields.NF
145  val width = instFields.WIDTH(1, 0)
146
147  //output of DecodeUnit
148  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
149  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
150  val lmul = Wire(UInt(4.W))
151  val isVsetSimple = Wire(Bool())
152
153  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
154  indexedLSRegOffset.map(_.src := 0.U)
155
156  //pre decode
157  lmul := latchedUopInfo.lmul
158  isVsetSimple := latchedInst.isVset
159  val vlmulReg = latchedInst.vpu.vlmul
160  val vsewReg = latchedInst.vpu.vsew
161
162  //Type of uop Div
163  val typeOfSplit = latchedInst.uopSplitType
164  val src1Type = latchedInst.srcType(0)
165  val src1IsImm = src1Type === SrcType.imm
166  val src1IsFp = src1Type === SrcType.fp
167
168  numOfUop := latchedUopInfo.numOfUop
169  numOfWB := latchedUopInfo.numOfWB
170
171  //uops dispatch
172  val s_idle :: s_active :: Nil = Enum(2)
173  val state = RegInit(s_idle)
174  val stateNext = WireDefault(state)
175  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
176  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
177  val uopResNext = WireInit(uopRes)
178  val e64 = 3.U(2.W)
179
180  //uop div up to maxUopSize
181  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
182  csBundle.foreach { case dst =>
183    dst := latchedInst
184    dst.numUops := latchedUopInfo.numOfUop
185    dst.numWB := latchedUopInfo.numOfWB
186    dst.firstUop := false.B
187    dst.lastUop := false.B
188    dst.vlsInstr := false.B
189  }
190
191  csBundle(0).firstUop := true.B
192  csBundle(numOfUop - 1.U).lastUop := true.B
193
194  switch(typeOfSplit) {
195    is(UopSplitType.VSET) {
196      // In simple decoder, rfWen and vecWen are not set
197      when(isVsetSimple) {
198        // Default
199        // uop0 set rd, never flushPipe
200        csBundle(0).fuType := FuType.vsetiwi.U
201        csBundle(0).flushPipe := false.B
202        csBundle(0).rfWen := true.B
203        // uop1 set vl, vsetvl will flushPipe
204        csBundle(1).ldest := VCONFIG_IDX.U
205        csBundle(1).vecWen := true.B
206        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
207          csBundle(1).fuType := FuType.vsetfwf.U
208          csBundle(1).srcType(0) := SrcType.vp
209          csBundle(1).lsrc(0) := VCONFIG_IDX.U
210        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
211          // uop0: mv vtype gpr to vector region
212          csBundle(0).srcType(0) := SrcType.xp
213          csBundle(0).srcType(1) := SrcType.no
214          csBundle(0).lsrc(1) := 0.U
215          csBundle(0).ldest := FP_TMP_REG_MV.U
216          csBundle(0).fuType := FuType.i2v.U
217          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
218          csBundle(0).rfWen := false.B
219          csBundle(0).fpWen := true.B
220          csBundle(0).vecWen := false.B
221          csBundle(0).flushPipe := false.B
222          // uop1: uvsetvcfg_vv
223          csBundle(1).fuType := FuType.vsetfwf.U
224          // vl
225          csBundle(1).srcType(0) := SrcType.vp
226          csBundle(1).lsrc(0) := VCONFIG_IDX.U
227          // vtype
228          csBundle(1).srcType(1) := SrcType.fp
229          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
230          csBundle(1).vecWen := true.B
231          csBundle(1).ldest := VCONFIG_IDX.U
232        }
233        // use bypass vtype from vtypeGen
234        csBundle(0).vpu.connectVType(io.vtypeBypass)
235        csBundle(1).vpu.connectVType(io.vtypeBypass)
236      }
237    }
238    is(UopSplitType.VEC_VVV) {
239      for (i <- 0 until MAX_VLMUL) {
240        csBundle(i).lsrc(0) := src1 + i.U
241        csBundle(i).lsrc(1) := src2 + i.U
242        csBundle(i).lsrc(2) := dest + i.U
243        csBundle(i).ldest := dest + i.U
244        csBundle(i).uopIdx := i.U
245      }
246    }
247    is(UopSplitType.VEC_VFV) {
248      /*
249      f to vector move
250       */
251      csBundle(0).srcType(0) := SrcType.fp
252      csBundle(0).srcType(1) := SrcType.imm
253      csBundle(0).lsrc(1) := 0.U
254      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
255      csBundle(0).fuType := FuType.f2v.U
256      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
257      csBundle(0).vecWen := true.B
258      csBundle(0).vpu.isReverse := false.B
259      /*
260      LMUL
261       */
262      for (i <- 0 until MAX_VLMUL) {
263        csBundle(i + 1).srcType(0) := SrcType.vp
264        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
265        csBundle(i + 1).lsrc(1) := src2 + i.U
266        csBundle(i + 1).lsrc(2) := dest + i.U
267        csBundle(i + 1).ldest := dest + i.U
268        csBundle(i + 1).uopIdx := i.U
269      }
270    }
271    is(UopSplitType.VEC_EXT2) {
272      for (i <- 0 until MAX_VLMUL / 2) {
273        csBundle(2 * i).lsrc(1) := src2 + i.U
274        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
275        csBundle(2 * i).ldest := dest + (2 * i).U
276        csBundle(2 * i).uopIdx := (2 * i).U
277        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
278        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
279        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
280        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
281      }
282    }
283    is(UopSplitType.VEC_EXT4) {
284      for (i <- 0 until MAX_VLMUL / 4) {
285        csBundle(4 * i).lsrc(1) := src2 + i.U
286        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
287        csBundle(4 * i).ldest := dest + (4 * i).U
288        csBundle(4 * i).uopIdx := (4 * i).U
289        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
290        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
291        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
292        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
293        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
294        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
295        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
296        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
297        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
298        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
299        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
300        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
301      }
302    }
303    is(UopSplitType.VEC_EXT8) {
304      for (i <- 0 until MAX_VLMUL) {
305        csBundle(i).lsrc(1) := src2
306        csBundle(i).lsrc(2) := dest + i.U
307        csBundle(i).ldest := dest + i.U
308        csBundle(i).uopIdx := i.U
309      }
310    }
311    is(UopSplitType.VEC_0XV) {
312      /*
313      i/f to vector move
314       */
315      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
316      csBundle(0).srcType(1) := SrcType.imm
317      csBundle(0).lsrc(1) := 0.U
318      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
319      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
320      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
321      csBundle(0).rfWen := false.B
322      csBundle(0).fpWen := false.B
323      csBundle(0).vecWen := true.B
324      /*
325      vmv.s.x
326       */
327      csBundle(1).srcType(0) := SrcType.vp
328      csBundle(1).srcType(1) := SrcType.imm
329      csBundle(1).srcType(2) := SrcType.vp
330      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
331      csBundle(1).lsrc(1) := 0.U
332      csBundle(1).lsrc(2) := dest
333      csBundle(1).ldest := dest
334      csBundle(1).rfWen := false.B
335      csBundle(1).fpWen := false.B
336      csBundle(1).vecWen := true.B
337      csBundle(1).uopIdx := 0.U
338    }
339    is(UopSplitType.VEC_VXV) {
340      /*
341      i to vector move
342       */
343      csBundle(0).srcType(0) := SrcType.reg
344      csBundle(0).srcType(1) := SrcType.imm
345      csBundle(0).lsrc(1) := 0.U
346      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
347      csBundle(0).fuType := FuType.i2v.U
348      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
349      csBundle(0).vecWen := true.B
350      csBundle(0).vpu.isReverse := false.B
351      /*
352      LMUL
353       */
354      for (i <- 0 until MAX_VLMUL) {
355        csBundle(i + 1).srcType(0) := SrcType.vp
356        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
357        csBundle(i + 1).lsrc(1) := src2 + i.U
358        csBundle(i + 1).lsrc(2) := dest + i.U
359        csBundle(i + 1).ldest := dest + i.U
360        csBundle(i + 1).uopIdx := i.U
361      }
362    }
363    is(UopSplitType.VEC_VVW) {
364      for (i <- 0 until MAX_VLMUL / 2) {
365        csBundle(2 * i).lsrc(0) := src1 + i.U
366        csBundle(2 * i).lsrc(1) := src2 + i.U
367        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
368        csBundle(2 * i).ldest := dest + (2 * i).U
369        csBundle(2 * i).uopIdx := (2 * i).U
370        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
371        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
372        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
373        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
374        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
375      }
376    }
377    is(UopSplitType.VEC_VFW) {
378      /*
379      f to vector move
380       */
381      csBundle(0).srcType(0) := SrcType.fp
382      csBundle(0).srcType(1) := SrcType.imm
383      csBundle(0).lsrc(1) := 0.U
384      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
385      csBundle(0).fuType := FuType.f2v.U
386      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
387      csBundle(0).rfWen := false.B
388      csBundle(0).fpWen := false.B
389      csBundle(0).vecWen := true.B
390
391      for (i <- 0 until MAX_VLMUL / 2) {
392        csBundle(2 * i + 1).srcType(0) := SrcType.vp
393        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
394        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
395        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
396        csBundle(2 * i + 1).ldest := dest + (2 * i).U
397        csBundle(2 * i + 1).uopIdx := (2 * i).U
398        csBundle(2 * i + 2).srcType(0) := SrcType.vp
399        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
400        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
401        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
402        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
403        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
404      }
405    }
406    is(UopSplitType.VEC_WVW) {
407      for (i <- 0 until MAX_VLMUL / 2) {
408        csBundle(2 * i).lsrc(0) := src1 + i.U
409        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
410        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
411        csBundle(2 * i).ldest := dest + (2 * i).U
412        csBundle(2 * i).uopIdx := (2 * i).U
413        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
414        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
415        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
416        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
417        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
418      }
419    }
420    is(UopSplitType.VEC_VXW) {
421      /*
422      i to vector move
423       */
424      csBundle(0).srcType(0) := SrcType.reg
425      csBundle(0).srcType(1) := SrcType.imm
426      csBundle(0).lsrc(1) := 0.U
427      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
428      csBundle(0).fuType := FuType.i2v.U
429      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
430      csBundle(0).vecWen := true.B
431
432      for (i <- 0 until MAX_VLMUL / 2) {
433        csBundle(2 * i + 1).srcType(0) := SrcType.vp
434        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
435        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
436        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
437        csBundle(2 * i + 1).ldest := dest + (2 * i).U
438        csBundle(2 * i + 1).uopIdx := (2 * i).U
439        csBundle(2 * i + 2).srcType(0) := SrcType.vp
440        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
441        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
442        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
443        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
444        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
445      }
446    }
447    is(UopSplitType.VEC_WXW) {
448      /*
449      i to vector move
450       */
451      csBundle(0).srcType(0) := SrcType.reg
452      csBundle(0).srcType(1) := SrcType.imm
453      csBundle(0).lsrc(1) := 0.U
454      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
455      csBundle(0).fuType := FuType.i2v.U
456      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
457      csBundle(0).vecWen := true.B
458
459      for (i <- 0 until MAX_VLMUL / 2) {
460        csBundle(2 * i + 1).srcType(0) := SrcType.vp
461        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
462        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
463        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
464        csBundle(2 * i + 1).ldest := dest + (2 * i).U
465        csBundle(2 * i + 1).uopIdx := (2 * i).U
466        csBundle(2 * i + 2).srcType(0) := SrcType.vp
467        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
468        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
469        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
470        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
471        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
472      }
473    }
474    is(UopSplitType.VEC_WVV) {
475      for (i <- 0 until MAX_VLMUL / 2) {
476
477        csBundle(2 * i).lsrc(0) := src1 + i.U
478        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
479        csBundle(2 * i).lsrc(2) := dest + i.U
480        csBundle(2 * i).ldest := dest + i.U
481        csBundle(2 * i).uopIdx := (2 * i).U
482        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
483        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
484        csBundle(2 * i + 1).lsrc(2) := dest + i.U
485        csBundle(2 * i + 1).ldest := dest + i.U
486        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
487      }
488    }
489    is(UopSplitType.VEC_WFW) {
490      /*
491      f to vector move
492       */
493      csBundle(0).srcType(0) := SrcType.fp
494      csBundle(0).srcType(1) := SrcType.imm
495      csBundle(0).lsrc(1) := 0.U
496      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
497      csBundle(0).fuType := FuType.f2v.U
498      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
499      csBundle(0).rfWen := false.B
500      csBundle(0).fpWen := false.B
501      csBundle(0).vecWen := true.B
502
503      for (i <- 0 until MAX_VLMUL / 2) {
504        csBundle(2 * i + 1).srcType(0) := SrcType.vp
505        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
506        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
507        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
508        csBundle(2 * i + 1).ldest := dest + (2 * i).U
509        csBundle(2 * i + 1).uopIdx := (2 * i).U
510        csBundle(2 * i + 2).srcType(0) := SrcType.vp
511        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
512        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
513        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
514        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
515        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
516      }
517    }
518    is(UopSplitType.VEC_WXV) {
519      /*
520      i to vector move
521       */
522      csBundle(0).srcType(0) := SrcType.reg
523      csBundle(0).srcType(1) := SrcType.imm
524      csBundle(0).lsrc(1) := 0.U
525      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
526      csBundle(0).fuType := FuType.i2v.U
527      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
528      csBundle(0).vecWen := true.B
529
530      for (i <- 0 until MAX_VLMUL / 2) {
531        csBundle(2 * i + 1).srcType(0) := SrcType.vp
532        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
533        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
534        csBundle(2 * i + 1).lsrc(2) := dest + i.U
535        csBundle(2 * i + 1).ldest := dest + i.U
536        csBundle(2 * i + 1).uopIdx := (2 * i).U
537        csBundle(2 * i + 2).srcType(0) := SrcType.vp
538        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
539        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
540        csBundle(2 * i + 2).lsrc(2) := dest + i.U
541        csBundle(2 * i + 2).ldest := dest + i.U
542        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
543      }
544    }
545    is(UopSplitType.VEC_VVM) {
546      csBundle(0).lsrc(2) := dest
547      csBundle(0).ldest := dest
548      csBundle(0).uopIdx := 0.U
549      for (i <- 1 until MAX_VLMUL) {
550        csBundle(i).lsrc(0) := src1 + i.U
551        csBundle(i).lsrc(1) := src2 + i.U
552        csBundle(i).lsrc(2) := dest
553        csBundle(i).ldest := dest
554        csBundle(i).uopIdx := i.U
555      }
556    }
557    is(UopSplitType.VEC_VFM) {
558      /*
559      f to vector move
560       */
561      csBundle(0).srcType(0) := SrcType.fp
562      csBundle(0).srcType(1) := SrcType.imm
563      csBundle(0).lsrc(1) := 0.U
564      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
565      csBundle(0).fuType := FuType.f2v.U
566      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
567      csBundle(0).rfWen := false.B
568      csBundle(0).fpWen := false.B
569      csBundle(0).vecWen := true.B
570      //LMUL
571      csBundle(1).srcType(0) := SrcType.vp
572      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
573      csBundle(1).lsrc(2) := dest
574      csBundle(1).ldest := dest
575      csBundle(1).uopIdx := 0.U
576      for (i <- 1 until MAX_VLMUL) {
577        csBundle(i + 1).srcType(0) := SrcType.vp
578        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
579        csBundle(i + 1).lsrc(1) := src2 + i.U
580        csBundle(i + 1).lsrc(2) := dest
581        csBundle(i + 1).ldest := dest
582        csBundle(i + 1).uopIdx := i.U
583      }
584      csBundle(numOfUop - 1.U).ldest := dest
585    }
586    is(UopSplitType.VEC_VXM) {
587      /*
588      i to vector move
589       */
590      csBundle(0).srcType(0) := SrcType.reg
591      csBundle(0).srcType(1) := SrcType.imm
592      csBundle(0).lsrc(1) := 0.U
593      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
594      csBundle(0).fuType := FuType.i2v.U
595      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
596      csBundle(0).vecWen := true.B
597      //LMUL
598      csBundle(1).srcType(0) := SrcType.vp
599      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
600      csBundle(1).lsrc(2) := dest
601      csBundle(1).ldest := dest
602      csBundle(1).uopIdx := 0.U
603      for (i <- 1 until MAX_VLMUL) {
604        csBundle(i + 1).srcType(0) := SrcType.vp
605        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
606        csBundle(i + 1).lsrc(1) := src2 + i.U
607        csBundle(i + 1).lsrc(2) := dest
608        csBundle(i + 1).ldest := dest
609        csBundle(i + 1).uopIdx := i.U
610      }
611      csBundle(numOfUop - 1.U).ldest := dest
612    }
613    is(UopSplitType.VEC_SLIDE1UP) {
614      /*
615      i to vector move
616       */
617      csBundle(0).srcType(0) := SrcType.reg
618      csBundle(0).srcType(1) := SrcType.imm
619      csBundle(0).lsrc(1) := 0.U
620      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
621      csBundle(0).fuType := FuType.i2v.U
622      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
623      csBundle(0).vecWen := true.B
624      //LMUL
625      csBundle(1).srcType(0) := SrcType.vp
626      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
627      csBundle(1).lsrc(2) := dest
628      csBundle(1).ldest := dest
629      csBundle(1).uopIdx := 0.U
630      for (i <- 1 until MAX_VLMUL) {
631        csBundle(i + 1).srcType(0) := SrcType.vp
632        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
633        csBundle(i + 1).lsrc(1) := src2 + i.U
634        csBundle(i + 1).lsrc(2) := dest + i.U
635        csBundle(i + 1).ldest := dest + i.U
636        csBundle(i + 1).uopIdx := i.U
637      }
638    }
639    is(UopSplitType.VEC_FSLIDE1UP) {
640      /*
641      f to vector move
642       */
643      csBundle(0).srcType(0) := SrcType.fp
644      csBundle(0).srcType(1) := SrcType.imm
645      csBundle(0).lsrc(1) := 0.U
646      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
647      csBundle(0).fuType := FuType.f2v.U
648      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
649      csBundle(0).rfWen := false.B
650      csBundle(0).fpWen := false.B
651      csBundle(0).vecWen := true.B
652      //LMUL
653      csBundle(1).srcType(0) := SrcType.vp
654      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
655      csBundle(1).lsrc(1) := src2
656      csBundle(1).lsrc(2) := dest
657      csBundle(1).ldest := dest
658      csBundle(1).uopIdx := 0.U
659      for (i <- 1 until MAX_VLMUL) {
660        csBundle(i + 1).srcType(0) := SrcType.vp
661        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
662        csBundle(i + 1).lsrc(1) := src2 + i.U
663        csBundle(i + 1).lsrc(2) := dest + i.U
664        csBundle(i + 1).ldest := dest + i.U
665        csBundle(i + 1).uopIdx := i.U
666      }
667    }
668    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
669      /*
670      i to vector move
671       */
672      csBundle(0).srcType(0) := SrcType.reg
673      csBundle(0).srcType(1) := SrcType.imm
674      csBundle(0).lsrc(1) := 0.U
675      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
676      csBundle(0).fuType := FuType.i2v.U
677      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
678      csBundle(0).vecWen := true.B
679      //LMUL
680      for (i <- 0 until MAX_VLMUL) {
681        csBundle(2 * i + 1).srcType(0) := SrcType.vp
682        csBundle(2 * i + 1).srcType(1) := SrcType.vp
683        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
684        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
685        csBundle(2 * i + 1).lsrc(2) := dest + i.U
686        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
687        csBundle(2 * i + 1).uopIdx := (2 * i).U
688        if (2 * i + 2 < MAX_VLMUL * 2) {
689          csBundle(2 * i + 2).srcType(0) := SrcType.vp
690          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
691          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
692          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
693          csBundle(2 * i + 2).ldest := dest + i.U
694          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
695        }
696      }
697      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
698      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
699      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
700    }
701    is(UopSplitType.VEC_FSLIDE1DOWN) {
702      /*
703      f to vector move
704       */
705      csBundle(0).srcType(0) := SrcType.fp
706      csBundle(0).srcType(1) := SrcType.imm
707      csBundle(0).lsrc(1) := 0.U
708      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
709      csBundle(0).fuType := FuType.f2v.U
710      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
711      csBundle(0).rfWen := false.B
712      csBundle(0).fpWen := false.B
713      csBundle(0).vecWen := true.B
714      //LMUL
715      for (i <- 0 until MAX_VLMUL) {
716        csBundle(2 * i + 1).srcType(0) := SrcType.vp
717        csBundle(2 * i + 1).srcType(1) := SrcType.vp
718        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
719        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
720        csBundle(2 * i + 1).lsrc(2) := dest + i.U
721        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
722        csBundle(2 * i + 1).uopIdx := (2 * i).U
723        if (2 * i + 2 < MAX_VLMUL * 2) {
724          csBundle(2 * i + 2).srcType(0) := SrcType.vp
725          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
726          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
727          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
728          csBundle(2 * i + 2).ldest := dest + i.U
729          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
730        }
731      }
732      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
733      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
734      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
735    }
736    is(UopSplitType.VEC_VRED) {
737      when(vlmulReg === "b001".U) {
738        csBundle(0).srcType(2) := SrcType.DC
739        csBundle(0).lsrc(0) := src2 + 1.U
740        csBundle(0).lsrc(1) := src2
741        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
742        csBundle(0).uopIdx := 0.U
743      }
744      when(vlmulReg === "b010".U) {
745        csBundle(0).srcType(2) := SrcType.DC
746        csBundle(0).lsrc(0) := src2 + 1.U
747        csBundle(0).lsrc(1) := src2
748        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
749        csBundle(0).uopIdx := 0.U
750
751        csBundle(1).srcType(2) := SrcType.DC
752        csBundle(1).lsrc(0) := src2 + 3.U
753        csBundle(1).lsrc(1) := src2 + 2.U
754        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
755        csBundle(1).uopIdx := 1.U
756
757        csBundle(2).srcType(2) := SrcType.DC
758        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
759        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
760        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
761        csBundle(2).uopIdx := 2.U
762      }
763      when(vlmulReg === "b011".U) {
764        for (i <- 0 until MAX_VLMUL) {
765          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
766            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
767            csBundle(i).lsrc(1) := src2 + (i * 2).U
768            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
769          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
770            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
771            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
772            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
773          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
774            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
775            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
776            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
777          }
778          csBundle(i).srcType(2) := SrcType.DC
779          csBundle(i).uopIdx := i.U
780        }
781      }
782      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
783        /*
784         * 2 <= vlmul <= 8
785         */
786        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
787        csBundle(numOfUop - 1.U).lsrc(0) := src1
788        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
789        csBundle(numOfUop - 1.U).lsrc(2) := dest
790        csBundle(numOfUop - 1.U).ldest := dest
791        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
792      }
793    }
794    is(UopSplitType.VEC_VFRED) {
795      val vlmul = vlmulReg
796      val vsew = vsewReg
797      when(vlmul === VLmul.m8){
798        for (i <- 0 until 4) {
799          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
800          csBundle(i).lsrc(1) := src2 + (i * 2).U
801          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
802          csBundle(i).uopIdx := i.U
803        }
804        for (i <- 4 until 6) {
805          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
806          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
807          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
808          csBundle(i).uopIdx := i.U
809        }
810        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
811        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
812        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
813        csBundle(6).uopIdx := 6.U
814        when(vsew === VSew.e64) {
815          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
816          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
817          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
818          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
819          csBundle(7).uopIdx := 7.U
820          csBundle(8).lsrc(0) := src1
821          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
822          csBundle(8).ldest := dest
823          csBundle(8).uopIdx := 8.U
824        }
825        when(vsew === VSew.e32) {
826          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
827          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
828          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
829          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
830          csBundle(7).uopIdx := 7.U
831          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
832          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
833          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
834          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
835          csBundle(8).uopIdx := 8.U
836          csBundle(9).lsrc(0) := src1
837          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
838          csBundle(9).ldest := dest
839          csBundle(9).uopIdx := 9.U
840        }
841        when(vsew === VSew.e16) {
842          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
843          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
844          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
845          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
846          csBundle(7).uopIdx := 7.U
847          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
848          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
849          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
850          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
851          csBundle(8).uopIdx := 8.U
852          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
853          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
854          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
855          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
856          csBundle(9).uopIdx := 9.U
857          csBundle(10).lsrc(0) := src1
858          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
859          csBundle(10).ldest := dest
860          csBundle(10).uopIdx := 10.U
861        }
862      }
863      when(vlmul === VLmul.m4) {
864        for (i <- 0 until 2) {
865          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
866          csBundle(i).lsrc(1) := src2 + (i * 2).U
867          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
868          csBundle(i).uopIdx := i.U
869        }
870        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
871        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
872        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
873        csBundle(2).uopIdx := 2.U
874        when(vsew === VSew.e64) {
875          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
876          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
877          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
878          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
879          csBundle(3).uopIdx := 3.U
880          csBundle(4).lsrc(0) := src1
881          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
882          csBundle(4).ldest := dest
883          csBundle(4).uopIdx := 4.U
884        }
885        when(vsew === VSew.e32) {
886          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
887          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
888          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
889          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
890          csBundle(3).uopIdx := 3.U
891          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
892          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
893          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
894          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
895          csBundle(4).uopIdx := 4.U
896          csBundle(5).lsrc(0) := src1
897          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
898          csBundle(5).ldest := dest
899          csBundle(5).uopIdx := 5.U
900        }
901        when(vsew === VSew.e16) {
902          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
903          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
904          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
905          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
906          csBundle(3).uopIdx := 3.U
907          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
908          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
909          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
910          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
911          csBundle(4).uopIdx := 4.U
912          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
913          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
914          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
915          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
916          csBundle(5).uopIdx := 5.U
917          csBundle(6).lsrc(0) := src1
918          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
919          csBundle(6).ldest := dest
920          csBundle(6).uopIdx := 6.U
921        }
922      }
923      when(vlmul === VLmul.m2) {
924        csBundle(0).lsrc(0) := src2 + 1.U
925        csBundle(0).lsrc(1) := src2 + 0.U
926        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
927        csBundle(0).uopIdx := 0.U
928        when(vsew === VSew.e64) {
929          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
930          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
931          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
932          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
933          csBundle(1).uopIdx := 1.U
934          csBundle(2).lsrc(0) := src1
935          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
936          csBundle(2).ldest := dest
937          csBundle(2).uopIdx := 2.U
938        }
939        when(vsew === VSew.e32) {
940          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
941          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
942          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
943          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
944          csBundle(1).uopIdx := 1.U
945          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
946          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
947          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
948          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
949          csBundle(2).uopIdx := 2.U
950          csBundle(3).lsrc(0) := src1
951          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
952          csBundle(3).ldest := dest
953          csBundle(3).uopIdx := 3.U
954        }
955        when(vsew === VSew.e16) {
956          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
957          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
958          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
959          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
960          csBundle(1).uopIdx := 1.U
961          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
962          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
963          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
964          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
965          csBundle(2).uopIdx := 2.U
966          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
967          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
968          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
969          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
970          csBundle(3).uopIdx := 3.U
971          csBundle(4).lsrc(0) := src1
972          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
973          csBundle(4).ldest := dest
974          csBundle(4).uopIdx := 4.U
975        }
976      }
977      when(vlmul === VLmul.m1) {
978        when(vsew === VSew.e64) {
979          csBundle(0).lsrc(0) := src2
980          csBundle(0).lsrc(1) := src2
981          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
982          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
983          csBundle(0).uopIdx := 0.U
984          csBundle(1).lsrc(0) := src1
985          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
986          csBundle(1).ldest := dest
987          csBundle(1).uopIdx := 1.U
988        }
989        when(vsew === VSew.e32) {
990          csBundle(0).lsrc(0) := src2
991          csBundle(0).lsrc(1) := src2
992          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
993          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
994          csBundle(0).uopIdx := 0.U
995          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
996          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
997          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
998          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
999          csBundle(1).uopIdx := 1.U
1000          csBundle(2).lsrc(0) := src1
1001          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1002          csBundle(2).ldest := dest
1003          csBundle(2).uopIdx := 2.U
1004        }
1005        when(vsew === VSew.e16) {
1006          csBundle(0).lsrc(0) := src2
1007          csBundle(0).lsrc(1) := src2
1008          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1009          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1010          csBundle(0).uopIdx := 0.U
1011          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1012          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1013          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1014          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1015          csBundle(1).uopIdx := 1.U
1016          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1017          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1018          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1019          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1020          csBundle(2).uopIdx := 2.U
1021          csBundle(3).lsrc(0) := src1
1022          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1023          csBundle(3).ldest := dest
1024          csBundle(3).uopIdx := 3.U
1025        }
1026      }
1027      when(vlmul === VLmul.mf2) {
1028        when(vsew === VSew.e32) {
1029          csBundle(0).lsrc(0) := src2
1030          csBundle(0).lsrc(1) := src2
1031          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1032          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1033          csBundle(0).uopIdx := 0.U
1034          csBundle(1).lsrc(0) := src1
1035          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1036          csBundle(1).ldest := dest
1037          csBundle(1).uopIdx := 1.U
1038        }
1039        when(vsew === VSew.e16) {
1040          csBundle(0).lsrc(0) := src2
1041          csBundle(0).lsrc(1) := src2
1042          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1043          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1044          csBundle(0).uopIdx := 0.U
1045          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1046          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1047          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1048          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1049          csBundle(1).uopIdx := 1.U
1050          csBundle(2).lsrc(0) := src1
1051          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1052          csBundle(2).ldest := dest
1053          csBundle(2).uopIdx := 2.U
1054        }
1055      }
1056      when(vlmul === VLmul.mf4) {
1057        when(vsew === VSew.e16) {
1058          csBundle(0).lsrc(0) := src2
1059          csBundle(0).lsrc(1) := src2
1060          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1061          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1062          csBundle(0).uopIdx := 0.U
1063          csBundle(1).lsrc(0) := src1
1064          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1065          csBundle(1).ldest := dest
1066          csBundle(1).uopIdx := 1.U
1067        }
1068      }
1069    }
1070
1071    is(UopSplitType.VEC_VFREDOSUM) {
1072      import yunsuan.VfaluType
1073      val vlmul = vlmulReg
1074      val vsew = vsewReg
1075      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1076      when(vlmul === VLmul.m8) {
1077        when(vsew === VSew.e64) {
1078          val vlmax = 16
1079          for (i <- 0 until vlmax) {
1080            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1081            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1082            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1083            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1084            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1085            csBundle(i).uopIdx := i.U
1086          }
1087        }
1088        when(vsew === VSew.e32) {
1089          val vlmax = 32
1090          for (i <- 0 until vlmax) {
1091            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1092            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1093            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1094            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1095            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1096            csBundle(i).uopIdx := i.U
1097          }
1098        }
1099        when(vsew === VSew.e16) {
1100          val vlmax = 64
1101          for (i <- 0 until vlmax) {
1102            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1103            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1104            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1105            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1106            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1107            csBundle(i).uopIdx := i.U
1108          }
1109        }
1110      }
1111      when(vlmul === VLmul.m4) {
1112        when(vsew === VSew.e64) {
1113          val vlmax = 8
1114          for (i <- 0 until vlmax) {
1115            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1116            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1117            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1118            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1119            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1120            csBundle(i).uopIdx := i.U
1121          }
1122        }
1123        when(vsew === VSew.e32) {
1124          val vlmax = 16
1125          for (i <- 0 until vlmax) {
1126            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1127            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1128            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1129            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1130            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1131            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1132            csBundle(i).uopIdx := i.U
1133          }
1134        }
1135        when(vsew === VSew.e16) {
1136          val vlmax = 32
1137          for (i <- 0 until vlmax) {
1138            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1139            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1140            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1141            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1142            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1143            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1144            csBundle(i).uopIdx := i.U
1145          }
1146        }
1147      }
1148      when(vlmul === VLmul.m2) {
1149        when(vsew === VSew.e64) {
1150          val vlmax = 4
1151          for (i <- 0 until vlmax) {
1152            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1153            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1154            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1155            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1156            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1157            csBundle(i).uopIdx := i.U
1158          }
1159        }
1160        when(vsew === VSew.e32) {
1161          val vlmax = 8
1162          for (i <- 0 until vlmax) {
1163            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1164            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1165            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1166            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1167            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1168            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1169            csBundle(i).uopIdx := i.U
1170          }
1171        }
1172        when(vsew === VSew.e16) {
1173          val vlmax = 16
1174          for (i <- 0 until vlmax) {
1175            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1176            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1177            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1178            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1179            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1180            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1181            csBundle(i).uopIdx := i.U
1182          }
1183        }
1184      }
1185      when(vlmul === VLmul.m1) {
1186        when(vsew === VSew.e64) {
1187          val vlmax = 2
1188          for (i <- 0 until vlmax) {
1189            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1190            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1191            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1192            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1193            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1194            csBundle(i).uopIdx := i.U
1195          }
1196        }
1197        when(vsew === VSew.e32) {
1198          val vlmax = 4
1199          for (i <- 0 until vlmax) {
1200            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1201            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1202            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1203            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1204            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1205            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1206            csBundle(i).uopIdx := i.U
1207          }
1208        }
1209        when(vsew === VSew.e16) {
1210          val vlmax = 8
1211          for (i <- 0 until vlmax) {
1212            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1213            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1214            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1215            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1216            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1217            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1218            csBundle(i).uopIdx := i.U
1219          }
1220        }
1221      }
1222      when(vlmul === VLmul.mf2) {
1223        when(vsew === VSew.e32) {
1224          val vlmax = 2
1225          for (i <- 0 until vlmax) {
1226            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1227            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1228            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1229            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1230            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1231            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1232            csBundle(i).uopIdx := i.U
1233          }
1234        }
1235        when(vsew === VSew.e16) {
1236          val vlmax = 4
1237          for (i <- 0 until vlmax) {
1238            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1239            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1240            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1241            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1242            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1243            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1244            csBundle(i).uopIdx := i.U
1245          }
1246        }
1247      }
1248      when(vlmul === VLmul.mf4) {
1249        when(vsew === VSew.e16) {
1250          val vlmax = 2
1251          for (i <- 0 until vlmax) {
1252            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1253            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1254            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1255            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1256            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1257            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1258            csBundle(i).uopIdx := i.U
1259          }
1260        }
1261      }
1262    }
1263
1264    is(UopSplitType.VEC_SLIDEUP) {
1265      // i to vector move
1266      csBundle(0).srcType(0) := SrcType.reg
1267      csBundle(0).srcType(1) := SrcType.imm
1268      csBundle(0).lsrc(1) := 0.U
1269      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1270      csBundle(0).fuType := FuType.i2v.U
1271      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1272      csBundle(0).vecWen := true.B
1273      // LMUL
1274      for (i <- 0 until MAX_VLMUL)
1275        for (j <- 0 to i) {
1276          val old_vd = if (j == 0) {
1277            dest + i.U
1278          } else (VECTOR_TMP_REG_LMUL + j).U
1279          val vd = if (j == i) {
1280            dest + i.U
1281          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1282          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1283          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1284          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1285          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1286          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1287          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1288        }
1289    }
1290
1291    is(UopSplitType.VEC_SLIDEDOWN) {
1292      // i to vector move
1293      csBundle(0).srcType(0) := SrcType.reg
1294      csBundle(0).srcType(1) := SrcType.imm
1295      csBundle(0).lsrc(1) := 0.U
1296      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1297      csBundle(0).fuType := FuType.i2v.U
1298      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1299      csBundle(0).vecWen := true.B
1300      // LMUL
1301      for (i <- 0 until MAX_VLMUL)
1302        for (j <- (0 to i).reverse) {
1303          when(i.U < lmul) {
1304            val old_vd = if (j == 0) {
1305              dest + lmul - 1.U - i.U
1306            } else (VECTOR_TMP_REG_LMUL + j).U
1307            val vd = if (j == i) {
1308              dest + lmul - 1.U - i.U
1309            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1310            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1311            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1312            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1313            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1314            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1315            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1316          }
1317        }
1318    }
1319
1320    is(UopSplitType.VEC_M0X) {
1321      // LMUL
1322      for (i <- 0 until MAX_VLMUL) {
1323        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1324        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1325        csBundle(i).srcType(0) := srcType0
1326        csBundle(i).srcType(1) := SrcType.vp
1327        csBundle(i).rfWen := false.B
1328        csBundle(i).fpWen := false.B
1329        csBundle(i).vecWen := true.B
1330        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1331        csBundle(i).lsrc(1) := src2
1332        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1333        csBundle(i).ldest := ldest
1334        csBundle(i).uopIdx := i.U
1335      }
1336      csBundle(lmul - 1.U).rfWen := true.B
1337      csBundle(lmul - 1.U).fpWen := false.B
1338      csBundle(lmul - 1.U).vecWen := false.B
1339      csBundle(lmul - 1.U).ldest := dest
1340    }
1341
1342    is(UopSplitType.VEC_MVV) {
1343      // LMUL
1344      for (i <- 0 until MAX_VLMUL) {
1345        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1346        csBundle(i * 2 + 0).srcType(0) := srcType0
1347        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1348        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1349        csBundle(i * 2 + 0).lsrc(1) := src2
1350        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1351        csBundle(i * 2 + 0).ldest := dest + i.U
1352        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1353
1354        csBundle(i * 2 + 1).srcType(0) := srcType0
1355        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1356        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1357        csBundle(i * 2 + 1).lsrc(1) := src2
1358        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1359        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1360        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1361      }
1362    }
1363
1364    is(UopSplitType.VEC_M0X_VFIRST) {
1365      // LMUL
1366      csBundle(0).rfWen := true.B
1367      csBundle(0).fpWen := false.B
1368      csBundle(0).vecWen := false.B
1369      csBundle(0).ldest := dest
1370    }
1371    is(UopSplitType.VEC_VWW) {
          // Wires up to 2 * MAX_VLMUL uop slots, all writing temporaries VECTOR_TMP_REG_LMUL + i:
          //   - slots i <  lmul read src2 + i on both lsrc(0) and lsrc(1);
          //   - slots i >= lmul read the two adjacent temporaries VECTOR_TMP_REG_LMUL + 2*k + 1 and
          //     VECTOR_TMP_REG_LMUL + 2*k with k = i - lmul (Cat(x, 0.U(1.W)) appends a zero bit, i.e. 2*x).
          // The slot selected by numOfUop - 1 is then overridden to read src1 / old dest and write dest.
1372      for (i <- 0 until MAX_VLMUL*2) {
1373        when(i.U < lmul){
1374          csBundle(i).srcType(2) := SrcType.DC
1375          csBundle(i).lsrc(0) := src2 + i.U
1376          csBundle(i).lsrc(1) := src2 + i.U
1377          // csBundle(i).lsrc(2) := dest + (2 * i).U
1378          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1379          csBundle(i).uopIdx :=  i.U
1380        } otherwise {
1381          csBundle(i).srcType(2) := SrcType.DC
1382          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1383          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1384          // csBundle(i).lsrc(2) := dest + (2 * i).U
1385          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1386          csBundle(i).uopIdx := i.U
1387        }
            // Override for the last uop (dynamic index numOfUop - 1): source 2 becomes a vector operand
            // (old dest), lsrc(0) is src1 and the result goes to dest.
            // NOTE(review): these four connections do not depend on i but are re-emitted on every
            // iteration; since the copy from the final iteration is the last connection, it looks like
            // they could be hoisted below the loop without changing the result -- confirm before moving.
1388        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1389        csBundle(numOfUop-1.U).lsrc(0) := src1
1390        csBundle(numOfUop-1.U).lsrc(2) := dest
1391        csBundle(numOfUop-1.U).ldest := dest
1392      }
1393    }
1394    is(UopSplitType.VEC_RGATHER) {
          // Helper: fills len * len uops in slots i*len + j.
          //   lsrc(0) = src1 + i, lsrc(1) = src2 + j.
          //   For a fixed i the uops are chained through temporaries: uop j reads its old vd from
          //   dest + i (j == 0) or VECTOR_TMP_REG_LMUL + j - 1, and writes VECTOR_TMP_REG_LMUL + j,
          //   except the last one (j == len-1), which writes dest + i.
          // len is a Scala Int, so both loops are unrolled at elaboration time.
1395      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1396        for (i <- 0 until len)
1397          for (j <- 0 until len) {
1398            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1399            // csBundle(i * len + j).srcType(1) := SrcType.vp
1400            // csBundle(i * len + j).srcType(2) := SrcType.vp
1401            csBundle(i * len + j).lsrc(0) := src1 + i.U
1402            csBundle(i * len + j).lsrc(1) := src2 + j.U
1403            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1404            csBundle(i * len + j).lsrc(2) := vd_old
1405            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1406            csBundle(i * len + j).ldest := vd
1407            csBundle(i * len + j).uopIdx := (i * len + j).U
1408          }
1409      }
          // Select the layout from vlmulReg: b001 -> len 2, b010 -> len 4, b011 -> len 8.
          // No connection is made in this block for any other vlmulReg value.
1410      switch(vlmulReg) {
1411        is("b001".U ){
1412          genCsBundle_VEC_RGATHER(2)
1413        }
1414        is("b010".U ){
1415          genCsBundle_VEC_RGATHER(4)
1416        }
1417        is("b011".U ){
1418          genCsBundle_VEC_RGATHER(8)
1419        }
1420      }
1421    }
1422    is(UopSplitType.VEC_RGATHER_VX) {
          // Slot 0 is an i2v move of the scalar/immediate operand into the temporary
          // VECTOR_TMP_REG_LMUL; the gather uops built by the helper occupy slots 1.. and all read
          // that temporary as lsrc(0).
          //
          // Helper: fills len * len gather uops in slots i*len + j + 1 (shifted by one for the move).
          //   For a fixed i, uop j reads its old vd from dest + i (j == 0) or VECTOR_TMP_REG_LMUL + j,
          //   and writes VECTOR_TMP_REG_LMUL + j + 1, except the last one (j == len-1), which writes dest + i.
          //   uopIdx is i*len + j, i.e. it does not count the move uop in slot 0.
1423      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1424        for (i <- 0 until len)
1425          for (j <- 0 until len) {
1426            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1427            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1428            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1429            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1430            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1431            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1432            csBundle(i * len + j + 1).lsrc(2) := vd_old
1433            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1434            csBundle(i * len + j + 1).ldest := vd
1435            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1436          }
1437      }
1438      // i to vector move
1439      csBundle(0).srcType(0) := SrcType.reg
1440      csBundle(0).srcType(1) := SrcType.imm
1441      csBundle(0).lsrc(1) := 0.U
1442      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1443      csBundle(0).fuType := FuType.i2v.U
          // fuOpType = {imm2Vec or i2Vec (chosen by src1IsImm), vsewReg}.
1444      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1445      csBundle(0).rfWen := false.B
1446      csBundle(0).fpWen := false.B
1447      csBundle(0).vecWen := true.B
          // Default wiring for len = 1. The switch below adds the larger layouts for vlmulReg
          // b001 / b010 / b011; being connected later, they take precedence on the slots they cover.
1448      genCsBundle_RGATHER_VX(1)
1449      switch(vlmulReg) {
1450        is("b001".U ){
1451          genCsBundle_RGATHER_VX(2)
1452        }
1453        is("b010".U ){
1454          genCsBundle_RGATHER_VX(4)
1455        }
1456        is("b011".U ){
1457          genCsBundle_RGATHER_VX(8)
1458        }
1459      }
1460    }
1461    is(UopSplitType.VEC_RGATHEREI16) {
          // Four layout helpers that differ only in how src1 (lsrc(0)) is stepped relative to the
          // destination register index i; the dispatch at the bottom picks one from vsewReg and vlmulReg.
          // In every helper lsrc(1) = src2 + j, and for a fixed i the uops are chained through
          // temporaries starting from old vd = dest + i and ending with ldest = dest + i.
          //
          // SEW8 variant: two uops per (i, j), in slots (i*len + j)*2 and (i*len + j)*2 + 1.
          //   First uop:  lsrc(0) = src1 + 2*i,     old vd = dest + i (j == 0) or VECTOR_TMP_REG_LMUL + 2*j - 1,
          //               ldest = VECTOR_TMP_REG_LMUL + 2*j.
          //   Second uop: lsrc(0) = src1 + 2*i + 1, old vd = VECTOR_TMP_REG_LMUL + 2*j,
          //               ldest = dest + i (j == len-1) or VECTOR_TMP_REG_LMUL + 2*j + 1.
1462      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1463        for (i <- 0 until len)
1464          for (j <- 0 until len) {
1465            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1466            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1467            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1468            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1469            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1470            csBundle((i * len + j)*2+0).ldest := vd0
1471            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1472            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1473            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1474            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1475            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1476            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1477            csBundle((i * len + j)*2+1).ldest := vd1
1478            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1479          }
1480      }
          // Generic variant (used by the otherwise branches of the dispatch): one uop per (i, j)
          // in slot i*len + j, with lsrc(0) = src1 + i.
1481      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1482        for (i <- 0 until len)
1483          for (j <- 0 until len) {
1484            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1485            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1486            csBundle(i * len + j).lsrc(0) := src1 + i.U
1487            csBundle(i * len + j).lsrc(1) := src2 + j.U
1488            csBundle(i * len + j).lsrc(2) := vd_old
1489            csBundle(i * len + j).ldest := vd
1490            csBundle(i * len + j).uopIdx := (i * len + j).U
1491          }
1492      }
          // SEW32 variant: same as the generic one except lsrc(0) = src1 + i / 2
          // (Scala integer division, evaluated at elaboration time).
1493      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1494        for (i <- 0 until len)
1495          for (j <- 0 until len) {
1496            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1497            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1498            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1499            csBundle(i * len + j).lsrc(1) := src2 + j.U
1500            csBundle(i * len + j).lsrc(2) := vd_old
1501            csBundle(i * len + j).ldest := vd
1502            csBundle(i * len + j).uopIdx := (i * len + j).U
1503          }
1504      }
          // SEW64 variant: same as the generic one except lsrc(0) = src1 + i / 4
          // (Scala integer division, evaluated at elaboration time).
1505      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1506        for (i <- 0 until len)
1507          for (j <- 0 until len) {
1508            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1509            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1510            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1511            csBundle(i * len + j).lsrc(1) := src2 + j.U
1512            csBundle(i * len + j).lsrc(2) := vd_old
1513            csBundle(i * len + j).ldest := vd
1514            csBundle(i * len + j).uopIdx := (i * len + j).U
1515          }
1516      }
          // Default wiring for len = 1, selected by vsewReg: all-zero vsewReg -> SEW8 helper,
          // e32 -> SEW32 helper, e64 -> SEW64 helper, anything else -> generic helper.
1517      when(!vsewReg.orR){
1518        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1519      }.elsewhen(vsewReg === VSew.e32){
1520        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1521      }.elsewhen(vsewReg === VSew.e64){
1522        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1523      }.otherwise{
1524        genCsBundle_VEC_RGATHEREI16(1)
1525      }
          // Larger layouts for vlmulReg b001 / b010 / b011 (len 2 / 4 / 8); connected after the
          // defaults above, so they take precedence on the slots they cover.
1526      switch(vlmulReg) {
1527        is("b001".U) {
1528          when(!vsewReg.orR) {
1529            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1530          }.elsewhen(vsewReg === VSew.e32){
1531            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1532          }.elsewhen(vsewReg === VSew.e64){
1533            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1534          }.otherwise{
1535            genCsBundle_VEC_RGATHEREI16(2)
1536          }
1537        }
1538        is("b010".U) {
1539          when(!vsewReg.orR) {
1540            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1541          }.elsewhen(vsewReg === VSew.e32){
1542            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1543          }.elsewhen(vsewReg === VSew.e64){
1544            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1545          }.otherwise{
1546            genCsBundle_VEC_RGATHEREI16(4)
1547          }
1548        }
1549        is("b011".U) {
              // NOTE(review): unlike the cases above there is no SEW8 branch here, so an all-zero
              // vsewReg falls into the otherwise branch (generic helper). Presumably that
              // vsew/vlmul combination is not expected to reach this point -- TODO confirm.
1550          when(vsewReg === VSew.e32){
1551            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1552          }.elsewhen(vsewReg === VSew.e64){
1553            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1554          }.otherwise{
1555            genCsBundle_VEC_RGATHEREI16(8)
1556          }
1557        }
1558      }
1559    }
1560    is(UopSplitType.VEC_COMPRESS) {
          // Helper: for each source register index i it emits jlen uops (i + 2, or i + 1 for the last i),
          // packed contiguously so that group i starts at slot i*(i+3)/2 (= 0, 2, 5, 9, ...).
          // Per uop (i, j):
          //   lsrc(0) = src1, lsrc(1) = src2 + i, lsrc(3) = VECTOR_TMP_REG_LMUL;
          //   old vd (lsrc(2)) = dest + i when j == i, otherwise VECTOR_TMP_REG_LMUL + j + 1;
          //   ldest = dest + j for the last group (i == len-1); for earlier groups it is
          //           VECTOR_TMP_REG_LMUL when j == i + 1, otherwise VECTOR_TMP_REG_LMUL + j + 1;
          //   srcType(0) and srcType(2) are DontCare for the extra uop j == i + 1, vp otherwise;
          //   srcType(1) and srcType(3) are always vp.
1561      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1562        for (i <- 0 until len) {
1563          val jlen = if (i == len-1) i+1 else i+2
1564          for (j <- 0 until jlen) {
1565            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1566            val vd = if(i==len-1) (dest + j.U) else {
1567              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1568            }
1569            val src13Type = if (j == i+1) DontCare else SrcType.vp
1570            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1571            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1572            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1573            csBundle(i*(i+3)/2 + j).srcType(3) := SrcType.vp
1574            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1575            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1576            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1577            csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1578            csBundle(i*(i+3)/2 + j).ldest := vd
1579            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1580          }
1581        }
1582      }
          // Select the layout from vlmulReg: b001 -> len 2, b010 -> len 4, b011 -> len 8.
          // No connection is made in this block for any other vlmulReg value.
1583      switch(vlmulReg) {
1584        is("b001".U ){
1585          genCsBundle_VEC_COMPRESS(2)
1586        }
1587        is("b010".U ){
1588          genCsBundle_VEC_COMPRESS(4)
1589        }
1590        is("b011".U ){
1591          genCsBundle_VEC_COMPRESS(8)
1592        }
1593      }
1594    }
1595    is(UopSplitType.VEC_MVNR) {
          // One uop per register index i: every logical source and the destination are stepped by i
          // (src1 + i, src2 + i, old vd = dest + i, ldest = dest + i), and uopIdx equals the slot index.
1596      for (i <- 0 until MAX_VLMUL) {
1597        csBundle(i).lsrc(0) := src1 + i.U
1598        csBundle(i).lsrc(1) := src2 + i.U
1599        csBundle(i).lsrc(2) := dest + i.U
1600        csBundle(i).ldest := dest + i.U
1601        csBundle(i).uopIdx := i.U
1602      }
1603    }
1604    is(UopSplitType.VEC_US_LDST) {
          // Slot 0: an i2v move (fuType i2v, op {i2Vec, e64}) whose source 0 is an integer register
          // (SrcType.reg) and whose result goes to FP_TMP_REG_MV with fpWen set.
          // Slots 1..MAX_VLMUL: the memory uops; each reads FP_TMP_REG_MV as an fp source 0 and
          // dest + i as old vd, and writes dest + i. Their uopIdx starts at 0 (the move is not counted).
          // Every uop, including the move, is flagged vlsInstr.
1605      /*
1606      FMV.D.X
1607       */
1608      csBundle(0).srcType(0) := SrcType.reg
1609      csBundle(0).srcType(1) := SrcType.imm
1610      csBundle(0).lsrc(1) := 0.U
1611      csBundle(0).ldest := FP_TMP_REG_MV.U
1612      csBundle(0).fuType := FuType.i2v.U
1613      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1614      csBundle(0).rfWen := false.B
1615      csBundle(0).fpWen := true.B
1616      csBundle(0).vecWen := false.B
1617      csBundle(0).vlsInstr := true.B
1618      //LMUL
1619      for (i <- 0 until MAX_VLMUL) {
1620        csBundle(i + 1).srcType(0) := SrcType.fp
1621        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1622        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1623        csBundle(i + 1).ldest := dest + i.U
1624        csBundle(i + 1).uopIdx := i.U
1625        csBundle(i + 1).vlsInstr := true.B
1626      }
1627    }
1628    is(UopSplitType.VEC_S_LDST) {
          // Two leading i2v moves followed by up to MAX_VLMUL memory uops:
          //   slot 0 moves integer source 0 into FP_TMP_REG_MV (fpWen set);
          //   slot 1 moves latchedInst.lsrc(1) into register number VECTOR_TMP_REG_LMUL, also with
          //          fpWen set (presumably the stride operand -- TODO confirm);
          //   slots 2.. read both moved values as fp sources 0 and 1, read dest + i as old vd and
          //          write dest + i. Their uopIdx starts at 0 (the two moves are not counted).
          // Every uop is flagged vlsInstr.
1629      /*
1630      FMV.D.X
1631       */
1632      csBundle(0).srcType(0) := SrcType.reg
1633      csBundle(0).srcType(1) := SrcType.imm
1634      csBundle(0).lsrc(1) := 0.U
1635      csBundle(0).ldest := FP_TMP_REG_MV.U
1636      csBundle(0).fuType := FuType.i2v.U
1637      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1638      csBundle(0).rfWen := false.B
1639      csBundle(0).fpWen := true.B
1640      csBundle(0).vecWen := false.B
1641      csBundle(0).vlsInstr := true.B
1642
          // Second move: same op as slot 0, but sourced from the instruction's second logical source.
1643      csBundle(1).srcType(0) := SrcType.reg
1644      csBundle(1).srcType(1) := SrcType.imm
1645      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1646      csBundle(1).lsrc(1) := 0.U
1647      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U
1648      csBundle(1).fuType := FuType.i2v.U
1649      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1650      csBundle(1).rfWen := false.B
1651      csBundle(1).fpWen := true.B
1652      csBundle(1).vecWen := false.B
1653      csBundle(1).vlsInstr := true.B
1654
1655      //LMUL
1656      for (i <- 0 until MAX_VLMUL) {
1657        csBundle(i + 2).srcType(0) := SrcType.fp
1658        csBundle(i + 2).srcType(1) := SrcType.fp
1659        csBundle(i + 2).lsrc(0) := FP_TMP_REG_MV.U
1660        csBundle(i + 2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
1661        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1662        csBundle(i + 2).ldest := dest + i.U
1663        csBundle(i + 2).uopIdx := i.U
1664        csBundle(i + 2).vlsInstr := true.B
1665      }
1666    }
1667    is(UopSplitType.VEC_I_LDST) {
          // Slot 0 is an i2v move of integer source 0 into FP_TMP_REG_MV (fpWen set); slots 1.. are
          // the memory uops, whose vs2 / vd register offsets come from the per-uop
          // indexedLSRegOffset(i) lookup modules.
1668    /*
1669      FMV.D.X
1670       */
1671      val vlmul = vlmulReg
          // vsew and veew are zero-extended by one bit before the arithmetic below.
1672      val vsew = Cat(0.U(1.W), vsewReg)
1673      val veew = Cat(0.U(1.W), width)
          // veew + vlmul - vsew in two's complement (x + 1 + ~y == x - y, modulo the operand width).
1674      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
          // Compress vlmul / vemul into the 2-bit codes fed to the lookup: b001 -> 1, b010 -> 2,
          // b011 -> 3, and every other encoding -> 0.
1675      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
1676        "b001".U -> 1.U,
1677        "b010".U -> 2.U,
1678        "b011".U -> 3.U
1679      ))
1680      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
1681        "b001".U -> 1.U,
1682        "b010".U -> 2.U,
1683        "b011".U -> 3.U
1684      ))
1685      csBundle(0).srcType(0) := SrcType.reg
1686      csBundle(0).srcType(1) := SrcType.imm
1687      csBundle(0).lsrc(1) := 0.U
1688      csBundle(0).ldest := FP_TMP_REG_MV.U
1689      csBundle(0).fuType := FuType.i2v.U
1690      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1691      csBundle(0).rfWen := false.B
1692      csBundle(0).fpWen := true.B
1693      csBundle(0).vecWen := false.B
1694      csBundle(0).vlsInstr := true.B
1695
1696      //LMUL
1697      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
            // Every lookup module receives the same key {simple_emul, simple_lmul, nf}; module i
            // returns the vs2 offset, the vd offset and the first-uop-in-vd flag for uop i.
1698        indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul, nf)
1699        val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1700        val offsetVd = indexedLSRegOffset(i).outOffsetVd
1701        val isFirstUopInVd = indexedLSRegOffset(i).outIsFirstUopInVd
1702        csBundle(i + 1).srcType(0) := SrcType.fp
1703        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
            // lsrc(1) = src2 + offsetVs2, selected with a one-hot mux over the MAX_VLMUL candidates.
1704        csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1705        /**
1706          * For indexed instructions, VLSU will concatenate all the uops that write the same logic vd register and
1707          * writeback only once for all these uops. However, these uops share the same lsrc(2)/old vd and the same
1708          * ldest/vd that is equal to old vd, which leads to data dependence between the uops. Therefore there will be
1709          * deadlock for indexed instructions with emul > lmul.
1710          *
1711          * Assume N = emul/lmul. To break the deadlock, only the first uop will read old vd as lsrc(2), and the rest
1712          * N-1 uops will read temporary vector register.
1713          */
1714        // csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1715        csBundle(i + 1).lsrc(2) := Mux(
1716          isFirstUopInVd,
1717          Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)),
1718          VECTOR_TMP_REG_LMUL.U
1719        )
            // ldest = dest + offsetVd for every uop, whether or not it is the first one in that vd.
1720        csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1721        csBundle(i + 1).uopIdx := i.U
1722        csBundle(i + 1).vlsInstr := true.B
1723      }
1724    }
1725  }
1726
1727  //readyFromRename Counter
      // Built by PriorityMuxDefault over the pairs (!outReadys(k) -> k.U), with RenameWidth.U as default.
      // Presumably this yields the index of the first not-ready output (i.e. the number of leading
      // ready outputs), or RenameWidth when all are ready -- TODO confirm against PriorityMuxDefault.
1728  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1729
1730  // The remaining uops of the complex inst in ComplexDecoder can all be sent out this cycle
1731  val thisAllOut = uopRes <= readyCounter
1732
1733  switch(state) {
        // Computes stateNext / uopResNext from the current state. Branches that make no connection
        // (e.g. s_idle without inValid) rely on values assigned outside this switch -- presumably
        // defaults set before this chunk; verify.
1734    is(s_idle) {
          // Idle: a valid input starts a new instruction and loads its uop count.
1735      when (inValid) {
1736        stateNext := s_active
1737        uopResNext := inUopInfo.numOfUop
1738      }
1739    }
1740    is(s_active) {
1741      when (thisAllOut) {
            // All remaining uops leave this cycle: accept the next instruction immediately if one is
            // valid, otherwise return to idle with no uops outstanding.
1742        when (inValid) {
1743          stateNext := s_active
1744          uopResNext := inUopInfo.numOfUop
1745        }.otherwise {
1746          stateNext := s_idle
1747          uopResNext := 0.U
1748        }
1749      }.otherwise {
            // Uops are still left over: stay active and subtract readyCounter from the remaining count.
1750        stateNext := s_active
1751        uopResNext := uopRes - readyCounter
1752      }
1753    }
1754  }
1755
1756  state := Mux(io.redirect, s_idle, stateNext)
1757  uopRes := Mux(io.redirect, 0.U, uopResNext)
      // On io.redirect the two assignments above force s_idle / 0 instead of the computed next values.
1758
      // Number of uops presented this cycle: min(uopRes, readyCounter).
1759  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1760
1761  for(i <- 0 until RenameWidth) {
        // Output i is valid when i < complexNum.
1762    outValids(i) := complexNum > i.U
        // Output i carries csBundle(i + numOfUop - uopRes); numOfUop - uopRes is presumably the number
        // of uops already sent for this instruction. An index at or beyond maxUopSize falls back to the
        // last entry, csBundle(maxUopSize - 1).
1763    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1764  }
1765
      // complexNum is only reported while active; outside s_active the reported count is 0.
1766  outComplexNum := Mux(state === s_active, complexNum, 0.U)
      // Ready for a new instruction when idle, or when active and all remaining uops leave this cycle.
1767  inReady := state === s_idle || state === s_active && thisAllOut
1768
1769//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1770//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1771//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1772//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1773//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1774//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1775//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1776//
1777//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1778//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1779//    0.U)
1780//  validToRename.zipWithIndex.foreach{
1781//    case(dst, i) =>
1782//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1783//      dst := MuxCase(false.B, Seq(
1784//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1785//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1786//      ).toSeq)
1787//  }
1788//
1789//  readyToIBuf.zipWithIndex.foreach {
1790//    case (dst, i) =>
1791//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1792//      dst := MuxCase(true.B, Seq(
1793//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1794//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1795//      ).toSeq)
1796//  }
1797//
1798//  io.deq.decodedInsts := decodedInsts
1799//  io.deq.complexNum := complexNum
1800//  io.deq.validToRename := validToRename
1801//  io.deq.readyToIBuf := readyToIBuf
1802}
1803