xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 783a1d5f5aa7a8275b8ced286d8c75d16ca3a231)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one uop slot of an indexed vector load/store.
  *
  * For the uop at position `uopIdx`, a 7-bit key `{emul[1:0], lmul[1:0], nf[2:0]}` is decoded
  * into the register offsets that uop should use. `emul` and `lmul` are log2-encoded
  * (the code only ever uses them as `1 << emul` / `1 << lmul`), and `nf` is the field count
  * minus one (it is passed on as `nf + 1`).
  *
  * Output encoding of the internal truth table: bit 6 = isFirstUopInVd, bits 5:3 = offsetVs2,
  * bits 2:0 = offsetVd.
  *
  * @param uopIdx position of this uop inside the split instruction
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes the register offsets of one uop of an indexed (optionally segmented) load/store.
    *
    * The uops are laid out field by field; each field takes `1 << max(lmul, emul)` uops.
    *
    * @param lmul    log2 of the data register-group size
    * @param emul    log2 of the index register-group size
    * @param nfields number of fields (NF + 1)
    * @param uopIdx  position of the uop inside the split instruction
    * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the combination is not
    *         representable (register group times field count exceeds 8) or `uopIdx` lies
    *         outside the uops generated for this combination
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val fallback = (0, 0, 1)
    val regsPerField = 1 << lmul
    // The limit applies to the real group size (1 << lmul), not to its log2 encoding.
    // Comparing `lmul * nfields` let reserved combinations through, whose vd offset does not
    // fit in 3 bits and would spill into the vs2 offset bits of the packed table entry.
    if (regsPerField * nfields > 8) {
      fallback
    } else {
      val uopsPerField = 1 << math.max(lmul, emul)
      val field = uopIdx / uopsPerField
      val idxInField = uopIdx % uopsPerField
      if (uopIdx < 0 || field >= nfields) {
        fallback
      } else if (lmul < emul) {
        // lmul < emul: uop count follows emul * nf, several uops share one vd register
        val uopsPerVd = 1 << (emul - lmul)
        (idxInField, idxInField / uopsPerVd + field * regsPerField, if (idxInField % uopsPerVd == 0) 1 else 0)
      } else {
        // lmul >= emul: uop count follows lmul * nf, several uops share one vs2 register
        val uopsPerVs2 = 1 << (lmul - emul)
        (idxInField / uopsPerVs2, idxInField + field * regsPerField, 1)
      }
    }
  }

  // indexed load/store: one table row per (emul, lmul, nf) key, 4 * 4 * 8 = 128 rows,
  // i.e. every value of the 7-bit `src` key is covered.
  val combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf <- 0 until 8
    } yield {
      val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
      (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
    }

  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      val key = (emul << 5) | (lmul << 3) | nf
      val value = (isFirstUopInVd << 6) | (offsetVs2 << 3) | offsetVd
      (BitPat(key.U(7.W)), BitPat(value.U(7.W)))
  }, BitPat.N(7)))

  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}
86
/** Numeric constants shared by the vector uop-splitting logic in this file. */
trait VectorConstants {
  /** Loop bound used when expanding one vector instruction into per-register uops. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as the temporary FP register (written by uop0, read by uop1 of the vsetvl split). */
  val FP_TMP_REG_MV: Int = 32

  /** First temporary vector logical register; per the original note the range is 33~47, i.e. 15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Number of `indexedLSUopTable` instances created, one per possible uop slot of an indexed load/store. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
93
/**
  * Port list of the complex-instruction decoder.
  *
  * Field names and declaration order are significant for a Chisel bundle and are kept as-is.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // vtype value forwarded from outside; the vset split connects it to the uops' vpu fields
  val vtypeBypass = Input(new VType)
  // Input channel: when the first inst in the decode vector is a complex inst, it is passed
  // in here together with its uop info
  val in = Flipped(Decoupled(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // Output channels: one ready/valid port per rename slot
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, Decoupled(new DecodedInst))
  }
  // NOTE(review): presumably the number of complex uops offered this cycle — confirm against the driver
  val complexNum = Output(UInt(3.W))
}
108
109/**
110  * @author zly
111  */
112class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
113  val io = IO(new DecodeUnitCompIO)
114
115  // alias
116  private val inReady = io.in.ready
117  private val inValid = io.in.valid
118  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
119  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
120  private val inUopInfo = io.in.bits.uopInfo
121  private val outValids = io.out.complexDecodedInsts.map(_.valid)
122  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
123  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
124  private val outComplexNum = io.complexNum
125
126  val maxUopSize = MaxUopSize
127  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
128    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
129      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
130    }.elsewhen(inInstFields.RS1 === 0.U) {
131      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
132    }
133  }
134
135  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
136  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
137  //input bits
138  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
139
140  val src1 = Cat(0.U(1.W), instFields.RS1)
141  val src2 = Cat(0.U(1.W), instFields.RS2)
142  val dest = Cat(0.U(1.W), instFields.RD)
143
144  val nf    = instFields.NF
145  val width = instFields.WIDTH(1, 0)
146
147  //output of DecodeUnit
148  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
149  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
150  val lmul = Wire(UInt(4.W))
151  val isVsetSimple = Wire(Bool())
152
153  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
154  indexedLSRegOffset.map(_.src := 0.U)
155
156  //pre decode
157  lmul := latchedUopInfo.lmul
158  isVsetSimple := latchedInst.isVset
159  val vlmulReg = latchedInst.vpu.vlmul
160  val vsewReg = latchedInst.vpu.vsew
161
162  //Type of uop Div
163  val typeOfSplit = latchedInst.uopSplitType
164  val src1Type = latchedInst.srcType(0)
165  val src1IsImm = src1Type === SrcType.imm
166  val src1IsFp = src1Type === SrcType.fp
167
168  numOfUop := latchedUopInfo.numOfUop
169  numOfWB := latchedUopInfo.numOfWB
170
171  //uops dispatch
172  val s_idle :: s_active :: Nil = Enum(2)
173  val state = RegInit(s_idle)
174  val stateNext = WireDefault(state)
175  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
176  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
177  val uopResNext = WireInit(uopRes)
178
179  //uop div up to maxUopSize
180  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
181  csBundle.foreach { case dst =>
182    dst := latchedInst
183    dst.numUops := latchedUopInfo.numOfUop
184    dst.numWB := latchedUopInfo.numOfWB
185    dst.firstUop := false.B
186    dst.lastUop := false.B
187    dst.vlsInstr := false.B
188  }
189
190  csBundle(0).firstUop := true.B
191  csBundle(numOfUop - 1.U).lastUop := true.B
192
193  switch(typeOfSplit) {
194    is(UopSplitType.VSET) {
195      // In simple decoder, rfWen and vecWen are not set
196      when(isVsetSimple) {
197        // Default
198        // uop0 set rd, never flushPipe
199        csBundle(0).fuType := FuType.vsetiwi.U
200        csBundle(0).flushPipe := false.B
201        csBundle(0).rfWen := true.B
202        // uop1 set vl, vsetvl will flushPipe
203        csBundle(1).ldest := VCONFIG_IDX.U
204        csBundle(1).vecWen := true.B
205        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
206          csBundle(1).fuType := FuType.vsetfwf.U
207          csBundle(1).srcType(0) := SrcType.vp
208          csBundle(1).lsrc(0) := VCONFIG_IDX.U
209        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
210          // uop0: mv vtype gpr to vector region
211          csBundle(0).srcType(0) := SrcType.xp
212          csBundle(0).srcType(1) := SrcType.no
213          csBundle(0).lsrc(1) := 0.U
214          csBundle(0).ldest := FP_TMP_REG_MV.U
215          csBundle(0).fuType := FuType.i2f.U
216          csBundle(0).fpWen := true.B
217          csBundle(0).fpu.isAddSub := false.B
218          csBundle(0).fpu.typeTagIn := FPU.D
219          csBundle(0).fpu.typeTagOut := FPU.D
220          csBundle(0).fpu.fromInt := true.B
221          csBundle(0).fpu.wflags := false.B
222          csBundle(0).fpu.fpWen := true.B
223          csBundle(0).fpu.div := false.B
224          csBundle(0).fpu.sqrt := false.B
225          csBundle(0).fpu.fcvt := false.B
226          csBundle(0).flushPipe := false.B
227          // uop1: uvsetvcfg_vv
228          csBundle(1).fuType := FuType.vsetfwf.U
229          // vl
230          csBundle(1).srcType(0) := SrcType.vp
231          csBundle(1).lsrc(0) := VCONFIG_IDX.U
232          // vtype
233          csBundle(1).srcType(1) := SrcType.fp
234          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
235          csBundle(1).vecWen := true.B
236          csBundle(1).ldest := VCONFIG_IDX.U
237        }
238        // use bypass vtype from vtypeGen
239        csBundle(0).vpu.connectVType(io.vtypeBypass)
240        csBundle(1).vpu.connectVType(io.vtypeBypass)
241      }
242    }
243    is(UopSplitType.VEC_VVV) {
244      for (i <- 0 until MAX_VLMUL) {
245        csBundle(i).lsrc(0) := src1 + i.U
246        csBundle(i).lsrc(1) := src2 + i.U
247        csBundle(i).lsrc(2) := dest + i.U
248        csBundle(i).ldest := dest + i.U
249        csBundle(i).uopIdx := i.U
250      }
251    }
252    is(UopSplitType.VEC_VFV) {
253      /*
      f to vector move
255       */
256      csBundle(0).srcType(0) := SrcType.fp
257      csBundle(0).srcType(1) := SrcType.imm
258      csBundle(0).lsrc(1) := 0.U
259      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
260      csBundle(0).fuType := FuType.f2v.U
261      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
262      csBundle(0).vecWen := true.B
263      csBundle(0).vpu.isReverse := false.B
264      /*
265      LMUL
266       */
267      for (i <- 0 until MAX_VLMUL) {
268        csBundle(i + 1).srcType(0) := SrcType.vp
269        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
270        csBundle(i + 1).lsrc(1) := src2 + i.U
271        csBundle(i + 1).lsrc(2) := dest + i.U
272        csBundle(i + 1).ldest := dest + i.U
273        csBundle(i + 1).uopIdx := i.U
274      }
275    }
276    is(UopSplitType.VEC_EXT2) {
277      for (i <- 0 until MAX_VLMUL / 2) {
278        csBundle(2 * i).lsrc(1) := src2 + i.U
279        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
280        csBundle(2 * i).ldest := dest + (2 * i).U
281        csBundle(2 * i).uopIdx := (2 * i).U
282        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
283        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
284        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
285        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
286      }
287    }
288    is(UopSplitType.VEC_EXT4) {
289      for (i <- 0 until MAX_VLMUL / 4) {
290        csBundle(4 * i).lsrc(1) := src2 + i.U
291        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
292        csBundle(4 * i).ldest := dest + (4 * i).U
293        csBundle(4 * i).uopIdx := (4 * i).U
294        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
295        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
296        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
297        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
298        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
299        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
300        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
301        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
302        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
303        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
304        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
305        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
306      }
307    }
308    is(UopSplitType.VEC_EXT8) {
309      for (i <- 0 until MAX_VLMUL) {
310        csBundle(i).lsrc(1) := src2
311        csBundle(i).lsrc(2) := dest + i.U
312        csBundle(i).ldest := dest + i.U
313        csBundle(i).uopIdx := i.U
314      }
315    }
316    is(UopSplitType.VEC_0XV) {
317      /*
318      i/f to vector move
319       */
320      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
321      csBundle(0).srcType(1) := SrcType.imm
322      csBundle(0).lsrc(1) := 0.U
323      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
324      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
325      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
326      csBundle(0).rfWen := false.B
327      csBundle(0).fpWen := false.B
328      csBundle(0).vecWen := true.B
329      /*
330      vmv.s.x
331       */
332      csBundle(1).srcType(0) := SrcType.vp
333      csBundle(1).srcType(1) := SrcType.imm
334      csBundle(1).srcType(2) := SrcType.vp
335      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
336      csBundle(1).lsrc(1) := 0.U
337      csBundle(1).lsrc(2) := dest
338      csBundle(1).ldest := dest
339      csBundle(1).rfWen := false.B
340      csBundle(1).fpWen := false.B
341      csBundle(1).vecWen := true.B
342      csBundle(1).uopIdx := 0.U
343    }
344    is(UopSplitType.VEC_VXV) {
345      /*
346      i to vector move
347       */
348      csBundle(0).srcType(0) := SrcType.reg
349      csBundle(0).srcType(1) := SrcType.imm
350      csBundle(0).lsrc(1) := 0.U
351      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
352      csBundle(0).fuType := FuType.i2v.U
353      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
354      csBundle(0).vecWen := true.B
355      csBundle(0).vpu.isReverse := false.B
356      /*
357      LMUL
358       */
359      for (i <- 0 until MAX_VLMUL) {
360        csBundle(i + 1).srcType(0) := SrcType.vp
361        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
362        csBundle(i + 1).lsrc(1) := src2 + i.U
363        csBundle(i + 1).lsrc(2) := dest + i.U
364        csBundle(i + 1).ldest := dest + i.U
365        csBundle(i + 1).uopIdx := i.U
366      }
367    }
368    is(UopSplitType.VEC_VVW) {
369      for (i <- 0 until MAX_VLMUL / 2) {
370        csBundle(2 * i).lsrc(0) := src1 + i.U
371        csBundle(2 * i).lsrc(1) := src2 + i.U
372        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
373        csBundle(2 * i).ldest := dest + (2 * i).U
374        csBundle(2 * i).uopIdx := (2 * i).U
375        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
376        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
377        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
378        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
379        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
380      }
381    }
382    is(UopSplitType.VEC_VFW) {
383      /*
384      f to vector move
385       */
386      csBundle(0).srcType(0) := SrcType.fp
387      csBundle(0).srcType(1) := SrcType.imm
388      csBundle(0).lsrc(1) := 0.U
389      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
390      csBundle(0).fuType := FuType.f2v.U
391      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
392      csBundle(0).rfWen := false.B
393      csBundle(0).fpWen := false.B
394      csBundle(0).vecWen := true.B
395
396      for (i <- 0 until MAX_VLMUL / 2) {
397        csBundle(2 * i + 1).srcType(0) := SrcType.vp
398        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
399        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
400        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
401        csBundle(2 * i + 1).ldest := dest + (2 * i).U
402        csBundle(2 * i + 1).uopIdx := (2 * i).U
403        csBundle(2 * i + 2).srcType(0) := SrcType.vp
404        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
405        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
406        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
407        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
408        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
409      }
410    }
411    is(UopSplitType.VEC_WVW) {
412      for (i <- 0 until MAX_VLMUL / 2) {
413        csBundle(2 * i).lsrc(0) := src1 + i.U
414        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
415        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
416        csBundle(2 * i).ldest := dest + (2 * i).U
417        csBundle(2 * i).uopIdx := (2 * i).U
418        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
419        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
420        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
421        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
422        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
423      }
424    }
425    is(UopSplitType.VEC_VXW) {
426      /*
427      i to vector move
428       */
429      csBundle(0).srcType(0) := SrcType.reg
430      csBundle(0).srcType(1) := SrcType.imm
431      csBundle(0).lsrc(1) := 0.U
432      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
433      csBundle(0).fuType := FuType.i2v.U
434      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
435      csBundle(0).vecWen := true.B
436
437      for (i <- 0 until MAX_VLMUL / 2) {
438        csBundle(2 * i + 1).srcType(0) := SrcType.vp
439        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
440        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
441        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
442        csBundle(2 * i + 1).ldest := dest + (2 * i).U
443        csBundle(2 * i + 1).uopIdx := (2 * i).U
444        csBundle(2 * i + 2).srcType(0) := SrcType.vp
445        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
446        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
447        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
448        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
449        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
450      }
451    }
452    is(UopSplitType.VEC_WXW) {
453      /*
454      i to vector move
455       */
456      csBundle(0).srcType(0) := SrcType.reg
457      csBundle(0).srcType(1) := SrcType.imm
458      csBundle(0).lsrc(1) := 0.U
459      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
460      csBundle(0).fuType := FuType.i2v.U
461      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
462      csBundle(0).vecWen := true.B
463
464      for (i <- 0 until MAX_VLMUL / 2) {
465        csBundle(2 * i + 1).srcType(0) := SrcType.vp
466        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
467        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
468        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
469        csBundle(2 * i + 1).ldest := dest + (2 * i).U
470        csBundle(2 * i + 1).uopIdx := (2 * i).U
471        csBundle(2 * i + 2).srcType(0) := SrcType.vp
472        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
473        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
474        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
475        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
476        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
477      }
478    }
479    is(UopSplitType.VEC_WVV) {
480      for (i <- 0 until MAX_VLMUL / 2) {
481
482        csBundle(2 * i).lsrc(0) := src1 + i.U
483        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
484        csBundle(2 * i).lsrc(2) := dest + i.U
485        csBundle(2 * i).ldest := dest + i.U
486        csBundle(2 * i).uopIdx := (2 * i).U
487        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
488        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
489        csBundle(2 * i + 1).lsrc(2) := dest + i.U
490        csBundle(2 * i + 1).ldest := dest + i.U
491        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
492      }
493    }
494    is(UopSplitType.VEC_WFW) {
495      /*
496      f to vector move
497       */
498      csBundle(0).srcType(0) := SrcType.fp
499      csBundle(0).srcType(1) := SrcType.imm
500      csBundle(0).lsrc(1) := 0.U
501      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
502      csBundle(0).fuType := FuType.f2v.U
503      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
504      csBundle(0).rfWen := false.B
505      csBundle(0).fpWen := false.B
506      csBundle(0).vecWen := true.B
507
508      for (i <- 0 until MAX_VLMUL / 2) {
509        csBundle(2 * i + 1).srcType(0) := SrcType.vp
510        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
511        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
512        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
513        csBundle(2 * i + 1).ldest := dest + (2 * i).U
514        csBundle(2 * i + 1).uopIdx := (2 * i).U
515        csBundle(2 * i + 2).srcType(0) := SrcType.vp
516        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
517        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
518        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
519        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
520        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
521      }
522    }
523    is(UopSplitType.VEC_WXV) {
524      /*
525      i to vector move
526       */
527      csBundle(0).srcType(0) := SrcType.reg
528      csBundle(0).srcType(1) := SrcType.imm
529      csBundle(0).lsrc(1) := 0.U
530      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
531      csBundle(0).fuType := FuType.i2v.U
532      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
533      csBundle(0).vecWen := true.B
534
535      for (i <- 0 until MAX_VLMUL / 2) {
536        csBundle(2 * i + 1).srcType(0) := SrcType.vp
537        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
538        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
539        csBundle(2 * i + 1).lsrc(2) := dest + i.U
540        csBundle(2 * i + 1).ldest := dest + i.U
541        csBundle(2 * i + 1).uopIdx := (2 * i).U
542        csBundle(2 * i + 2).srcType(0) := SrcType.vp
543        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
544        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
545        csBundle(2 * i + 2).lsrc(2) := dest + i.U
546        csBundle(2 * i + 2).ldest := dest + i.U
547        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
548      }
549    }
550    is(UopSplitType.VEC_VVM) {
551      csBundle(0).lsrc(2) := dest
552      csBundle(0).ldest := dest
553      csBundle(0).uopIdx := 0.U
554      for (i <- 1 until MAX_VLMUL) {
555        csBundle(i).lsrc(0) := src1 + i.U
556        csBundle(i).lsrc(1) := src2 + i.U
557        csBundle(i).lsrc(2) := dest
558        csBundle(i).ldest := dest
559        csBundle(i).uopIdx := i.U
560      }
561    }
562    is(UopSplitType.VEC_VFM) {
563      /*
564      f to vector move
565       */
566      csBundle(0).srcType(0) := SrcType.fp
567      csBundle(0).srcType(1) := SrcType.imm
568      csBundle(0).lsrc(1) := 0.U
569      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
570      csBundle(0).fuType := FuType.f2v.U
571      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
572      csBundle(0).rfWen := false.B
573      csBundle(0).fpWen := false.B
574      csBundle(0).vecWen := true.B
575      //LMUL
576      csBundle(1).srcType(0) := SrcType.vp
577      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
578      csBundle(1).lsrc(2) := dest
579      csBundle(1).ldest := dest
580      csBundle(1).uopIdx := 0.U
581      for (i <- 1 until MAX_VLMUL) {
582        csBundle(i + 1).srcType(0) := SrcType.vp
583        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
584        csBundle(i + 1).lsrc(1) := src2 + i.U
585        csBundle(i + 1).lsrc(2) := dest
586        csBundle(i + 1).ldest := dest
587        csBundle(i + 1).uopIdx := i.U
588      }
589      csBundle(numOfUop - 1.U).ldest := dest
590    }
591    is(UopSplitType.VEC_VXM) {
592      /*
593      i to vector move
594       */
595      csBundle(0).srcType(0) := SrcType.reg
596      csBundle(0).srcType(1) := SrcType.imm
597      csBundle(0).lsrc(1) := 0.U
598      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
599      csBundle(0).fuType := FuType.i2v.U
600      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
601      csBundle(0).vecWen := true.B
602      //LMUL
603      csBundle(1).srcType(0) := SrcType.vp
604      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
605      csBundle(1).lsrc(2) := dest
606      csBundle(1).ldest := dest
607      csBundle(1).uopIdx := 0.U
608      for (i <- 1 until MAX_VLMUL) {
609        csBundle(i + 1).srcType(0) := SrcType.vp
610        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
611        csBundle(i + 1).lsrc(1) := src2 + i.U
612        csBundle(i + 1).lsrc(2) := dest
613        csBundle(i + 1).ldest := dest
614        csBundle(i + 1).uopIdx := i.U
615      }
616      csBundle(numOfUop - 1.U).ldest := dest
617    }
618    is(UopSplitType.VEC_SLIDE1UP) {
619      /*
620      i to vector move
621       */
622      csBundle(0).srcType(0) := SrcType.reg
623      csBundle(0).srcType(1) := SrcType.imm
624      csBundle(0).lsrc(1) := 0.U
625      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
626      csBundle(0).fuType := FuType.i2v.U
627      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
628      csBundle(0).vecWen := true.B
629      //LMUL
630      csBundle(1).srcType(0) := SrcType.vp
631      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
632      csBundle(1).lsrc(2) := dest
633      csBundle(1).ldest := dest
634      csBundle(1).uopIdx := 0.U
635      for (i <- 1 until MAX_VLMUL) {
636        csBundle(i + 1).srcType(0) := SrcType.vp
637        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
638        csBundle(i + 1).lsrc(1) := src2 + i.U
639        csBundle(i + 1).lsrc(2) := dest + i.U
640        csBundle(i + 1).ldest := dest + i.U
641        csBundle(i + 1).uopIdx := i.U
642      }
643    }
644    is(UopSplitType.VEC_FSLIDE1UP) {
645      /*
      f to vector move
647       */
648      csBundle(0).srcType(0) := SrcType.fp
649      csBundle(0).srcType(1) := SrcType.imm
650      csBundle(0).lsrc(1) := 0.U
651      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
652      csBundle(0).fuType := FuType.f2v.U
653      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
654      csBundle(0).rfWen := false.B
655      csBundle(0).fpWen := false.B
656      csBundle(0).vecWen := true.B
657      //LMUL
658      csBundle(1).srcType(0) := SrcType.vp
659      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
660      csBundle(1).lsrc(1) := src2
661      csBundle(1).lsrc(2) := dest
662      csBundle(1).ldest := dest
663      csBundle(1).uopIdx := 0.U
664      for (i <- 1 until MAX_VLMUL) {
665        csBundle(i + 1).srcType(0) := SrcType.vp
666        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
667        csBundle(i + 1).lsrc(1) := src2 + i.U
668        csBundle(i + 1).lsrc(2) := dest + i.U
669        csBundle(i + 1).ldest := dest + i.U
670        csBundle(i + 1).uopIdx := i.U
671      }
672    }
673    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
674      /*
675      i to vector move
676       */
677      csBundle(0).srcType(0) := SrcType.reg
678      csBundle(0).srcType(1) := SrcType.imm
679      csBundle(0).lsrc(1) := 0.U
680      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
681      csBundle(0).fuType := FuType.i2v.U
682      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
683      csBundle(0).vecWen := true.B
684      //LMUL
685      for (i <- 0 until MAX_VLMUL) {
686        csBundle(2 * i + 1).srcType(0) := SrcType.vp
687        csBundle(2 * i + 1).srcType(1) := SrcType.vp
688        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
689        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
690        csBundle(2 * i + 1).lsrc(2) := dest + i.U
691        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
692        csBundle(2 * i + 1).uopIdx := (2 * i).U
693        if (2 * i + 2 < MAX_VLMUL * 2) {
694          csBundle(2 * i + 2).srcType(0) := SrcType.vp
695          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
696          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
697          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
698          csBundle(2 * i + 2).ldest := dest + i.U
699          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
700        }
701      }
702      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
703      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
704      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
705    }
706    is(UopSplitType.VEC_FSLIDE1DOWN) {
707      /*
      f to vector move
709       */
710      csBundle(0).srcType(0) := SrcType.fp
711      csBundle(0).srcType(1) := SrcType.imm
712      csBundle(0).lsrc(1) := 0.U
713      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
714      csBundle(0).fuType := FuType.f2v.U
715      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
716      csBundle(0).rfWen := false.B
717      csBundle(0).fpWen := false.B
718      csBundle(0).vecWen := true.B
719      //LMUL
720      for (i <- 0 until MAX_VLMUL) {
721        csBundle(2 * i + 1).srcType(0) := SrcType.vp
722        csBundle(2 * i + 1).srcType(1) := SrcType.vp
723        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
724        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
725        csBundle(2 * i + 1).lsrc(2) := dest + i.U
726        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
727        csBundle(2 * i + 1).uopIdx := (2 * i).U
728        if (2 * i + 2 < MAX_VLMUL * 2) {
729          csBundle(2 * i + 2).srcType(0) := SrcType.vp
730          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
731          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
732          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
733          csBundle(2 * i + 2).ldest := dest + i.U
734          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
735        }
736      }
737      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
738      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
739      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
740    }
is(UopSplitType.VEC_VRED) {
  // Reduction over a register group of 2, 4 or 8 registers (vlmulReg = b001 / b010 / b011).
  // The source registers src2, src2 + 1, ... are combined pairwise into temporaries
  // VECTOR_TMP_REG_LMUL + slot until one temporary is left; the last uop then combines that
  // temporary with src1 and writes dest.

  // Temporary vector register at a compile-time offset from VECTOR_TMP_REG_LMUL.
  def tmpReg(offset: Int): UInt = (VECTOR_TMP_REG_LMUL + offset).U

  // Emits the numRegs - 1 uops of the pairwise tree into slots 0 .. numRegs - 2.
  // Slots below numRegs / 2 read source-register pairs; later slots read pairs of temporaries
  // produced by earlier slots.
  def genPairwiseTree(numRegs: Int): Unit = {
    val firstLevel = numRegs / 2
    for (slot <- 0 until numRegs - 1) {
      val uop = csBundle(slot)
      if (slot < firstLevel) {
        uop.lsrc(0) := src2 + (2 * slot + 1).U
        uop.lsrc(1) := src2 + (2 * slot).U
      } else {
        val lower = 2 * (slot - firstLevel)
        uop.lsrc(0) := tmpReg(lower + 1)
        uop.lsrc(1) := tmpReg(lower)
      }
      uop.ldest := tmpReg(slot)
      uop.srcType(2) := SrcType.DC
      uop.uopIdx := slot.U
    }
  }

  when(vlmulReg === "b001".U) {
    genPairwiseTree(2)
  }
  when(vlmulReg === "b010".U) {
    genPairwiseTree(4)
  }
  when(vlmulReg === "b011".U) {
    // NOTE(review): equivalent to the previous hand-written loop for MAX_VLMUL = 8, which the
    // previous hard-coded slot 6 already presumed - confirm MAX_VLMUL.
    genPairwiseTree(MAX_VLMUL)
    // Only this case also gives the slot after the tree a default srcType(2) / uopIdx before
    // the final-uop override below is applied.
    csBundle(MAX_VLMUL - 1).srcType(2) := SrcType.DC
    csBundle(MAX_VLMUL - 1).uopIdx := (MAX_VLMUL - 1).U
  }

  // Final uop for the three encodings above (2 <= vlmul <= 8). It must stay after the tree
  // generation so that these connections take priority for slot numOfUop - 1.
  when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
    val lastSlot = numOfUop - 1.U
    val finalUop = csBundle(lastSlot)
    finalUop.srcType(2) := SrcType.vp
    finalUop.lsrc(0) := src1
    finalUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
    finalUop.lsrc(2) := dest
    finalUop.ldest := dest
    finalUop.uopIdx := lastSlot
  }
}
is(UopSplitType.VEC_VFRED) {
  // Uop layout generated for every supported (LMUL, SEW) pair:
  //   1. pairwise tree (LMUL = 2 / 4 / 8 only): src2, src2 + 1, ... are combined two at a time
  //      into VECTOR_TMP_REG_LMUL + slot until one temporary remains (numRegs - 1 uops);
  //   2. fold steps: each uop reads the previous result as both lsrc(0) and lsrc(1) and sets
  //      exactly one of isFoldTo1_2 / isFoldTo1_4 / isFoldTo1_8;
  //   3. final uop: lsrc(0) = src1, lsrc(1) = last temporary, ldest = dest.
  // Pairs that end up with no fold step (mf2/e64, mf4/e64, mf4/e32) get no assignments here.

  // (LMUL encoding, source registers fed to the tree, number of leading fold steps skipped)
  val lmulCases = Seq(
    (VLmul.m8, 8, 0),
    (VLmul.m4, 4, 0),
    (VLmul.m2, 2, 0),
    (VLmul.m1, 1, 0),
    (VLmul.mf2, 1, 1),
    (VLmul.mf4, 1, 2)
  )
  // (SEW encoding, fold ratios applied in order when no step is skipped)
  val sewCases = Seq(
    (VSew.e64, Seq(2)),
    (VSew.e32, Seq(2, 4)),
    (VSew.e16, Seq(2, 4, 8))
  )

  // Temporary vector register at a compile-time offset from VECTOR_TMP_REG_LMUL.
  def tmpReg(offset: Int): UInt = (VECTOR_TMP_REG_LMUL + offset).U

  // Selects the fold flag of a uop slot that corresponds to the given ratio.
  def foldFlag(slot: Int, ratio: Int) = ratio match {
    case 2 => csBundle(slot).vpu.fpu.isFoldTo1_2
    case 4 => csBundle(slot).vpu.fpu.isFoldTo1_4
    case 8 => csBundle(slot).vpu.fpu.isFoldTo1_8
  }

  lmulCases.foreach { case (lmulEnc, numRegs, skippedFolds) =>
    when(vlmulReg === lmulEnc) {
      val treeUops = numRegs - 1
      val firstLevel = numRegs / 2

      // Step 1: pairwise tree, independent of SEW.
      for (slot <- 0 until treeUops) {
        val uop = csBundle(slot)
        if (slot < firstLevel) {
          uop.lsrc(0) := src2 + (slot * 2 + 1).U
          uop.lsrc(1) := src2 + (slot * 2).U
        } else {
          val lower = (slot - firstLevel) * 2
          uop.lsrc(0) := tmpReg(lower + 1)
          uop.lsrc(1) := tmpReg(lower)
        }
        uop.ldest := tmpReg(slot)
        uop.uopIdx := slot.U
      }

      sewCases.foreach { case (sewEnc, allFolds) =>
        val folds = allFolds.drop(skippedFolds)
        if (folds.nonEmpty) {
          when(vsewReg === sewEnc) {
            // Step 2: fold steps. The first one reads src2 directly when there is no tree.
            folds.zipWithIndex.foreach { case (ratio, step) =>
              val slot = treeUops + step
              val operand = if (slot == 0) src2 else tmpReg(slot - 1)
              val uop = csBundle(slot)
              uop.lsrc(0) := operand
              uop.lsrc(1) := operand
              uop.ldest := tmpReg(slot)
              foldFlag(slot, ratio) := true.B
              uop.uopIdx := slot.U
            }

            // Step 3: combine the folded result with src1 and write dest.
            val lastSlot = treeUops + folds.length
            val finalUop = csBundle(lastSlot)
            finalUop.lsrc(0) := src1
            finalUop.lsrc(1) := tmpReg(lastSlot - 1)
            finalUop.ldest := dest
            finalUop.uopIdx := lastSlot.U
          }
        }
      }
    }
  }
}
1075
is(UopSplitType.VEC_VFREDOSUM) {
  import yunsuan.VfaluType
  // Every uop accumulates through the single temporary VECTOR_TMP_REG_LMUL:
  //   - lsrc(0) is src1 for the first uop and the temporary afterwards;
  //   - the first uop of each group of `perReg` uops reads a fresh source register
  //     src2 + i / perReg, the remaining uops of the group set a fold flag instead;
  //   - the last uop writes dest (and reads it as lsrc(2) unless it starts a group).
  val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

  // (LMUL encoding, uop count at SEW = e16, whether the widening form is distinguished)
  val lmulCases = Seq(
    (VLmul.m8, 64, false),
    (VLmul.m4, 32, true),
    (VLmul.m2, 16, true),
    (VLmul.m1, 8, true),
    (VLmul.mf2, 4, true),
    (VLmul.mf4, 2, true)
  )
  // (SEW encoding, uops per source register, whether the widening form is distinguished)
  val sewCases = Seq(
    (VSew.e64, 2, false),
    (VSew.e32, 4, true),
    (VSew.e16, 8, true)
  )

  def accReg: UInt = VECTOR_TMP_REG_LMUL.U

  // Selects the fold flag of a uop slot that corresponds to the given ratio.
  def foldFlag(slot: Int, ratio: Int) = ratio match {
    case 2 => csBundle(slot).vpu.fpu.isFoldTo1_2
    case 4 => csBundle(slot).vpu.fpu.isFoldTo1_4
    case 8 => csBundle(slot).vpu.fpu.isFoldTo1_8
  }

  lmulCases.foreach { case (lmulEnc, uopsAtE16, lmulWidenAware) =>
    when(vlmulReg === lmulEnc) {
      sewCases.foreach { case (sewEnc, perReg, sewWidenAware) =>
        // Uop count of this pair: 16 / 32 / 64 for m8 down to 2 for mf4 at e16.
        // Pairs that would need fewer than two uops are not generated.
        val vlmax = uopsAtE16 * perReg / 8
        if (vlmax >= 2) {
          when(vsewReg === sewEnc) {
            val widenAware = lmulWidenAware && sewWidenAware
            for (i <- 0 until vlmax) {
              val uop = csBundle(i)
              val startsGroup = i % perReg == 0
              val isLast = i == vlmax - 1
              def groupSrc: UInt = src2 + (i / perReg).U
              val continuesGroup = (!startsGroup).B

              uop.lsrc(0) := (if (i == 0) src1 else accReg)
              uop.lsrc(1) := (if (startsGroup) groupSrc else accReg)
              // Priority is significant: group start, then last uop, then the second uop of a
              // group (which re-reads the source register for the widening form).
              uop.lsrc(2) := (
                if (startsGroup) groupSrc
                else if (isLast) dest
                else if (widenAware && i % perReg == 1) Mux(isWiden, groupSrc, accReg)
                else accReg
              )
              uop.ldest := (if (isLast) dest else accReg)
              if (widenAware) {
                // The widening form uses the next smaller fold ratio.
                foldFlag(i, perReg / 2) := isWiden && continuesGroup
                foldFlag(i, perReg) := !isWiden && continuesGroup
              } else {
                foldFlag(i, perReg) := continuesGroup
              }
              uop.uopIdx := i.U
            }
          }
        }
      }
    }
  }
}
1268
is(UopSplitType.VEC_SLIDEUP) {
  // Slot 0: i to vector move into VECTOR_TMP_REG_LMUL through the i2v function unit.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // LMUL: destination register i is built by a chain of i + 1 uops, one per source register
  // src2 + j (j = 0 .. i). The chain starts from the old dest + i, passes through temporaries
  // and ends by writing dest + i. Chains are laid out back to back after slot 0.
  for {
    i <- 0 until MAX_VLMUL
    j <- 0 to i
  } {
    val slot = i * (i + 1) / 2 + j + 1
    val uop = csBundle(slot)
    def groupDest: UInt = dest + i.U

    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + j.U
    uop.lsrc(2) := (if (j == 0) groupDest else (VECTOR_TMP_REG_LMUL + j).U)
    uop.ldest := (if (j == i) groupDest else (VECTOR_TMP_REG_LMUL + j + 1).U)
    // uopIdx does not count the move in slot 0.
    uop.uopIdx := (slot - 1).U
  }
}
1295
is(UopSplitType.VEC_SLIDEDOWN) {
  // Slot 0: i to vector move into VECTOR_TMP_REG_LMUL through the i2v function unit.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // LMUL: chains are addressed backwards from the end of the uop list (numOfUop - k), and
  // only chains with i < lmul are generated. Chain i targets dest + lmul - 1 - i and reads
  // the source registers src2 + lmul - 1 - j for j = i down to 0.
  for {
    i <- 0 until MAX_VLMUL
    j <- (0 to i).reverse
  } {
    when(i.U < lmul) {
      val distFromEnd = i * (i + 1) / 2 + i - j + 1
      val uop = csBundle(numOfUop - distFromEnd.U)
      def groupDest: UInt = dest + lmul - 1.U - i.U

      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + lmul - 1.U - j.U
      uop.lsrc(2) := (if (j == 0) groupDest else (VECTOR_TMP_REG_LMUL + j).U)
      uop.ldest := (if (j == i) groupDest else (VECTOR_TMP_REG_LMUL + j + 1).U)
      // uopIdx is one less than the slot number because slot 0 holds the move.
      uop.uopIdx := numOfUop - (distFromEnd + 1).U
    }
  }
}
1324
is(UopSplitType.VEC_M0X) {
  // LMUL: uop i reads src2 plus the temporary written by uop i - 1 and writes the temporary
  // VECTOR_TMP_REG_LMUL + i. Uop 0 has no predecessor, so its srcType(0) is DC.
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i)
    uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    uop.lsrc(1) := src2
    // lsrc(2) is left untouched (DontCare)
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
  }

  // The uop at index lmul - 1 writes the integer register dest instead of a vector temporary.
  // These connections must follow the loop so that they take priority.
  val resultUop = csBundle(lmul - 1.U)
  resultUop.rfWen := true.B
  resultUop.fpWen := false.B
  resultUop.vecWen := false.B
  resultUop.ldest := dest
}
1346
is(UopSplitType.VEC_MVV) {
  // LMUL: two uops per destination register i. Both read src2 and the temporary
  // VECTOR_TMP_REG_LMUL + i - 1 (srcType(0) is DC for i == 0, where no such temporary exists).
  //   slot 2 * i     : reads the old dest + i as lsrc(2) and writes dest + i
  //   slot 2 * i + 1 : writes VECTOR_TMP_REG_LMUL + i, read by the uops of iteration i + 1
  for (i <- 0 until MAX_VLMUL) {
    val prevTmpType = if (i == 0) SrcType.DC else SrcType.vp
    val resultSlot = i * 2
    val carrySlot = i * 2 + 1

    // Connections shared by both uops of the pair.
    Seq(resultSlot, carrySlot).foreach { slot =>
      val uop = csBundle(slot)
      uop.srcType(0) := prevTmpType
      uop.srcType(1) := SrcType.vp
      uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
      uop.lsrc(1) := src2
      uop.uopIdx := slot.U
    }

    csBundle(resultSlot).lsrc(2) := dest + i.U
    csBundle(resultSlot).ldest := dest + i.U

    // lsrc(2) of the carry uop is left untouched (DontCare)
    csBundle(carrySlot).ldest := (VECTOR_TMP_REG_LMUL + i).U
  }
}
1368
is(UopSplitType.VEC_M0X_VFIRST) {
  // LMUL: a single uop in slot 0 whose result goes to the integer register file (rfWen)
  // at dest; the floating-point and vector write enables are cleared.
  val uop = csBundle(0)
  uop.ldest := dest
  uop.vecWen := false.B
  uop.fpWen := false.B
  uop.rfWen := true.B
}
is(UopSplitType.VEC_VWW) {
  // Uops 0 .. lmul - 1 each read one source register src2 + i (as both lsrc(0) and lsrc(1))
  // and write the temporary VECTOR_TMP_REG_LMUL + i. Every later uop combines the pair of
  // temporaries at offsets 2 * (i - lmul) and 2 * (i - lmul) + 1 into VECTOR_TMP_REG_LMUL + i.
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    // Identical in both phases, so connected once outside the when/otherwise.
    uop.srcType(2) := SrcType.DC
    // lsrc(2) is left untouched
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    when(i.U < lmul) {
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    } otherwise {
      val pairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
      uop.lsrc(0) := pairBase + 1.U
      uop.lsrc(1) := pairBase
    }
  }

  // Final uop: takes src1 as lsrc(0) and the old dest as lsrc(2), and writes dest; lsrc(1) and
  // uopIdx keep the values assigned by the loop. This override does not depend on the loop
  // variable, so it is connected once after the loop instead of once per iteration; with
  // last-connect semantics the resulting hardware is the same.
  val finalUop = csBundle(numOfUop - 1.U)
  finalUop.srcType(2) := SrcType.vp
  finalUop.lsrc(0) := src1
  finalUop.lsrc(2) := dest
  finalUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  /**
    * Emits `len * len` uops, one per (vd/vs1 register `row`, vs2 register `col`) pair.
    *
    * Within a row the uops are chained through temporaries: the first one reads the old
    * vd + row as lsrc(2), each following one reads the temporary written by its
    * predecessor, and the last one writes vd + row. srcType fields are not overridden here.
    *
    * @param len number of registers in the group (2, 4 or 8 at the call sites below)
    */
  def genCsBundle_VEC_RGATHER(len:Int): Unit ={
    for (row <- 0 until len; col <- 0 until len) {
      val slot = row * len + col
      val uop = csBundle(slot)
      val startsChain = col == 0
      val endsChain = col == len - 1

      uop.lsrc(0) := src1 + row.U
      uop.lsrc(1) := src2 + col.U
      uop.lsrc(2) := (if (startsChain) dest + row.U else (VECTOR_TMP_REG_LMUL + col - 1).U)
      uop.ldest := (if (endsChain) dest + row.U else (VECTOR_TMP_REG_LMUL + col).U)
      uop.uopIdx := slot.U
    }
  }

  // Only the vlmul encodings b001 / b010 / b011 expand here; other encodings emit nothing.
  switch(vlmulReg) {
    is("b001".U) { genCsBundle_VEC_RGATHER(2) }
    is("b010".U) { genCsBundle_VEC_RGATHER(4) }
    is("b011".U) { genCsBundle_VEC_RGATHER(8) }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  /**
    * Emits `len * len` gather uops into csBundle slots 1 .. len*len (slot 0 is the move
    * uop set up below). Every gather uop reads VECTOR_TMP_REG_LMUL as lsrc(0); the chain
    * temporaries of a row therefore start at VECTOR_TMP_REG_LMUL + 1.
    *
    * Note that uopIdx counts gather uops from 0, i.e. it is one less than the slot index.
    *
    * @param len number of registers in the group
    */
  def genCsBundle_RGATHER_VX(len:Int): Unit ={
    for (row <- 0 until len; col <- 0 until len) {
      val gatherIdx = row * len + col
      val uop = csBundle(gatherIdx + 1)

      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + col.U
      uop.lsrc(2) := (if (col == 0) dest + row.U else (VECTOR_TMP_REG_LMUL + col).U)
      uop.ldest := (if (col == len - 1) dest + row.U else (VECTOR_TMP_REG_LMUL + col + 1).U)
      uop.uopIdx := gatherIdx.U
    }
  }

  // Slot 0: integer/immediate-to-vector move into VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B
  Seq(moveUop.rfWen, moveUop.fpWen).foreach(_ := false.B)

  // Single-register layout first; the switch below overrides it for larger groups.
  genCsBundle_RGATHER_VX(1)
  switch(vlmulReg) {
    is("b001".U) { genCsBundle_RGATHER_VX(2) }
    is("b010".U) { genCsBundle_RGATHER_VX(4) }
    is("b011".U) { genCsBundle_RGATHER_VX(8) }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  /**
    * SEW = 8 layout: two uops per (vd register `i`, vs2 register `j`) pair, reading index
    * registers vs1 + 2*i and vs1 + 2*i + 1 respectively (the 16-bit indices occupy twice
    * as many registers as the 8-bit data).
    *
    * The two uops of a pair and the pairs of a row are chained through temporaries
    * VECTOR_TMP_REG_LMUL + 2*j (- 1 / + 1); the row starts from old vd + i and its last
    * uop writes vd + i.
    *
    * @param len number of data registers in the group
    */
  def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
    for (i <- 0 until len; j <- 0 until len) {
      val firstIdx = (i * len + j) * 2
      val secondIdx = firstIdx + 1
      val midTmp = (VECTOR_TMP_REG_LMUL + j * 2).U

      val first = csBundle(firstIdx)
      first.lsrc(0) := src1 + (i * 2 + 0).U
      first.lsrc(1) := src2 + j.U
      first.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
      first.ldest := midTmp
      first.uopIdx := firstIdx.U

      val second = csBundle(secondIdx)
      second.lsrc(0) := src1 + (i * 2 + 1).U
      second.lsrc(1) := src2 + j.U
      second.lsrc(2) := midTmp
      second.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
      second.uopIdx := secondIdx.U
    }
  }

  /**
    * One uop per (vd register `i`, vs2 register `j`) pair, chained per row exactly like the
    * plain gather: lsrc(2) is old vd + i for the first uop of a row and the previous
    * temporary otherwise; the last uop of a row writes vd + i.
    *
    * This replaces the former three copies of the generator, which differed only in how
    * many consecutive vd registers share one vs1 index register.
    *
    * @param len      number of data registers in the group
    * @param vdPerVs1 how many consecutive vd registers read the same vs1 register
    *                 (1 by default, 2 for SEW = 32, 4 for SEW = 64 at the call sites)
    */
  def genCsBundle_VEC_RGATHEREI16(len: Int, vdPerVs1: Int = 1): Unit = {
    require(vdPerVs1 >= 1, "vdPerVs1 must be positive")
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + (i / vdPerVs1).U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }
  }

  /** Selects the generator matching the current SEW for a group of `len` registers. */
  def genBySew(len: Int): Unit = {
    when(!vsewReg.orR) {
      genCsBundle_VEC_RGATHEREI16_SEW8(len)
    }.elsewhen(vsewReg === VSew.e32) {
      genCsBundle_VEC_RGATHEREI16(len, vdPerVs1 = 2)
    }.elsewhen(vsewReg === VSew.e64) {
      genCsBundle_VEC_RGATHEREI16(len, vdPerVs1 = 4)
    }.otherwise {
      genCsBundle_VEC_RGATHEREI16(len)
    }
  }

  // Single-register layout first; the switch below overrides it for larger groups.
  genBySew(1)
  switch(vlmulReg) {
    is("b001".U) { genBySew(2) }
    is("b010".U) { genBySew(4) }
    is("b011".U) {
      // No SEW = 8 split for this encoding (unchanged from before): vsewReg == 0 falls
      // through to the one-uop-per-pair generator in the otherwise branch.
      when(vsewReg === VSew.e32) {
        genCsBundle_VEC_RGATHEREI16(8, vdPerVs1 = 2)
      }.elsewhen(vsewReg === VSew.e64) {
        genCsBundle_VEC_RGATHEREI16(8, vdPerVs1 = 4)
      }.otherwise {
        genCsBundle_VEC_RGATHEREI16(8)
      }
    }
  }
}
is(UopSplitType.VEC_COMPRESS) {
  /**
    * Emits the compress uops in `len` rounds, one round per vs2 register `i`.
    *
    * Round `i` has i + 2 uops, except the last round which has i + 1, so the first slot
    * of round `i` is i * (i + 3) / 2 (the number of uops in all earlier rounds).
    * Inside a non-final round the extra uop (j == i + 1) writes VECTOR_TMP_REG_LMUL, which
    * every uop reads through lsrc(3); for that uop srcType(0) and srcType(2) are DontCare.
    * The final round writes the real destinations vd + j.
    *
    * @param len number of registers in the group
    */
  def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
    for (i <- 0 until len) {
      val finalRound = i == len - 1
      val roundBase = i * (i + 3) / 2
      val uopsInRound = if (finalRound) i + 1 else i + 2

      for (j <- 0 until uopsInRound) {
        val extraUop = j == i + 1
        val slot = roundBase + j
        val uop = csBundle(slot)

        val vd_old = if (i == j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
        val vd =
          if (finalRound) (dest + j.U)
          else if (extraUop) VECTOR_TMP_REG_LMUL.U
          else (VECTOR_TMP_REG_LMUL + j + 1).U
        val src13Type = if (extraUop) DontCare else SrcType.vp

        uop.srcType(0) := src13Type
        uop.srcType(1) := SrcType.vp
        uop.srcType(2) := src13Type
        uop.srcType(3) := SrcType.vp
        uop.lsrc(0) := src1
        uop.lsrc(1) := src2 + i.U
        uop.lsrc(2) := vd_old
        uop.lsrc(3) := VECTOR_TMP_REG_LMUL.U
        uop.ldest := vd
        uop.uopIdx := slot.U
      }
    }
  }

  // Only the vlmul encodings b001 / b010 / b011 expand here; other encodings emit nothing.
  switch(vlmulReg) {
    is("b001".U) { genCsBundle_VEC_COMPRESS(2) }
    is("b010".U) { genCsBundle_VEC_COMPRESS(4) }
    is("b011".U) { genCsBundle_VEC_COMPRESS(8) }
  }
}
is(UopSplitType.VEC_MVNR) {
  // One uop per register of the group: every source and the destination advance together.
  (0 until MAX_VLMUL).foreach { regOffset =>
    val uop = csBundle(regOffset)
    uop.uopIdx := regOffset.U
    uop.lsrc(0) := src1 + regOffset.U
    uop.lsrc(1) := src2 + regOffset.U
    uop.lsrc(2) := dest + regOffset.U
    uop.ldest := dest + regOffset.U
  }
}
is(UopSplitType.VEC_US_LDST) {
  // Slot 0: integer-to-FP move (FMV.D.X) of the scalar operand into FP_TMP_REG_MV.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := FP_TMP_REG_MV.U
  moveUop.fuType := FuType.i2f.U
  moveUop.fpWen := true.B
  Seq(moveUop.rfWen, moveUop.vecWen).foreach(_ := false.B)
  moveUop.vlsInstr := true.B

  val moveFpu = moveUop.fpu
  moveFpu.typeTagIn := FPU.D
  moveFpu.typeTagOut := FPU.D
  moveFpu.fromInt := true.B
  moveFpu.fpWen := true.B
  Seq(moveFpu.isAddSub, moveFpu.wflags, moveFpu.div, moveFpu.sqrt, moveFpu.fcvt).foreach(_ := false.B)

  // Slots 1 .. MAX_VLMUL: one memory uop per destination register, each reading the moved
  // scalar from FP_TMP_REG_MV. uopIdx counts memory uops from 0 (slot index minus one).
  (0 until MAX_VLMUL).foreach { regIdx =>
    val memUop = csBundle(regIdx + 1)
    memUop.srcType(0) := SrcType.fp
    memUop.lsrc(0) := FP_TMP_REG_MV.U
    memUop.lsrc(2) := dest + regIdx.U // old vd
    memUop.ldest := dest + regIdx.U
    memUop.uopIdx := regIdx.U
    memUop.vlsInstr := true.B
  }
}
is(UopSplitType.VEC_S_LDST) {
  // Slots 0 and 1 are two integer-to-FP moves (FMV.D.X) with identical control fields.
  // Slot 0 targets FP_TMP_REG_MV, slot 1 targets VECTOR_TMP_REG_LMUL (written with fpWen).
  Seq(FP_TMP_REG_MV.U, VECTOR_TMP_REG_LMUL.U).zipWithIndex.foreach { case (tmpDest, slot) =>
    val moveUop = csBundle(slot)
    moveUop.srcType(0) := SrcType.reg
    moveUop.srcType(1) := SrcType.imm
    moveUop.lsrc(1) := 0.U
    moveUop.ldest := tmpDest
    moveUop.fuType := FuType.i2f.U
    moveUop.fpWen := true.B
    Seq(moveUop.rfWen, moveUop.vecWen).foreach(_ := false.B)
    moveUop.vlsInstr := true.B

    val moveFpu = moveUop.fpu
    moveFpu.typeTagIn := FPU.D
    moveFpu.typeTagOut := FPU.D
    moveFpu.fromInt := true.B
    moveFpu.fpWen := true.B
    Seq(moveFpu.isAddSub, moveFpu.wflags, moveFpu.div, moveFpu.sqrt, moveFpu.fcvt).foreach(_ := false.B)
  }
  // The second move takes its integer source from the instruction's second source field
  // (presumably the stride register -- confirm against the decoder); slot 0 keeps its lsrc(0).
  csBundle(1).lsrc(0) := latchedInst.lsrc(1)

  // Slots 2 .. MAX_VLMUL + 1: one memory uop per destination register, reading both moved
  // scalars. uopIdx counts memory uops from 0 (slot index minus two).
  (0 until MAX_VLMUL).foreach { regIdx =>
    val memUop = csBundle(regIdx + 2)
    memUop.srcType(0) := SrcType.fp
    memUop.srcType(1) := SrcType.fp
    memUop.lsrc(0) := FP_TMP_REG_MV.U
    memUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(2) := dest + regIdx.U // old vd
    memUop.ldest := dest + regIdx.U
    memUop.uopIdx := regIdx.U
    memUop.vlsInstr := true.B
  }
}
is(UopSplitType.VEC_I_LDST) {
  val vlmul = vlmulReg
  val vsew = Cat(0.U(1.W), vsewReg)
  val veew = Cat(0.U(1.W), width)
  // veew + vlmul - vsew: `1.U + ~vsew` is the two's-complement negation of vsew.
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt

  // Encodings b001 / b010 / b011 map to 1 / 2 / 3; every other encoding collapses to 0.
  def simplifyMul(enc: UInt): UInt = MuxLookup(enc, 0.U(2.W), Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))
  val simple_lmul = simplifyMul(vlmul)
  val simple_emul = simplifyMul(vemul)

  // Slot 0: integer-to-FP move (FMV.D.X) of the scalar operand into FP_TMP_REG_MV.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := FP_TMP_REG_MV.U
  moveUop.fuType := FuType.i2f.U
  moveUop.fpWen := true.B
  Seq(moveUop.rfWen, moveUop.vecWen).foreach(_ := false.B)
  moveUop.vlsInstr := true.B

  val moveFpu = moveUop.fpu
  moveFpu.typeTagIn := FPU.D
  moveFpu.typeTagOut := FPU.D
  moveFpu.fromInt := true.B
  moveFpu.fpWen := true.B
  Seq(moveFpu.isAddSub, moveFpu.wflags, moveFpu.div, moveFpu.sqrt, moveFpu.fcvt).foreach(_ := false.B)

  // Slots 1 .. MAX_INDEXED_LS_UOPNUM: the per-uop lookup tables turn (emul, lmul, nf)
  // into the vs2 / vd register offsets of each memory uop.
  val tableKey = Cat(simple_emul, simple_lmul, nf)
  for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
    val table = indexedLSRegOffset(i)
    table.src := tableKey

    val vs2Reg = Mux1H(UIntToOH(table.outOffsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
    val vdReg = Mux1H(UIntToOH(table.outOffsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))

    val memUop = csBundle(i + 1)
    memUop.srcType(0) := SrcType.fp
    memUop.lsrc(0) := FP_TMP_REG_MV.U
    memUop.lsrc(1) := vs2Reg
    /**
      * For indexed instructions, VLSU concatenates all the uops that write the same logical vd
      * register and writes back only once for all of them. If every such uop used old vd as
      * lsrc(2) while also writing that same vd, the uops would depend on each other and
      * indexed instructions with emul > lmul would deadlock.
      *
      * Assume N = emul/lmul. To break the dependence, only the first uop of a vd reads old vd
      * as lsrc(2); the remaining N-1 uops read the temporary vector register instead.
      */
    memUop.lsrc(2) := Mux(table.outIsFirstUopInVd, vdReg, VECTOR_TMP_REG_LMUL.U)
    memUop.ldest := vdReg
    memUop.uopIdx := i.U
    memUop.vlsInstr := true.B
  }
}
1762  }
1763
//readyFromRename Counter
// Pairs each port's "not ready" flag with its index; RenameWidth is the default value.
// Presumably this yields the index of the first not-ready port -- confirm against PriorityMuxDefault.
val readyCounter = {
  val notReadyFlags = outReadys.map(x => !x)
  val portIndices = (0 until RenameWidth).map(_.U)
  PriorityMuxDefault(notReadyFlags.zip(portIndices), RenameWidth.U)
}

// All remaining uops of the current complex instruction can be sent out this cycle.
val thisAllOut = uopRes <= readyCounter

// Next-state logic for the remaining-uop counter. Branches that assign nothing rely on
// whatever defaults stateNext / uopResNext receive where they are declared.
switch(state) {
  is(s_idle) {
    // Accept a new complex instruction and load its uop count.
    when (inValid) {
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }
  }
  is(s_active) {
    when (!thisAllOut) {
      // Still draining: subtract what was issued this cycle.
      stateNext := s_active
      uopResNext := uopRes - readyCounter
    }.elsewhen (inValid) {
      // Finished and a new instruction is waiting: start it back-to-back.
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }.otherwise {
      stateNext := s_idle
      uopResNext := 0.U
    }
  }
}

// A redirect drops the instruction in flight.
state := Mux(io.redirect, s_idle, stateNext)
uopRes := Mux(io.redirect, 0.U, uopResNext)

// Uops issued this cycle: min(uopRes, readyCounter).
val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)

for (i <- 0 until RenameWidth) {
  // Index of the uop presented on port i: uops already sent (numOfUop - uopRes) plus i,
  // clamped to the last csBundle entry when it runs past the table.
  val uopSel = i.U + numOfUop - uopRes
  outValids(i) := complexNum > i.U
  outDecodedInsts(i) := Mux(uopSel < maxUopSize.U, csBundle(uopSel), csBundle(maxUopSize - 1))
}

outComplexNum := Mux(state === s_active, complexNum, 0.U)
// Ready for a new instruction when idle, or when the current one finishes this cycle.
inReady := (state === s_idle) || ((state === s_active) && thisAllOut)
1805
1806//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1807//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1808//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1809//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1810//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1811//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1812//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1813//
1814//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1815//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1816//    0.U)
1817//  validToRename.zipWithIndex.foreach{
1818//    case(dst, i) =>
1819//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1820//      dst := MuxCase(false.B, Seq(
1821//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1822//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1823//      ).toSeq)
1824//  }
1825//
1826//  readyToIBuf.zipWithIndex.foreach {
1827//    case (dst, i) =>
1828//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1829//      dst := MuxCase(true.B, Seq(
1830//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1831//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1832//      ).toSeq)
1833//  }
1834//
1835//  io.deq.decodedInsts := decodedInsts
1836//  io.deq.complexNum := complexNum
1837//  io.deq.validToRename := validToRename
1838//  io.deq.readyToIBuf := readyToIBuf
1839}
1840