xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 4aa0028654716f3ef660f985eb6662c6c75b70d0)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one uop slot of an indexed vector load/store.
  *
  * One instance is generated per uop position `uopIdx`. For every `(emul, lmul, nf)`
  * encoding presented on `src` it outputs the register offsets of that uop relative to
  * the vs2 and vd base registers, plus a flag telling whether the uop is the first one
  * that targets its vd register.
  *
  * `src` layout: [6:5] emul, [4:3] lmul, [2:0] nf (number of fields = nf + 1).
  * `emul` and `lmul` are log2 encoded (0..3 stands for 1/2/4/8 registers); this is why
  * they are used as shift amounts (`1 << emul`, `1 << lmul`) below.
  *
  * @param uopIdx position of the uop inside the split instruction this table describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes `(offsetVs2, offsetVd, isFirstUopInVd)` for the uop at `uopIdx`.
    *
    * Every field occupies `1 << max(lmul, emul)` consecutive uops:
    *  - `lmul < emul`: one uop per vs2 register; `1 << (emul - lmul)` consecutive uops
    *    share one vd register and only the first of them is flagged.
    *  - otherwise: one uop per vd register; `1 << (lmul - emul)` consecutive uops share
    *    one vs2 register and every uop is flagged as first.
    *
    * The legality guard compares the real register count `(1 << lmul) * nfields` with 8.
    * The previous form `lmul * nfields <= 8` multiplied the log2 encoding itself, so
    * combinations such as lmul = 3, nfields = 2 were accepted and produced vd offsets of
    * 8..15, which do not fit the 3-bit `offsetVd` field and leaked into `offsetVs2` when
    * the table entry was packed. Results for combinations that satisfy the real
    * constraint are unchanged.
    *
    * @param lmul    log2 of the data register group size (0..3)
    * @param emul    log2 of the vs2 register group size (0..3)
    * @param nfields number of fields (nf + 1)
    * @param uopIdx  position of the uop inside the split instruction
    * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the combination is
    *         rejected by the guard or `uopIdx` lies beyond the uops of the instruction
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val regsPerField = 1 << lmul
    val uopsPerField = 1 << math.max(lmul, emul)
    val field = uopIdx / uopsPerField
    val idxInField = uopIdx % uopsPerField
    val inRange = uopIdx >= 0 && field < nfields

    if (regsPerField * nfields > 8 || !inRange) {
      (0, 0, 1)
    } else if (lmul < emul) {
      // lmul < emul: uop count depends on emul * nf
      val uopsPerVd = 1 << (emul - lmul)
      (idxInField, idxInField / uopsPerVd + field * regsPerField, if (idxInField % uopsPerVd == 0) 1 else 0)
    } else {
      // lmul >= emul: uop count depends on lmul * nf
      val uopsPerVs2 = 1 << (lmul - emul)
      (idxInField / uopsPerVs2, idxInField + field * regsPerField, 1)
    }
  }

  // indexed load/store: one row per (emul, lmul, nf) encoding, 4 * 4 * 8 = 128 rows,
  // i.e. every value of the 7-bit `src` is covered.
  val combVemulNf: Seq[(Int, Int, Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf <- 0 until 8
    } yield {
      val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
      (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
    }

  // Packed entry: [6] isFirstUopInVd, [5:3] offsetVs2, [2:0] offsetVd.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat((isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd).U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}
86
/**
  * Numeric constants shared by the vector uop-splitting logic.
  */
trait VectorConstants {
  /** Upper bound on the number of registers in a vector register group. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as a temporary for scalar-to-vector moves (e.g. vsetvl). */
  val FP_TMP_REG_MV: Int = 32

  /** First logical index of the vector temporaries; 33~47  ->  15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Number of uop slots generated for an indexed vector load/store. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
93
/**
  * IO bundle of the complex-instruction decoder (`DecodeUnitComp`).
  *
  * A single already simple-decoded instruction enters through `in`; the uops it is split
  * into leave through `out.complexDecodedInsts`, at most `RenameWidth` per cycle.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // NOTE(review): presumably a pipeline flush request; its consumer is outside the code seen here — confirm.
  val redirect = Input(Bool())
  // Custom CSR control inputs (NOTE(review): usage not visible in this part of the file).
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // Bypassed vtype; connected into the `vpu` fields of both vset uops.
  val vtypeBypass = Input(new VType)
  // Handshaked input: when the first instruction of the decode vector is a complex
  // instruction, it is passed in here together with its uop count information.
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // Handshaked output ports, one per rename slot, carrying the generated uops.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  // NOTE(review): presumably the number of uops offered on `out` this cycle — confirm against the driver.
  val complexNum = Output(UInt(3.W))
}
108
109/**
110  * @author zly
111  */
112class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
113  val io = IO(new DecodeUnitCompIO)
114
115  // alias
116  private val inReady = io.in.ready
117  private val inValid = io.in.valid
118  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
119  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
120  private val inUopInfo = io.in.bits.uopInfo
121  private val outValids = io.out.complexDecodedInsts.map(_.valid)
122  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
123  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
124  private val outComplexNum = io.complexNum
125
126  val maxUopSize = MaxUopSize
127  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
128    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
129      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
130    }.elsewhen(inInstFields.RS1 === 0.U) {
131      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
132    }
133  }
134
135  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
136  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
137  //input bits
138  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
139
140  val src1 = Cat(0.U(1.W), instFields.RS1)
141  val src2 = Cat(0.U(1.W), instFields.RS2)
142  val dest = Cat(0.U(1.W), instFields.RD)
143
144  val nf    = instFields.NF
145  val width = instFields.WIDTH(1, 0)
146
147  //output of DecodeUnit
148  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
149  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
150  val lmul = Wire(UInt(4.W))
151  val isVsetSimple = Wire(Bool())
152
153  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
154  indexedLSRegOffset.map(_.src := 0.U)
155
156  //pre decode
157  lmul := latchedUopInfo.lmul
158  isVsetSimple := latchedInst.isVset
159  val vlmulReg = latchedInst.vpu.vlmul
160  val vsewReg = latchedInst.vpu.vsew
161
162  //Type of uop Div
163  val typeOfSplit = latchedInst.uopSplitType
164  val src1Type = latchedInst.srcType(0)
165  val src1IsImm = src1Type === SrcType.imm
166  val src1IsFp = src1Type === SrcType.fp
167
168  numOfUop := latchedUopInfo.numOfUop
169  numOfWB := latchedUopInfo.numOfWB
170
171  //uops dispatch
172  val s_idle :: s_active :: Nil = Enum(2)
173  val state = RegInit(s_idle)
174  val stateNext = WireDefault(state)
175  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
176  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
177  val uopResNext = WireInit(uopRes)
178  val e64 = 3.U(2.W)
179  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
180  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
181  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
182
183  //uop div up to maxUopSize
184  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
185  csBundle.foreach { case dst =>
186    dst := latchedInst
187    dst.numUops := latchedUopInfo.numOfUop
188    dst.numWB := latchedUopInfo.numOfWB
189    dst.firstUop := false.B
190    dst.lastUop := false.B
191    dst.vlsInstr := false.B
192  }
193
194  csBundle(0).firstUop := true.B
195  csBundle(numOfUop - 1.U).lastUop := true.B
196
197  switch(typeOfSplit) {
198    is(UopSplitType.VSET) {
199      // In simple decoder, rfWen and vecWen are not set
200      when(isVsetSimple) {
201        // Default
202        // uop0 set rd, never flushPipe
203        csBundle(0).fuType := FuType.vsetiwi.U
204        csBundle(0).flushPipe := false.B
205        csBundle(0).rfWen := true.B
206        // uop1 set vl, vsetvl will flushPipe
207        csBundle(1).ldest := VCONFIG_IDX.U
208        csBundle(1).vecWen := true.B
209        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
210          // write nothing, uop0 is a nop instruction
211          csBundle(0).rfWen := false.B
212          csBundle(0).fpWen := false.B
213          csBundle(0).vecWen := false.B
214          csBundle(1).fuType := FuType.vsetfwf.U
215          csBundle(1).srcType(0) := SrcType.vp
216          csBundle(1).lsrc(0) := VCONFIG_IDX.U
217        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
218          // uop0: mv vtype gpr to vector region
219          csBundle(0).srcType(0) := SrcType.xp
220          csBundle(0).srcType(1) := SrcType.no
221          csBundle(0).lsrc(1) := 0.U
222          csBundle(0).ldest := FP_TMP_REG_MV.U
223          csBundle(0).fuType := FuType.i2v.U
224          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
225          csBundle(0).rfWen := false.B
226          csBundle(0).fpWen := true.B
227          csBundle(0).vecWen := false.B
228          csBundle(0).flushPipe := false.B
229          // uop1: uvsetvcfg_vv
230          csBundle(1).fuType := FuType.vsetfwf.U
231          // vl
232          csBundle(1).srcType(0) := SrcType.vp
233          csBundle(1).lsrc(0) := VCONFIG_IDX.U
234          // vtype
235          csBundle(1).srcType(1) := SrcType.fp
236          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
237          csBundle(1).vecWen := true.B
238          csBundle(1).ldest := VCONFIG_IDX.U
239        }
240        // use bypass vtype from vtypeGen
241        csBundle(0).vpu.connectVType(io.vtypeBypass)
242        csBundle(1).vpu.connectVType(io.vtypeBypass)
243      }
244    }
245    is(UopSplitType.VEC_VVV) {
246      for (i <- 0 until MAX_VLMUL) {
247        csBundle(i).lsrc(0) := src1 + i.U
248        csBundle(i).lsrc(1) := src2 + i.U
249        csBundle(i).lsrc(2) := dest + i.U
250        csBundle(i).ldest := dest + i.U
251        csBundle(i).uopIdx := i.U
252      }
253    }
254    is(UopSplitType.VEC_VFV) {
255      /*
256      i to vector move
257       */
258      csBundle(0).srcType(0) := SrcType.fp
259      csBundle(0).srcType(1) := SrcType.imm
260      csBundle(0).lsrc(1) := 0.U
261      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
262      csBundle(0).fuType := FuType.f2v.U
263      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
264      csBundle(0).vecWen := true.B
265      csBundle(0).vpu.isReverse := false.B
266      /*
267      LMUL
268       */
269      for (i <- 0 until MAX_VLMUL) {
270        csBundle(i + 1).srcType(0) := SrcType.vp
271        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
272        csBundle(i + 1).lsrc(1) := src2 + i.U
273        csBundle(i + 1).lsrc(2) := dest + i.U
274        csBundle(i + 1).ldest := dest + i.U
275        csBundle(i + 1).uopIdx := i.U
276      }
277    }
278    is(UopSplitType.VEC_EXT2) {
279      for (i <- 0 until MAX_VLMUL / 2) {
280        csBundle(2 * i).lsrc(1) := src2 + i.U
281        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
282        csBundle(2 * i).ldest := dest + (2 * i).U
283        csBundle(2 * i).uopIdx := (2 * i).U
284        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
285        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
286        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
287        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
288      }
289    }
290    is(UopSplitType.VEC_EXT4) {
291      for (i <- 0 until MAX_VLMUL / 4) {
292        csBundle(4 * i).lsrc(1) := src2 + i.U
293        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
294        csBundle(4 * i).ldest := dest + (4 * i).U
295        csBundle(4 * i).uopIdx := (4 * i).U
296        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
297        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
298        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
299        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
300        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
301        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
302        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
303        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
304        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
305        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
306        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
307        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
308      }
309    }
310    is(UopSplitType.VEC_EXT8) {
311      for (i <- 0 until MAX_VLMUL) {
312        csBundle(i).lsrc(1) := src2
313        csBundle(i).lsrc(2) := dest + i.U
314        csBundle(i).ldest := dest + i.U
315        csBundle(i).uopIdx := i.U
316      }
317    }
318    is(UopSplitType.VEC_0XV) {
319      /*
320      i/f to vector move
321       */
322      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
323      csBundle(0).srcType(1) := SrcType.imm
324      csBundle(0).lsrc(1) := 0.U
325      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
326      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
327      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
328      csBundle(0).rfWen := false.B
329      csBundle(0).fpWen := false.B
330      csBundle(0).vecWen := true.B
331      /*
332      vmv.s.x
333       */
334      csBundle(1).srcType(0) := SrcType.vp
335      csBundle(1).srcType(1) := SrcType.imm
336      csBundle(1).srcType(2) := SrcType.vp
337      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
338      csBundle(1).lsrc(1) := 0.U
339      csBundle(1).lsrc(2) := dest
340      csBundle(1).ldest := dest
341      csBundle(1).rfWen := false.B
342      csBundle(1).fpWen := false.B
343      csBundle(1).vecWen := true.B
344      csBundle(1).uopIdx := 0.U
345    }
346    is(UopSplitType.VEC_VXV) {
347      /*
348      i to vector move
349       */
350      csBundle(0).srcType(0) := SrcType.reg
351      csBundle(0).srcType(1) := SrcType.imm
352      csBundle(0).lsrc(1) := 0.U
353      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
354      csBundle(0).fuType := FuType.i2v.U
355      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
356      csBundle(0).vecWen := true.B
357      csBundle(0).vpu.isReverse := false.B
358      /*
359      LMUL
360       */
361      for (i <- 0 until MAX_VLMUL) {
362        csBundle(i + 1).srcType(0) := SrcType.vp
363        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
364        csBundle(i + 1).lsrc(1) := src2 + i.U
365        csBundle(i + 1).lsrc(2) := dest + i.U
366        csBundle(i + 1).ldest := dest + i.U
367        csBundle(i + 1).uopIdx := i.U
368      }
369    }
370    is(UopSplitType.VEC_VVW) {
371      for (i <- 0 until MAX_VLMUL / 2) {
372        csBundle(2 * i).lsrc(0) := src1 + i.U
373        csBundle(2 * i).lsrc(1) := src2 + i.U
374        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
375        csBundle(2 * i).ldest := dest + (2 * i).U
376        csBundle(2 * i).uopIdx := (2 * i).U
377        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
378        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
379        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
380        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
381        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
382      }
383    }
384    is(UopSplitType.VEC_VFW) {
385      /*
386      f to vector move
387       */
388      csBundle(0).srcType(0) := SrcType.fp
389      csBundle(0).srcType(1) := SrcType.imm
390      csBundle(0).lsrc(1) := 0.U
391      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
392      csBundle(0).fuType := FuType.f2v.U
393      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
394      csBundle(0).rfWen := false.B
395      csBundle(0).fpWen := false.B
396      csBundle(0).vecWen := true.B
397
398      for (i <- 0 until MAX_VLMUL / 2) {
399        csBundle(2 * i + 1).srcType(0) := SrcType.vp
400        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
401        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
402        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
403        csBundle(2 * i + 1).ldest := dest + (2 * i).U
404        csBundle(2 * i + 1).uopIdx := (2 * i).U
405        csBundle(2 * i + 2).srcType(0) := SrcType.vp
406        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
407        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
408        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
409        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
410        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
411      }
412    }
413    is(UopSplitType.VEC_WVW) {
414      for (i <- 0 until MAX_VLMUL / 2) {
415        csBundle(2 * i).lsrc(0) := src1 + i.U
416        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
417        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
418        csBundle(2 * i).ldest := dest + (2 * i).U
419        csBundle(2 * i).uopIdx := (2 * i).U
420        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
421        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
422        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
423        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
424        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
425      }
426    }
427    is(UopSplitType.VEC_VXW) {
428      /*
429      i to vector move
430       */
431      csBundle(0).srcType(0) := SrcType.reg
432      csBundle(0).srcType(1) := SrcType.imm
433      csBundle(0).lsrc(1) := 0.U
434      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
435      csBundle(0).fuType := FuType.i2v.U
436      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
437      csBundle(0).vecWen := true.B
438
439      for (i <- 0 until MAX_VLMUL / 2) {
440        csBundle(2 * i + 1).srcType(0) := SrcType.vp
441        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
442        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
443        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
444        csBundle(2 * i + 1).ldest := dest + (2 * i).U
445        csBundle(2 * i + 1).uopIdx := (2 * i).U
446        csBundle(2 * i + 2).srcType(0) := SrcType.vp
447        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
448        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
449        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
450        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
451        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
452      }
453    }
454    is(UopSplitType.VEC_WXW) {
455      /*
456      i to vector move
457       */
458      csBundle(0).srcType(0) := SrcType.reg
459      csBundle(0).srcType(1) := SrcType.imm
460      csBundle(0).lsrc(1) := 0.U
461      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
462      csBundle(0).fuType := FuType.i2v.U
463      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
464      csBundle(0).vecWen := true.B
465
466      for (i <- 0 until MAX_VLMUL / 2) {
467        csBundle(2 * i + 1).srcType(0) := SrcType.vp
468        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
469        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
470        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
471        csBundle(2 * i + 1).ldest := dest + (2 * i).U
472        csBundle(2 * i + 1).uopIdx := (2 * i).U
473        csBundle(2 * i + 2).srcType(0) := SrcType.vp
474        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
475        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
476        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
477        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
478        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
479      }
480    }
481    is(UopSplitType.VEC_WVV) {
482      for (i <- 0 until MAX_VLMUL / 2) {
483
484        csBundle(2 * i).lsrc(0) := src1 + i.U
485        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
486        csBundle(2 * i).lsrc(2) := dest + i.U
487        csBundle(2 * i).ldest := dest + i.U
488        csBundle(2 * i).uopIdx := (2 * i).U
489        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
490        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
491        csBundle(2 * i + 1).lsrc(2) := dest + i.U
492        csBundle(2 * i + 1).ldest := dest + i.U
493        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
494      }
495    }
496    is(UopSplitType.VEC_WFW) {
497      /*
498      f to vector move
499       */
500      csBundle(0).srcType(0) := SrcType.fp
501      csBundle(0).srcType(1) := SrcType.imm
502      csBundle(0).lsrc(1) := 0.U
503      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
504      csBundle(0).fuType := FuType.f2v.U
505      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
506      csBundle(0).rfWen := false.B
507      csBundle(0).fpWen := false.B
508      csBundle(0).vecWen := true.B
509
510      for (i <- 0 until MAX_VLMUL / 2) {
511        csBundle(2 * i + 1).srcType(0) := SrcType.vp
512        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
513        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
514        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
515        csBundle(2 * i + 1).ldest := dest + (2 * i).U
516        csBundle(2 * i + 1).uopIdx := (2 * i).U
517        csBundle(2 * i + 2).srcType(0) := SrcType.vp
518        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
519        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
520        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
521        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
522        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
523      }
524    }
525    is(UopSplitType.VEC_WXV) {
526      /*
527      i to vector move
528       */
529      csBundle(0).srcType(0) := SrcType.reg
530      csBundle(0).srcType(1) := SrcType.imm
531      csBundle(0).lsrc(1) := 0.U
532      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
533      csBundle(0).fuType := FuType.i2v.U
534      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
535      csBundle(0).vecWen := true.B
536
537      for (i <- 0 until MAX_VLMUL / 2) {
538        csBundle(2 * i + 1).srcType(0) := SrcType.vp
539        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
540        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
541        csBundle(2 * i + 1).lsrc(2) := dest + i.U
542        csBundle(2 * i + 1).ldest := dest + i.U
543        csBundle(2 * i + 1).uopIdx := (2 * i).U
544        csBundle(2 * i + 2).srcType(0) := SrcType.vp
545        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
546        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
547        csBundle(2 * i + 2).lsrc(2) := dest + i.U
548        csBundle(2 * i + 2).ldest := dest + i.U
549        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
550      }
551    }
552    is(UopSplitType.VEC_VVM) {
553      csBundle(0).lsrc(2) := dest
554      csBundle(0).ldest := dest
555      csBundle(0).uopIdx := 0.U
556      for (i <- 1 until MAX_VLMUL) {
557        csBundle(i).lsrc(0) := src1 + i.U
558        csBundle(i).lsrc(1) := src2 + i.U
559        csBundle(i).lsrc(2) := dest
560        csBundle(i).ldest := dest
561        csBundle(i).uopIdx := i.U
562      }
563    }
564    is(UopSplitType.VEC_VFM) {
565      /*
566      f to vector move
567       */
568      csBundle(0).srcType(0) := SrcType.fp
569      csBundle(0).srcType(1) := SrcType.imm
570      csBundle(0).lsrc(1) := 0.U
571      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
572      csBundle(0).fuType := FuType.f2v.U
573      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
574      csBundle(0).rfWen := false.B
575      csBundle(0).fpWen := false.B
576      csBundle(0).vecWen := true.B
577      //LMUL
578      csBundle(1).srcType(0) := SrcType.vp
579      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
580      csBundle(1).lsrc(2) := dest
581      csBundle(1).ldest := dest
582      csBundle(1).uopIdx := 0.U
583      for (i <- 1 until MAX_VLMUL) {
584        csBundle(i + 1).srcType(0) := SrcType.vp
585        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
586        csBundle(i + 1).lsrc(1) := src2 + i.U
587        csBundle(i + 1).lsrc(2) := dest
588        csBundle(i + 1).ldest := dest
589        csBundle(i + 1).uopIdx := i.U
590      }
591      csBundle(numOfUop - 1.U).ldest := dest
592    }
593    is(UopSplitType.VEC_VXM) {
594      /*
595      i to vector move
596       */
597      csBundle(0).srcType(0) := SrcType.reg
598      csBundle(0).srcType(1) := SrcType.imm
599      csBundle(0).lsrc(1) := 0.U
600      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
601      csBundle(0).fuType := FuType.i2v.U
602      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
603      csBundle(0).vecWen := true.B
604      //LMUL
605      csBundle(1).srcType(0) := SrcType.vp
606      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
607      csBundle(1).lsrc(2) := dest
608      csBundle(1).ldest := dest
609      csBundle(1).uopIdx := 0.U
610      for (i <- 1 until MAX_VLMUL) {
611        csBundle(i + 1).srcType(0) := SrcType.vp
612        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
613        csBundle(i + 1).lsrc(1) := src2 + i.U
614        csBundle(i + 1).lsrc(2) := dest
615        csBundle(i + 1).ldest := dest
616        csBundle(i + 1).uopIdx := i.U
617      }
618      csBundle(numOfUop - 1.U).ldest := dest
619    }
620    is(UopSplitType.VEC_SLIDE1UP) {
621      /*
622      i to vector move
623       */
624      csBundle(0).srcType(0) := SrcType.reg
625      csBundle(0).srcType(1) := SrcType.imm
626      csBundle(0).lsrc(1) := 0.U
627      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
628      csBundle(0).fuType := FuType.i2v.U
629      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
630      csBundle(0).vecWen := true.B
631      //LMUL
632      csBundle(1).srcType(0) := SrcType.vp
633      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
634      csBundle(1).lsrc(2) := dest
635      csBundle(1).ldest := dest
636      csBundle(1).uopIdx := 0.U
637      for (i <- 1 until MAX_VLMUL) {
638        csBundle(i + 1).srcType(0) := SrcType.vp
639        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
640        csBundle(i + 1).lsrc(1) := src2 + i.U
641        csBundle(i + 1).lsrc(2) := dest + i.U
642        csBundle(i + 1).ldest := dest + i.U
643        csBundle(i + 1).uopIdx := i.U
644      }
645    }
646    is(UopSplitType.VEC_FSLIDE1UP) {
647      /*
648      i to vector move
649       */
650      csBundle(0).srcType(0) := SrcType.fp
651      csBundle(0).srcType(1) := SrcType.imm
652      csBundle(0).lsrc(1) := 0.U
653      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
654      csBundle(0).fuType := FuType.f2v.U
655      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
656      csBundle(0).rfWen := false.B
657      csBundle(0).fpWen := false.B
658      csBundle(0).vecWen := true.B
659      //LMUL
660      csBundle(1).srcType(0) := SrcType.vp
661      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
662      csBundle(1).lsrc(1) := src2
663      csBundle(1).lsrc(2) := dest
664      csBundle(1).ldest := dest
665      csBundle(1).uopIdx := 0.U
666      for (i <- 1 until MAX_VLMUL) {
667        csBundle(i + 1).srcType(0) := SrcType.vp
668        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
669        csBundle(i + 1).lsrc(1) := src2 + i.U
670        csBundle(i + 1).lsrc(2) := dest + i.U
671        csBundle(i + 1).ldest := dest + i.U
672        csBundle(i + 1).uopIdx := i.U
673      }
674    }
675    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
676      /*
677      i to vector move
678       */
679      csBundle(0).srcType(0) := SrcType.reg
680      csBundle(0).srcType(1) := SrcType.imm
681      csBundle(0).lsrc(1) := 0.U
682      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
683      csBundle(0).fuType := FuType.i2v.U
684      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
685      csBundle(0).vecWen := true.B
686      //LMUL
687      for (i <- 0 until MAX_VLMUL) {
688        csBundle(2 * i + 1).srcType(0) := SrcType.vp
689        csBundle(2 * i + 1).srcType(1) := SrcType.vp
690        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
691        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
692        csBundle(2 * i + 1).lsrc(2) := dest + i.U
693        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
694        csBundle(2 * i + 1).uopIdx := (2 * i).U
695        if (2 * i + 2 < MAX_VLMUL * 2) {
696          csBundle(2 * i + 2).srcType(0) := SrcType.vp
697          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
698          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
699          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
700          csBundle(2 * i + 2).ldest := dest + i.U
701          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
702        }
703      }
704      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
705      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
706      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
707    }
708    is(UopSplitType.VEC_FSLIDE1DOWN) {
709      /*
710      i to vector move
711       */
712      csBundle(0).srcType(0) := SrcType.fp
713      csBundle(0).srcType(1) := SrcType.imm
714      csBundle(0).lsrc(1) := 0.U
715      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
716      csBundle(0).fuType := FuType.f2v.U
717      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
718      csBundle(0).rfWen := false.B
719      csBundle(0).fpWen := false.B
720      csBundle(0).vecWen := true.B
721      //LMUL
722      for (i <- 0 until MAX_VLMUL) {
723        csBundle(2 * i + 1).srcType(0) := SrcType.vp
724        csBundle(2 * i + 1).srcType(1) := SrcType.vp
725        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
726        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
727        csBundle(2 * i + 1).lsrc(2) := dest + i.U
728        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
729        csBundle(2 * i + 1).uopIdx := (2 * i).U
730        if (2 * i + 2 < MAX_VLMUL * 2) {
731          csBundle(2 * i + 2).srcType(0) := SrcType.vp
732          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
733          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
734          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
735          csBundle(2 * i + 2).ldest := dest + i.U
736          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
737        }
738      }
739      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
740      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
741      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
742    }
743    is(UopSplitType.VEC_VRED) {
744      when(vlmulReg === "b001".U) {
745        csBundle(0).srcType(2) := SrcType.DC
746        csBundle(0).lsrc(0) := src2 + 1.U
747        csBundle(0).lsrc(1) := src2
748        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
749        csBundle(0).uopIdx := 0.U
750      }
751      when(vlmulReg === "b010".U) {
752        csBundle(0).srcType(2) := SrcType.DC
753        csBundle(0).lsrc(0) := src2 + 1.U
754        csBundle(0).lsrc(1) := src2
755        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
756        csBundle(0).uopIdx := 0.U
757
758        csBundle(1).srcType(2) := SrcType.DC
759        csBundle(1).lsrc(0) := src2 + 3.U
760        csBundle(1).lsrc(1) := src2 + 2.U
761        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
762        csBundle(1).uopIdx := 1.U
763
764        csBundle(2).srcType(2) := SrcType.DC
765        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
766        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
767        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
768        csBundle(2).uopIdx := 2.U
769      }
770      when(vlmulReg === "b011".U) {
771        for (i <- 0 until MAX_VLMUL) {
772          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
773            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
774            csBundle(i).lsrc(1) := src2 + (i * 2).U
775            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
776          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
777            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
778            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
779            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
780          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
781            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
782            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
783            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
784          }
785          csBundle(i).srcType(2) := SrcType.DC
786          csBundle(i).uopIdx := i.U
787        }
788      }
789      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
790        /*
791         * 2 <= vlmul <= 8
792         */
793        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
794        csBundle(numOfUop - 1.U).lsrc(0) := src1
795        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
796        csBundle(numOfUop - 1.U).lsrc(2) := dest
797        csBundle(numOfUop - 1.U).ldest := dest
798        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
799      }
800    }
801    is(UopSplitType.VEC_VFRED) {
802      val vlmul = vlmulReg
803      val vsew = vsewReg
804      when(vlmul === VLmul.m8){
805        for (i <- 0 until 4) {
806          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
807          csBundle(i).lsrc(1) := src2 + (i * 2).U
808          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
809          csBundle(i).uopIdx := i.U
810        }
811        for (i <- 4 until 6) {
812          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
813          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
814          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
815          csBundle(i).uopIdx := i.U
816        }
817        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
818        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
819        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
820        csBundle(6).uopIdx := 6.U
821        when(vsew === VSew.e64) {
822          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
823          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
824          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
825          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
826          csBundle(7).uopIdx := 7.U
827          csBundle(8).lsrc(0) := src1
828          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
829          csBundle(8).ldest := dest
830          csBundle(8).uopIdx := 8.U
831        }
832        when(vsew === VSew.e32) {
833          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
834          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
835          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
836          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
837          csBundle(7).uopIdx := 7.U
838          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
839          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
840          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
841          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
842          csBundle(8).uopIdx := 8.U
843          csBundle(9).lsrc(0) := src1
844          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
845          csBundle(9).ldest := dest
846          csBundle(9).uopIdx := 9.U
847        }
848        when(vsew === VSew.e16) {
849          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
850          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
851          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
852          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
853          csBundle(7).uopIdx := 7.U
854          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
855          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
856          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
857          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
858          csBundle(8).uopIdx := 8.U
859          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
860          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
861          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
862          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
863          csBundle(9).uopIdx := 9.U
864          csBundle(10).lsrc(0) := src1
865          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
866          csBundle(10).ldest := dest
867          csBundle(10).uopIdx := 10.U
868        }
869      }
870      when(vlmul === VLmul.m4) {
871        for (i <- 0 until 2) {
872          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
873          csBundle(i).lsrc(1) := src2 + (i * 2).U
874          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
875          csBundle(i).uopIdx := i.U
876        }
877        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
878        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
879        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
880        csBundle(2).uopIdx := 2.U
881        when(vsew === VSew.e64) {
882          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
883          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
884          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
885          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
886          csBundle(3).uopIdx := 3.U
887          csBundle(4).lsrc(0) := src1
888          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
889          csBundle(4).ldest := dest
890          csBundle(4).uopIdx := 4.U
891        }
892        when(vsew === VSew.e32) {
893          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
894          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
895          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
896          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
897          csBundle(3).uopIdx := 3.U
898          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
899          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
900          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
901          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
902          csBundle(4).uopIdx := 4.U
903          csBundle(5).lsrc(0) := src1
904          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
905          csBundle(5).ldest := dest
906          csBundle(5).uopIdx := 5.U
907        }
908        when(vsew === VSew.e16) {
909          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
910          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
911          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
912          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
913          csBundle(3).uopIdx := 3.U
914          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
915          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
916          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
917          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
918          csBundle(4).uopIdx := 4.U
919          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
920          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
921          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
922          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
923          csBundle(5).uopIdx := 5.U
924          csBundle(6).lsrc(0) := src1
925          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
926          csBundle(6).ldest := dest
927          csBundle(6).uopIdx := 6.U
928        }
929      }
930      when(vlmul === VLmul.m2) {
931        csBundle(0).lsrc(0) := src2 + 1.U
932        csBundle(0).lsrc(1) := src2 + 0.U
933        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
934        csBundle(0).uopIdx := 0.U
935        when(vsew === VSew.e64) {
936          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
937          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
938          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
939          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
940          csBundle(1).uopIdx := 1.U
941          csBundle(2).lsrc(0) := src1
942          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
943          csBundle(2).ldest := dest
944          csBundle(2).uopIdx := 2.U
945        }
946        when(vsew === VSew.e32) {
947          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
948          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
949          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
950          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
951          csBundle(1).uopIdx := 1.U
952          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
953          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
954          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
955          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
956          csBundle(2).uopIdx := 2.U
957          csBundle(3).lsrc(0) := src1
958          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
959          csBundle(3).ldest := dest
960          csBundle(3).uopIdx := 3.U
961        }
962        when(vsew === VSew.e16) {
963          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
964          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
965          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
966          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
967          csBundle(1).uopIdx := 1.U
968          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
969          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
970          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
971          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
972          csBundle(2).uopIdx := 2.U
973          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
974          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
975          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
976          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
977          csBundle(3).uopIdx := 3.U
978          csBundle(4).lsrc(0) := src1
979          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
980          csBundle(4).ldest := dest
981          csBundle(4).uopIdx := 4.U
982        }
983      }
984      when(vlmul === VLmul.m1) {
985        when(vsew === VSew.e64) {
986          csBundle(0).lsrc(0) := src2
987          csBundle(0).lsrc(1) := src2
988          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
989          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
990          csBundle(0).uopIdx := 0.U
991          csBundle(1).lsrc(0) := src1
992          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
993          csBundle(1).ldest := dest
994          csBundle(1).uopIdx := 1.U
995        }
996        when(vsew === VSew.e32) {
997          csBundle(0).lsrc(0) := src2
998          csBundle(0).lsrc(1) := src2
999          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1000          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1001          csBundle(0).uopIdx := 0.U
1002          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1003          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1004          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1005          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1006          csBundle(1).uopIdx := 1.U
1007          csBundle(2).lsrc(0) := src1
1008          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1009          csBundle(2).ldest := dest
1010          csBundle(2).uopIdx := 2.U
1011        }
1012        when(vsew === VSew.e16) {
1013          csBundle(0).lsrc(0) := src2
1014          csBundle(0).lsrc(1) := src2
1015          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1016          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1017          csBundle(0).uopIdx := 0.U
1018          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1019          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1020          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1021          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1022          csBundle(1).uopIdx := 1.U
1023          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1024          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1025          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1026          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1027          csBundle(2).uopIdx := 2.U
1028          csBundle(3).lsrc(0) := src1
1029          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1030          csBundle(3).ldest := dest
1031          csBundle(3).uopIdx := 3.U
1032        }
1033      }
1034      when(vlmul === VLmul.mf2) {
1035        when(vsew === VSew.e32) {
1036          csBundle(0).lsrc(0) := src2
1037          csBundle(0).lsrc(1) := src2
1038          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1039          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1040          csBundle(0).uopIdx := 0.U
1041          csBundle(1).lsrc(0) := src1
1042          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1043          csBundle(1).ldest := dest
1044          csBundle(1).uopIdx := 1.U
1045        }
1046        when(vsew === VSew.e16) {
1047          csBundle(0).lsrc(0) := src2
1048          csBundle(0).lsrc(1) := src2
1049          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1050          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1051          csBundle(0).uopIdx := 0.U
1052          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1053          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1054          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1055          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1056          csBundle(1).uopIdx := 1.U
1057          csBundle(2).lsrc(0) := src1
1058          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1059          csBundle(2).ldest := dest
1060          csBundle(2).uopIdx := 2.U
1061        }
1062      }
1063      when(vlmul === VLmul.mf4) {
1064        when(vsew === VSew.e16) {
1065          csBundle(0).lsrc(0) := src2
1066          csBundle(0).lsrc(1) := src2
1067          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1068          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1069          csBundle(0).uopIdx := 0.U
1070          csBundle(1).lsrc(0) := src1
1071          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1072          csBundle(1).ldest := dest
1073          csBundle(1).uopIdx := 1.U
1074        }
1075      }
1076    }
1077
1078    is(UopSplitType.VEC_VFREDOSUM) {
1079      import yunsuan.VfaluType
1080      val vlmul = vlmulReg
1081      val vsew = vsewReg
1082      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1083      when(vlmul === VLmul.m8) {
1084        when(vsew === VSew.e64) {
1085          val vlmax = 16
1086          for (i <- 0 until vlmax) {
1087            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1088            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1089            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1090            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1091            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1092            csBundle(i).uopIdx := i.U
1093          }
1094        }
1095        when(vsew === VSew.e32) {
1096          val vlmax = 32
1097          for (i <- 0 until vlmax) {
1098            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1099            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1100            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1101            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1102            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1103            csBundle(i).uopIdx := i.U
1104          }
1105        }
1106        when(vsew === VSew.e16) {
1107          val vlmax = 64
1108          for (i <- 0 until vlmax) {
1109            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1110            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1111            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1112            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1113            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1114            csBundle(i).uopIdx := i.U
1115          }
1116        }
1117      }
1118      when(vlmul === VLmul.m4) {
1119        when(vsew === VSew.e64) {
1120          val vlmax = 8
1121          for (i <- 0 until vlmax) {
1122            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1123            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1124            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1125            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1126            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1127            csBundle(i).uopIdx := i.U
1128          }
1129        }
1130        when(vsew === VSew.e32) {
1131          val vlmax = 16
1132          for (i <- 0 until vlmax) {
1133            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1134            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1135            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1137            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1138            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1139            csBundle(i).uopIdx := i.U
1140          }
1141        }
1142        when(vsew === VSew.e16) {
1143          val vlmax = 32
1144          for (i <- 0 until vlmax) {
1145            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1146            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1147            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1149            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1150            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1151            csBundle(i).uopIdx := i.U
1152          }
1153        }
1154      }
1155      when(vlmul === VLmul.m2) {
1156        when(vsew === VSew.e64) {
1157          val vlmax = 4
1158          for (i <- 0 until vlmax) {
1159            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1161            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1162            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1163            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1164            csBundle(i).uopIdx := i.U
1165          }
1166        }
1167        when(vsew === VSew.e32) {
1168          val vlmax = 8
1169          for (i <- 0 until vlmax) {
1170            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1172            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1173            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1174            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1175            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1176            csBundle(i).uopIdx := i.U
1177          }
1178        }
1179        when(vsew === VSew.e16) {
1180          val vlmax = 16
1181          for (i <- 0 until vlmax) {
1182            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1183            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1184            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1185            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1186            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1187            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1188            csBundle(i).uopIdx := i.U
1189          }
1190        }
1191      }
1192      when(vlmul === VLmul.m1) {
1193        when(vsew === VSew.e64) {
1194          val vlmax = 2
1195          for (i <- 0 until vlmax) {
1196            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1197            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1198            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1199            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1200            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1201            csBundle(i).uopIdx := i.U
1202          }
1203        }
1204        when(vsew === VSew.e32) {
1205          val vlmax = 4
1206          for (i <- 0 until vlmax) {
1207            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1208            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1209            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1211            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1212            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1213            csBundle(i).uopIdx := i.U
1214          }
1215        }
1216        when(vsew === VSew.e16) {
1217          val vlmax = 8
1218          for (i <- 0 until vlmax) {
1219            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1220            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1223            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1224            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1225            csBundle(i).uopIdx := i.U
1226          }
1227        }
1228      }
1229      when(vlmul === VLmul.mf2) {
1230        when(vsew === VSew.e32) {
1231          val vlmax = 2
1232          for (i <- 0 until vlmax) {
1233            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1235            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1236            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1237            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1238            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1239            csBundle(i).uopIdx := i.U
1240          }
1241        }
1242        when(vsew === VSew.e16) {
1243          val vlmax = 4
1244          for (i <- 0 until vlmax) {
1245            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1246            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1248            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1249            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1250            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1251            csBundle(i).uopIdx := i.U
1252          }
1253        }
1254      }
1255      when(vlmul === VLmul.mf4) {
1256        when(vsew === VSew.e16) {
1257          val vlmax = 2
1258          for (i <- 0 until vlmax) {
1259            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1260            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1261            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1262            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1263            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1264            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1265            csBundle(i).uopIdx := i.U
1266          }
1267        }
1268      }
1269    }
1270
1271    is(UopSplitType.VEC_SLIDEUP) {
1272      // i to vector move
1273      csBundle(0).srcType(0) := SrcType.reg
1274      csBundle(0).srcType(1) := SrcType.imm
1275      csBundle(0).lsrc(1) := 0.U
1276      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1277      csBundle(0).fuType := FuType.i2v.U
1278      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1279      csBundle(0).vecWen := true.B
1280      // LMUL
1281      for (i <- 0 until MAX_VLMUL)
1282        for (j <- 0 to i) {
1283          val old_vd = if (j == 0) {
1284            dest + i.U
1285          } else (VECTOR_TMP_REG_LMUL + j).U
1286          val vd = if (j == i) {
1287            dest + i.U
1288          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1289          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1290          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1291          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1292          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1293          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1294          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1295        }
1296    }
1297
1298    is(UopSplitType.VEC_SLIDEDOWN) {
1299      // i to vector move
1300      csBundle(0).srcType(0) := SrcType.reg
1301      csBundle(0).srcType(1) := SrcType.imm
1302      csBundle(0).lsrc(1) := 0.U
1303      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1304      csBundle(0).fuType := FuType.i2v.U
1305      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1306      csBundle(0).vecWen := true.B
1307      // LMUL
1308      for (i <- 0 until MAX_VLMUL)
1309        for (j <- (0 to i).reverse) {
1310          when(i.U < lmul) {
1311            val old_vd = if (j == 0) {
1312              dest + lmul - 1.U - i.U
1313            } else (VECTOR_TMP_REG_LMUL + j).U
1314            val vd = if (j == i) {
1315              dest + lmul - 1.U - i.U
1316            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1317            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1318            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1319            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1320            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1321            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1322            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1323          }
1324        }
1325    }
1326
1327    is(UopSplitType.VEC_M0X) {
1328      // LMUL
1329      for (i <- 0 until MAX_VLMUL) {
1330        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1331        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1332        csBundle(i).srcType(0) := srcType0
1333        csBundle(i).srcType(1) := SrcType.vp
1334        csBundle(i).rfWen := false.B
1335        csBundle(i).fpWen := false.B
1336        csBundle(i).vecWen := true.B
1337        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1338        csBundle(i).lsrc(1) := src2
1339        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1340        csBundle(i).ldest := ldest
1341        csBundle(i).uopIdx := i.U
1342      }
1343      csBundle(lmul - 1.U).rfWen := true.B
1344      csBundle(lmul - 1.U).fpWen := false.B
1345      csBundle(lmul - 1.U).vecWen := false.B
1346      csBundle(lmul - 1.U).ldest := dest
1347    }
1348
1349    is(UopSplitType.VEC_MVV) {
1350      // LMUL
1351      for (i <- 0 until MAX_VLMUL) {
1352        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1353        csBundle(i * 2 + 0).srcType(0) := srcType0
1354        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1355        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1356        csBundle(i * 2 + 0).lsrc(1) := src2
1357        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1358        csBundle(i * 2 + 0).ldest := dest + i.U
1359        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1360
1361        csBundle(i * 2 + 1).srcType(0) := srcType0
1362        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1363        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1364        csBundle(i * 2 + 1).lsrc(1) := src2
1365        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1366        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1367        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1368      }
1369    }
1370
1371    is(UopSplitType.VEC_M0X_VFIRST) {
1372      // LMUL
1373      csBundle(0).rfWen := true.B
1374      csBundle(0).fpWen := false.B
1375      csBundle(0).vecWen := false.B
1376      csBundle(0).ldest := dest
1377    }
1378    is(UopSplitType.VEC_VWW) {
1379      for (i <- 0 until MAX_VLMUL*2) {
1380        when(i.U < lmul){
1381          csBundle(i).srcType(2) := SrcType.DC
1382          csBundle(i).lsrc(0) := src2 + i.U
1383          csBundle(i).lsrc(1) := src2 + i.U
1384          // csBundle(i).lsrc(2) := dest + (2 * i).U
1385          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1386          csBundle(i).uopIdx :=  i.U
1387        } otherwise {
1388          csBundle(i).srcType(2) := SrcType.DC
1389          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1390          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1391          // csBundle(i).lsrc(2) := dest + (2 * i).U
1392          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1393          csBundle(i).uopIdx := i.U
1394        }
1395        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1396        csBundle(numOfUop-1.U).lsrc(0) := src1
1397        csBundle(numOfUop-1.U).lsrc(2) := dest
1398        csBundle(numOfUop-1.U).ldest := dest
1399      }
1400    }
1401    is(UopSplitType.VEC_RGATHER) {
1402      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1403        for (i <- 0 until len)
1404          for (j <- 0 until len) {
1405            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1406            // csBundle(i * len + j).srcType(1) := SrcType.vp
1407            // csBundle(i * len + j).srcType(2) := SrcType.vp
1408            csBundle(i * len + j).lsrc(0) := src1 + i.U
1409            csBundle(i * len + j).lsrc(1) := src2 + j.U
1410            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1411            csBundle(i * len + j).lsrc(2) := vd_old
1412            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1413            csBundle(i * len + j).ldest := vd
1414            csBundle(i * len + j).uopIdx := (i * len + j).U
1415          }
1416      }
1417      switch(vlmulReg) {
1418        is("b001".U ){
1419          genCsBundle_VEC_RGATHER(2)
1420        }
1421        is("b010".U ){
1422          genCsBundle_VEC_RGATHER(4)
1423        }
1424        is("b011".U ){
1425          genCsBundle_VEC_RGATHER(8)
1426        }
1427      }
1428    }
1429    is(UopSplitType.VEC_RGATHER_VX) {
1430      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1431        for (i <- 0 until len)
1432          for (j <- 0 until len) {
1433            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1434            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1435            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1436            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1437            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1438            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1439            csBundle(i * len + j + 1).lsrc(2) := vd_old
1440            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1441            csBundle(i * len + j + 1).ldest := vd
1442            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1443          }
1444      }
1445      // i to vector move
1446      csBundle(0).srcType(0) := SrcType.reg
1447      csBundle(0).srcType(1) := SrcType.imm
1448      csBundle(0).lsrc(1) := 0.U
1449      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1450      csBundle(0).fuType := FuType.i2v.U
1451      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1452      csBundle(0).rfWen := false.B
1453      csBundle(0).fpWen := false.B
1454      csBundle(0).vecWen := true.B
1455      genCsBundle_RGATHER_VX(1)
1456      switch(vlmulReg) {
1457        is("b001".U ){
1458          genCsBundle_RGATHER_VX(2)
1459        }
1460        is("b010".U ){
1461          genCsBundle_RGATHER_VX(4)
1462        }
1463        is("b011".U ){
1464          genCsBundle_RGATHER_VX(8)
1465        }
1466      }
1467    }
1468    is(UopSplitType.VEC_RGATHEREI16) {
1469      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1470        for (i <- 0 until len)
1471          for (j <- 0 until len) {
1472            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1473            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1474            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1475            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1476            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1477            csBundle((i * len + j)*2+0).ldest := vd0
1478            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1479            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1480            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1481            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1482            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1483            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1484            csBundle((i * len + j)*2+1).ldest := vd1
1485            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1486          }
1487      }
1488      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1489        for (i <- 0 until len)
1490          for (j <- 0 until len) {
1491            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1492            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1493            csBundle(i * len + j).lsrc(0) := src1 + i.U
1494            csBundle(i * len + j).lsrc(1) := src2 + j.U
1495            csBundle(i * len + j).lsrc(2) := vd_old
1496            csBundle(i * len + j).ldest := vd
1497            csBundle(i * len + j).uopIdx := (i * len + j).U
1498          }
1499      }
1500      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1501        for (i <- 0 until len)
1502          for (j <- 0 until len) {
1503            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1504            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1505            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1506            csBundle(i * len + j).lsrc(1) := src2 + j.U
1507            csBundle(i * len + j).lsrc(2) := vd_old
1508            csBundle(i * len + j).ldest := vd
1509            csBundle(i * len + j).uopIdx := (i * len + j).U
1510          }
1511      }
1512      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1513        for (i <- 0 until len)
1514          for (j <- 0 until len) {
1515            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1516            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1517            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1518            csBundle(i * len + j).lsrc(1) := src2 + j.U
1519            csBundle(i * len + j).lsrc(2) := vd_old
1520            csBundle(i * len + j).ldest := vd
1521            csBundle(i * len + j).uopIdx := (i * len + j).U
1522          }
1523      }
1524      when(!vsewReg.orR){
1525        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1526      }.elsewhen(vsewReg === VSew.e32){
1527        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1528      }.elsewhen(vsewReg === VSew.e64){
1529        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1530      }.otherwise{
1531        genCsBundle_VEC_RGATHEREI16(1)
1532      }
1533      switch(vlmulReg) {
1534        is("b001".U) {
1535          when(!vsewReg.orR) {
1536            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1537          }.elsewhen(vsewReg === VSew.e32){
1538            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1539          }.elsewhen(vsewReg === VSew.e64){
1540            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1541          }.otherwise{
1542            genCsBundle_VEC_RGATHEREI16(2)
1543          }
1544        }
1545        is("b010".U) {
1546          when(!vsewReg.orR) {
1547            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1548          }.elsewhen(vsewReg === VSew.e32){
1549            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1550          }.elsewhen(vsewReg === VSew.e64){
1551            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1552          }.otherwise{
1553            genCsBundle_VEC_RGATHEREI16(4)
1554          }
1555        }
1556        is("b011".U) {
1557          when(vsewReg === VSew.e32){
1558            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1559          }.elsewhen(vsewReg === VSew.e64){
1560            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1561          }.otherwise{
1562            genCsBundle_VEC_RGATHEREI16(8)
1563          }
1564        }
1565      }
1566    }
1567    is(UopSplitType.VEC_COMPRESS) {
1568      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1569        for (i <- 0 until len) {
1570          val jlen = if (i == len-1) i+1 else i+2
1571          for (j <- 0 until jlen) {
1572            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1573            val vd = if(i==len-1) (dest + j.U) else {
1574              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1575            }
1576            val src13Type = if (j == i+1) DontCare else SrcType.vp
1577            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1578            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1579            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1580            csBundle(i*(i+3)/2 + j).srcType(3) := SrcType.vp
1581            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1582            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1583            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1584            csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1585            csBundle(i*(i+3)/2 + j).ldest := vd
1586            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1587          }
1588        }
1589      }
1590      switch(vlmulReg) {
1591        is("b001".U ){
1592          genCsBundle_VEC_COMPRESS(2)
1593        }
1594        is("b010".U ){
1595          genCsBundle_VEC_COMPRESS(4)
1596        }
1597        is("b011".U ){
1598          genCsBundle_VEC_COMPRESS(8)
1599        }
1600      }
1601    }
1602    is(UopSplitType.VEC_MVNR) {
1603      for (i <- 0 until MAX_VLMUL) {
1604        csBundle(i).lsrc(0) := src1 + i.U
1605        csBundle(i).lsrc(1) := src2 + i.U
1606        csBundle(i).lsrc(2) := dest + i.U
1607        csBundle(i).ldest := dest + i.U
1608        csBundle(i).uopIdx := i.U
1609      }
1610    }
1611    is(UopSplitType.VEC_US_LDST) {
1612      /*
1613      FMV.D.X
1614       */
1615      csBundle(0).srcType(0) := SrcType.reg
1616      csBundle(0).srcType(1) := SrcType.imm
1617      csBundle(0).lsrc(1) := 0.U
1618      csBundle(0).ldest := FP_TMP_REG_MV.U
1619      csBundle(0).fuType := FuType.i2v.U
1620      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1621      csBundle(0).rfWen := false.B
1622      csBundle(0).fpWen := true.B
1623      csBundle(0).vecWen := false.B
1624      csBundle(0).vlsInstr := true.B
1625      //LMUL
1626      for (i <- 0 until MAX_VLMUL) {
1627        csBundle(i + 1).srcType(0) := SrcType.fp
1628        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1629        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1630        csBundle(i + 1).ldest := dest + i.U
1631        csBundle(i + 1).uopIdx := i.U
1632        csBundle(i + 1).vlsInstr := true.B
1633      }
1634      csBundle.head.waitForward := isUsSegment
1635      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1636    }
1637    is(UopSplitType.VEC_S_LDST) {
1638      /*
1639      FMV.D.X
1640       */
1641      csBundle(0).srcType(0) := SrcType.reg
1642      csBundle(0).srcType(1) := SrcType.imm
1643      csBundle(0).lsrc(1) := 0.U
1644      csBundle(0).ldest := FP_TMP_REG_MV.U
1645      csBundle(0).fuType := FuType.i2v.U
1646      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1647      csBundle(0).rfWen := false.B
1648      csBundle(0).fpWen := true.B
1649      csBundle(0).vecWen := false.B
1650      csBundle(0).vlsInstr := true.B
1651
1652      csBundle(1).srcType(0) := SrcType.reg
1653      csBundle(1).srcType(1) := SrcType.imm
1654      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1655      csBundle(1).lsrc(1) := 0.U
1656      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U
1657      csBundle(1).fuType := FuType.i2v.U
1658      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1659      csBundle(1).rfWen := false.B
1660      csBundle(1).fpWen := true.B
1661      csBundle(1).vecWen := false.B
1662      csBundle(1).vlsInstr := true.B
1663
1664      //LMUL
1665      for (i <- 0 until MAX_VLMUL) {
1666        csBundle(i + 2).srcType(0) := SrcType.fp
1667        csBundle(i + 2).srcType(1) := SrcType.fp
1668        csBundle(i + 2).lsrc(0) := FP_TMP_REG_MV.U
1669        csBundle(i + 2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
1670        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1671        csBundle(i + 2).ldest := dest + i.U
1672        csBundle(i + 2).uopIdx := i.U
1673        csBundle(i + 2).vlsInstr := true.B
1674      }
1675      csBundle.head.waitForward := isSdSegment
1676      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1677    }
1678    is(UopSplitType.VEC_I_LDST) {
1679    /*
1680      FMV.D.X
1681       */
1682      val vlmul = vlmulReg
1683      val vsew = Cat(0.U(1.W), vsewReg)
1684      val veew = Cat(0.U(1.W), width)
1685      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
1686      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
1687        "b001".U -> 1.U,
1688        "b010".U -> 2.U,
1689        "b011".U -> 3.U
1690      ))
1691      val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
1692        "b001".U -> 1.U,
1693        "b010".U -> 2.U,
1694        "b011".U -> 3.U
1695      ))
1696      csBundle(0).srcType(0) := SrcType.reg
1697      csBundle(0).srcType(1) := SrcType.imm
1698      csBundle(0).lsrc(1) := 0.U
1699      csBundle(0).ldest := FP_TMP_REG_MV.U
1700      csBundle(0).fuType := FuType.i2v.U
1701      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1702      csBundle(0).rfWen := false.B
1703      csBundle(0).fpWen := true.B
1704      csBundle(0).vecWen := false.B
1705      csBundle(0).vlsInstr := true.B
1706
1707      //LMUL
1708      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
1709        indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul, nf)
1710        val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1711        val offsetVd = indexedLSRegOffset(i).outOffsetVd
1712        val isFirstUopInVd = indexedLSRegOffset(i).outIsFirstUopInVd
1713        csBundle(i + 1).srcType(0) := SrcType.fp
1714        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1715        csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1716        /**
1717          * For indexed instructions, VLSU will concatenate all the uops that write the same logic vd register and
1718          * writeback only once for all these uops. However, these uops share the same lsrc(2)/old vd and the same
1719          * ldest/vd that is equal to old vd, which leads to data dependence between the uops. Therefore there will be
1720          * deadlock for indexed instructions with emul > lmul.
1721          *
1722          * Assume N = emul/lmul. To break the deadlock, only the first uop will read old vd as lsrc(2), and the rest
1723          * N-1 uops will read temporary vector register.
1724          */
1725        // csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1726        csBundle(i + 1).srcType(2) := SrcType.vp
1727        csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1728        csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1729        csBundle(i + 1).uopIdx := i.U
1730        csBundle(i + 1).vlsInstr := true.B
1731      }
1732      csBundle.head.waitForward := isIxSegment
1733      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1734    }
1735  }
1736
1737  //readyFromRename Counter
1738  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1739
1740  // The left uops of the complex inst in ComplexDecoder can be send out this cycle
1741  val thisAllOut = uopRes <= readyCounter
1742
1743  switch(state) {
1744    is(s_idle) {
1745      when (inValid) {
1746        stateNext := s_active
1747        uopResNext := inUopInfo.numOfUop
1748      }
1749    }
1750    is(s_active) {
1751      when (thisAllOut) {
1752        when (inValid) {
1753          stateNext := s_active
1754          uopResNext := inUopInfo.numOfUop
1755        }.otherwise {
1756          stateNext := s_idle
1757          uopResNext := 0.U
1758        }
1759      }.otherwise {
1760        stateNext := s_active
1761        uopResNext := uopRes - readyCounter
1762      }
1763    }
1764  }
1765
1766  state := Mux(io.redirect, s_idle, stateNext)
1767  uopRes := Mux(io.redirect, 0.U, uopResNext)
1768
1769  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1770
1771  for(i <- 0 until RenameWidth) {
1772    outValids(i) := complexNum > i.U
1773    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1774  }
1775
1776  outComplexNum := Mux(state === s_active, complexNum, 0.U)
1777  inReady := state === s_idle || state === s_active && thisAllOut
1778
1779//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1780//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1781//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1782//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1783//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1784//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1785//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1786//
1787//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1788//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1789//    0.U)
1790//  validToRename.zipWithIndex.foreach{
1791//    case(dst, i) =>
1792//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1793//      dst := MuxCase(false.B, Seq(
1794//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1795//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1796//      ).toSeq)
1797//  }
1798//
1799//  readyToIBuf.zipWithIndex.foreach {
1800//    case (dst, i) =>
1801//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1802//      dst := MuxCase(true.B, Seq(
1803//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1804//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1805//      ).toSeq)
1806//  }
1807//
1808//  io.deq.decodedInsts := decodedInsts
1809//  io.deq.complexNum := complexNum
1810//  io.deq.validToRename := validToRename
1811//  io.deq.readyToIBuf := readyToIBuf
1812}
1813