xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 2f6c010092b69cb5fd14b73d133b8a163813c177)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/** Elaboration-time lookup table for one micro-op of a non-segment indexed vector load/store.
  *
  * For the fixed micro-op index `uopIdx`, the module maps the 4-bit selector `src`
  * (`emul` in bits 3..2, `lmul` in bits 1..0, both as log2 values 0..3) to the register
  * offsets that micro-op should add to its vs2 and vd base registers.
  *
  * All 16 selector values are enumerated in the truth table, which is minimised with
  * [[QMCMinimizer]]; the result is purely combinational.
  *
  * @param uopIdx index of the micro-op this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  /** Selector: `emul << 2 | lmul`. */
  val src = IO(Input(UInt(4.W)))
  /** Register offset for vs2 for this micro-op. */
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  /** Register offset for vd for this micro-op. */
  val outOffsetVd = IO(Output(UInt(3.W)))

  /** Computes the `(offsetVs2, offsetVd)` pair of micro-op `uopIdx`.
    *
    * Only non-segment indexed load/store is considered. The larger of `emul` and `lmul`
    * fixes the number of micro-ops (`1 << max`); the register group with the larger
    * multiplier advances on every micro-op, while the other one advances once every
    * `1 << |emul - lmul|` micro-ops.
    *
    * @param lmul   log2 of the multiplier paired with the second tuple element (vd offset)
    * @param emul   log2 of the multiplier paired with the first tuple element (vs2 offset)
    * @param uopIdx micro-op index to evaluate
    * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` is outside the micro-op range
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    if (lmul < emul) {
      // lmul < emul: uop count follows emul, vd advances once per `ratio` uops
      val ratio = 1 << (emul - lmul)
      if (0 <= uopIdx && uopIdx < (1 << emul)) (uopIdx, uopIdx / ratio) else (0, 0)
    } else {
      // lmul >= emul: uop count follows lmul, vs2 advances once per `ratio` uops
      val ratio = 1 << (lmul - emul)
      if (0 <= uopIdx && uopIdx < (1 << lmul)) (uopIdx / ratio, uopIdx) else (0, 0)
    }
  }

  // Indexed load/store: one row (emul, lmul, offsetVs2, offsetVd) per selector value,
  // enumerated with emul as the outer index and lmul as the inner index.
  val combVemulNf: Seq[(Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
  } yield {
    val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
    (emul, lmul, offsetVs2, offsetVd)
  }

  // Input pattern is {emul, lmul}; output pattern is {offsetVs2, offsetVd}, 3 bits each.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/** Constants mixed into the vector instruction splitting logic. */
trait VectorConstants {
  /** Upper bound used when unrolling per-register micro-op loops. */
  val MAX_VLMUL: Int = 8

  /** First logical index of the temporary vector registers (33~47, i.e. 15 registers). */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Presumably the largest micro-op count of an indexed load/store -- confirm against its users. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
84
/** Port bundle of the complex-instruction decoder.
  *
  * Field order is significant (it fixes the port layout), so it is kept exactly as declared.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  /** Redirect indication from the rest of the pipeline. */
  val redirect = Input(Bool())
  /** Custom CSR control settings. */
  val csrCtrl = Input(new CustomCSRCtrlIO)
  /** Bypassed vtype value, connected into the vset micro-ops. */
  val vtypeBypass = Input(new VType)
  /** Complex instruction handed over when it is the first instruction in the decode vector:
    * its simple-decoder result together with the micro-op split information.
    */
  val in = Flipped(Decoupled(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  /** Decoded micro-ops, one decoupled channel per rename slot. */
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, Decoupled(new DecodedInst))
  }
  /** 3-bit count reported alongside `out`; presumably the number of micro-ops offered -- confirm against the driver. */
  val complexNum = Output(UInt(3.W))
}
99
100/**
101  * @author zly
102  */
103class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
104  val io = IO(new DecodeUnitCompIO)
105
106  // alias
107  private val inReady = io.in.ready
108  private val inValid = io.in.valid
109  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
110  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
111  private val inUopInfo = io.in.bits.uopInfo
112  private val outValids = io.out.complexDecodedInsts.map(_.valid)
113  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
114  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
115  private val outComplexNum = io.complexNum
116
117  val maxUopSize = MaxUopSize
118  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
119    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
120      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
121    }.elsewhen(inInstFields.RS1 === 0.U) {
122      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
123    }
124  }
125
126  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
127  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
128  //input bits
129  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
130
131  val src1 = Cat(0.U(1.W), instFields.RS1)
132  val src2 = Cat(0.U(1.W), instFields.RS2)
133  val dest = Cat(0.U(1.W), instFields.RD)
134
135  val nf    = instFields.NF
136  val width = instFields.WIDTH(1, 0)
137
138  //output of DecodeUnit
139  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
140  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
141  val lmul = Wire(UInt(4.W))
142  val isVsetSimple = Wire(Bool())
143
144  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
145  indexedLSRegOffset.map(_.src := 0.U)
146
147  //pre decode
148  lmul := latchedUopInfo.lmul
149  isVsetSimple := latchedInst.isVset
150  val vlmulReg = latchedInst.vpu.vlmul
151  val vsewReg = latchedInst.vpu.vsew
152
153  //Type of uop Div
154  val typeOfSplit = latchedInst.uopSplitType
155  val src1Type = latchedInst.srcType(0)
156  val src1IsImm = src1Type === SrcType.imm
157  val src1IsFp = src1Type === SrcType.fp
158
159  val isVstore = FuType.isVStore(latchedInst.fuType)
160
161  numOfUop := latchedUopInfo.numOfUop
162  numOfWB := latchedUopInfo.numOfWB
163
164  //uops dispatch
165  val s_idle :: s_active :: Nil = Enum(2)
166  val state = RegInit(s_idle)
167  val stateNext = WireDefault(state)
168  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
169  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
170  val uopResNext = WireInit(uopRes)
171  val e64 = 3.U(2.W)
172  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
173  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
174  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
175
176  //uop div up to maxUopSize
177  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
178  csBundle.foreach { case dst =>
179    dst := latchedInst
180    dst.numUops := latchedUopInfo.numOfUop
181    dst.numWB := latchedUopInfo.numOfWB
182    dst.firstUop := false.B
183    dst.lastUop := false.B
184    dst.vlsInstr := false.B
185  }
186
187  csBundle(0).firstUop := true.B
188  csBundle(numOfUop - 1.U).lastUop := true.B
189
190  switch(typeOfSplit) {
191    is(UopSplitType.VSET) {
192      // In simple decoder, rfWen and vecWen are not set
193      when(isVsetSimple) {
194        // Default
195        // uop0 set rd, never flushPipe
196        csBundle(0).fuType := FuType.vsetiwi.U
197        csBundle(0).flushPipe := false.B
198        csBundle(0).rfWen := true.B
199        // uop1 set vl, vsetvl will flushPipe
200        csBundle(1).ldest := VCONFIG_IDX.U
201        csBundle(1).vecWen := true.B
202        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
203          // write nothing, uop0 is a nop instruction
204          csBundle(0).rfWen := false.B
205          csBundle(0).fpWen := false.B
206          csBundle(0).vecWen := false.B
207          csBundle(1).fuType := FuType.vsetfwf.U
208          csBundle(1).srcType(0) := SrcType.vp
209          csBundle(1).lsrc(0) := VCONFIG_IDX.U
210        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
211          // uop0: mv vtype gpr to vector region
212          csBundle(0).srcType(0) := SrcType.xp
213          csBundle(0).srcType(1) := SrcType.no
214          csBundle(0).lsrc(0) := src2
215          csBundle(0).lsrc(1) := 0.U
216          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
217          csBundle(0).fuType := FuType.i2v.U
218          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
219          csBundle(0).rfWen := false.B
220          csBundle(0).fpWen := false.B
221          csBundle(0).vecWen := true.B
222          csBundle(0).flushPipe := false.B
223          // uop1: uvsetvcfg_vv
224          csBundle(1).fuType := FuType.vsetfwf.U
225          // vl
226          csBundle(1).srcType(0) := SrcType.vp
227          csBundle(1).lsrc(0) := VCONFIG_IDX.U
228          // vtype
229          csBundle(1).srcType(1) := SrcType.vp
230          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
231          csBundle(1).vecWen := true.B
232          csBundle(1).ldest := VCONFIG_IDX.U
233        }.elsewhen(dest === 0.U) {
234          // write nothing, uop0 is a nop instruction
235          csBundle(0).rfWen := false.B
236          csBundle(0).fpWen := false.B
237          csBundle(0).vecWen := false.B
238        }
239        // use bypass vtype from vtypeGen
240        csBundle(0).vpu.connectVType(io.vtypeBypass)
241        csBundle(1).vpu.connectVType(io.vtypeBypass)
242      }
243    }
244    is(UopSplitType.VEC_VVV) {
245      for (i <- 0 until MAX_VLMUL) {
246        csBundle(i).lsrc(0) := src1 + i.U
247        csBundle(i).lsrc(1) := src2 + i.U
248        csBundle(i).lsrc(2) := dest + i.U
249        csBundle(i).ldest := dest + i.U
250        csBundle(i).uopIdx := i.U
251      }
252    }
253    is(UopSplitType.VEC_VFV) {
254      /*
255      f to vector move
256       */
257      csBundle(0).srcType(0) := SrcType.fp
258      csBundle(0).srcType(1) := SrcType.imm
259      csBundle(0).srcType(2) := SrcType.imm
260      csBundle(0).lsrc(1) := 0.U
261      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
262      csBundle(0).fuType := FuType.f2v.U
263      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
264      csBundle(0).vecWen := true.B
265      csBundle(0).vpu.isReverse := false.B
266      /*
267      LMUL
268       */
269      for (i <- 0 until MAX_VLMUL) {
270        csBundle(i + 1).srcType(0) := SrcType.vp
271        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
272        csBundle(i + 1).lsrc(1) := src2 + i.U
273        csBundle(i + 1).lsrc(2) := dest + i.U
274        csBundle(i + 1).ldest := dest + i.U
275        csBundle(i + 1).uopIdx := i.U
276      }
277    }
278    is(UopSplitType.VEC_EXT2) {
279      for (i <- 0 until MAX_VLMUL / 2) {
280        csBundle(2 * i).lsrc(1) := src2 + i.U
281        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
282        csBundle(2 * i).ldest := dest + (2 * i).U
283        csBundle(2 * i).uopIdx := (2 * i).U
284        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
285        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
286        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
287        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
288      }
289    }
290    is(UopSplitType.VEC_EXT4) {
291      for (i <- 0 until MAX_VLMUL / 4) {
292        csBundle(4 * i).lsrc(1) := src2 + i.U
293        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
294        csBundle(4 * i).ldest := dest + (4 * i).U
295        csBundle(4 * i).uopIdx := (4 * i).U
296        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
297        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
298        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
299        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
300        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
301        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
302        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
303        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
304        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
305        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
306        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
307        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
308      }
309    }
310    is(UopSplitType.VEC_EXT8) {
311      for (i <- 0 until MAX_VLMUL) {
312        csBundle(i).lsrc(1) := src2
313        csBundle(i).lsrc(2) := dest + i.U
314        csBundle(i).ldest := dest + i.U
315        csBundle(i).uopIdx := i.U
316      }
317    }
318    is(UopSplitType.VEC_0XV) {
319      /*
320      i/f to vector move
321       */
322      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
323      csBundle(0).srcType(1) := SrcType.imm
324      csBundle(0).srcType(2) := SrcType.imm
325      csBundle(0).lsrc(1) := 0.U
326      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
327      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
328      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
329      csBundle(0).rfWen := false.B
330      csBundle(0).fpWen := false.B
331      csBundle(0).vecWen := true.B
332      /*
333      vmv.s.x
334       */
335      csBundle(1).srcType(0) := SrcType.vp
336      csBundle(1).srcType(1) := SrcType.imm
337      csBundle(1).srcType(2) := SrcType.vp
338      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
339      csBundle(1).lsrc(1) := 0.U
340      csBundle(1).lsrc(2) := dest
341      csBundle(1).ldest := dest
342      csBundle(1).rfWen := false.B
343      csBundle(1).fpWen := false.B
344      csBundle(1).vecWen := true.B
345      csBundle(1).uopIdx := 0.U
346    }
347    is(UopSplitType.VEC_VXV) {
348      /*
349      i to vector move
350       */
351      csBundle(0).srcType(0) := SrcType.reg
352      csBundle(0).srcType(1) := SrcType.imm
353      csBundle(0).srcType(2) := SrcType.imm
354      csBundle(0).lsrc(1) := 0.U
355      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
356      csBundle(0).fuType := FuType.i2v.U
357      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
358      csBundle(0).vecWen := true.B
359      csBundle(0).vpu.isReverse := false.B
360      /*
361      LMUL
362       */
363      for (i <- 0 until MAX_VLMUL) {
364        csBundle(i + 1).srcType(0) := SrcType.vp
365        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
366        csBundle(i + 1).lsrc(1) := src2 + i.U
367        csBundle(i + 1).lsrc(2) := dest + i.U
368        csBundle(i + 1).ldest := dest + i.U
369        csBundle(i + 1).uopIdx := i.U
370      }
371    }
372    is(UopSplitType.VEC_VVW) {
373      for (i <- 0 until MAX_VLMUL / 2) {
374        csBundle(2 * i).lsrc(0) := src1 + i.U
375        csBundle(2 * i).lsrc(1) := src2 + i.U
376        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
377        csBundle(2 * i).ldest := dest + (2 * i).U
378        csBundle(2 * i).uopIdx := (2 * i).U
379        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
380        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
381        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
382        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
383        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
384      }
385    }
386    is(UopSplitType.VEC_VFW) {
387      /*
388      f to vector move
389       */
390      csBundle(0).srcType(0) := SrcType.fp
391      csBundle(0).srcType(1) := SrcType.imm
392      csBundle(0).srcType(2) := SrcType.imm
393      csBundle(0).lsrc(1) := 0.U
394      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
395      csBundle(0).fuType := FuType.f2v.U
396      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
397      csBundle(0).rfWen := false.B
398      csBundle(0).fpWen := false.B
399      csBundle(0).vecWen := true.B
400
401      for (i <- 0 until MAX_VLMUL / 2) {
402        csBundle(2 * i + 1).srcType(0) := SrcType.vp
403        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
404        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
405        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
406        csBundle(2 * i + 1).ldest := dest + (2 * i).U
407        csBundle(2 * i + 1).uopIdx := (2 * i).U
408        csBundle(2 * i + 2).srcType(0) := SrcType.vp
409        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
410        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
411        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
412        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
413        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
414      }
415    }
416    is(UopSplitType.VEC_WVW) {
417      for (i <- 0 until MAX_VLMUL / 2) {
418        csBundle(2 * i).lsrc(0) := src1 + i.U
419        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
420        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
421        csBundle(2 * i).ldest := dest + (2 * i).U
422        csBundle(2 * i).uopIdx := (2 * i).U
423        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
424        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
425        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
426        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
427        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
428      }
429    }
430    is(UopSplitType.VEC_VXW) {
431      /*
432      i to vector move
433       */
434      csBundle(0).srcType(0) := SrcType.reg
435      csBundle(0).srcType(1) := SrcType.imm
436      csBundle(0).srcType(2) := SrcType.imm
437      csBundle(0).lsrc(1) := 0.U
438      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
439      csBundle(0).fuType := FuType.i2v.U
440      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
441      csBundle(0).vecWen := true.B
442
443      for (i <- 0 until MAX_VLMUL / 2) {
444        csBundle(2 * i + 1).srcType(0) := SrcType.vp
445        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
446        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
447        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
448        csBundle(2 * i + 1).ldest := dest + (2 * i).U
449        csBundle(2 * i + 1).uopIdx := (2 * i).U
450        csBundle(2 * i + 2).srcType(0) := SrcType.vp
451        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
452        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
453        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
454        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
455        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
456      }
457    }
458    is(UopSplitType.VEC_WXW) {
459      /*
460      i to vector move
461       */
462      csBundle(0).srcType(0) := SrcType.reg
463      csBundle(0).srcType(1) := SrcType.imm
464      csBundle(0).srcType(2) := SrcType.imm
465      csBundle(0).lsrc(1) := 0.U
466      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
467      csBundle(0).fuType := FuType.i2v.U
468      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
469      csBundle(0).vecWen := true.B
470
471      for (i <- 0 until MAX_VLMUL / 2) {
472        csBundle(2 * i + 1).srcType(0) := SrcType.vp
473        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
474        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
475        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
476        csBundle(2 * i + 1).ldest := dest + (2 * i).U
477        csBundle(2 * i + 1).uopIdx := (2 * i).U
478        csBundle(2 * i + 2).srcType(0) := SrcType.vp
479        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
480        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
481        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
482        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
483        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
484      }
485    }
486    is(UopSplitType.VEC_WVV) {
487      for (i <- 0 until MAX_VLMUL / 2) {
488
489        csBundle(2 * i).lsrc(0) := src1 + i.U
490        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
491        csBundle(2 * i).lsrc(2) := dest + i.U
492        csBundle(2 * i).ldest := dest + i.U
493        csBundle(2 * i).uopIdx := (2 * i).U
494        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
495        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
496        csBundle(2 * i + 1).lsrc(2) := dest + i.U
497        csBundle(2 * i + 1).ldest := dest + i.U
498        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
499      }
500    }
501    is(UopSplitType.VEC_WFW) {
502      /*
503      f to vector move
504       */
505      csBundle(0).srcType(0) := SrcType.fp
506      csBundle(0).srcType(1) := SrcType.imm
507      csBundle(0).srcType(2) := SrcType.imm
508      csBundle(0).lsrc(1) := 0.U
509      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
510      csBundle(0).fuType := FuType.f2v.U
511      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
512      csBundle(0).rfWen := false.B
513      csBundle(0).fpWen := false.B
514      csBundle(0).vecWen := true.B
515
516      for (i <- 0 until MAX_VLMUL / 2) {
517        csBundle(2 * i + 1).srcType(0) := SrcType.vp
518        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
519        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
520        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
521        csBundle(2 * i + 1).ldest := dest + (2 * i).U
522        csBundle(2 * i + 1).uopIdx := (2 * i).U
523        csBundle(2 * i + 2).srcType(0) := SrcType.vp
524        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
525        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
526        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
527        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
528        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
529      }
530    }
531    is(UopSplitType.VEC_WXV) {
532      /*
533      i to vector move
534       */
535      csBundle(0).srcType(0) := SrcType.reg
536      csBundle(0).srcType(1) := SrcType.imm
537      csBundle(0).srcType(2) := SrcType.imm
538      csBundle(0).lsrc(1) := 0.U
539      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
540      csBundle(0).fuType := FuType.i2v.U
541      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
542      csBundle(0).vecWen := true.B
543
544      for (i <- 0 until MAX_VLMUL / 2) {
545        csBundle(2 * i + 1).srcType(0) := SrcType.vp
546        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
547        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
548        csBundle(2 * i + 1).lsrc(2) := dest + i.U
549        csBundle(2 * i + 1).ldest := dest + i.U
550        csBundle(2 * i + 1).uopIdx := (2 * i).U
551        csBundle(2 * i + 2).srcType(0) := SrcType.vp
552        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
553        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
554        csBundle(2 * i + 2).lsrc(2) := dest + i.U
555        csBundle(2 * i + 2).ldest := dest + i.U
556        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
557      }
558    }
559    is(UopSplitType.VEC_VVM) {
560      csBundle(0).lsrc(2) := dest
561      csBundle(0).ldest := dest
562      csBundle(0).uopIdx := 0.U
563      for (i <- 1 until MAX_VLMUL) {
564        csBundle(i).lsrc(0) := src1 + i.U
565        csBundle(i).lsrc(1) := src2 + i.U
566        csBundle(i).lsrc(2) := dest
567        csBundle(i).ldest := dest
568        csBundle(i).uopIdx := i.U
569      }
570    }
571    is(UopSplitType.VEC_VFM) {
572      /*
573      f to vector move
574       */
575      csBundle(0).srcType(0) := SrcType.fp
576      csBundle(0).srcType(1) := SrcType.imm
577      csBundle(0).srcType(2) := SrcType.imm
578      csBundle(0).lsrc(1) := 0.U
579      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
580      csBundle(0).fuType := FuType.f2v.U
581      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
582      csBundle(0).rfWen := false.B
583      csBundle(0).fpWen := false.B
584      csBundle(0).vecWen := true.B
585      //LMUL
586      csBundle(1).srcType(0) := SrcType.vp
587      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
588      csBundle(1).lsrc(2) := dest
589      csBundle(1).ldest := dest
590      csBundle(1).uopIdx := 0.U
591      for (i <- 1 until MAX_VLMUL) {
592        csBundle(i + 1).srcType(0) := SrcType.vp
593        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
594        csBundle(i + 1).lsrc(1) := src2 + i.U
595        csBundle(i + 1).lsrc(2) := dest
596        csBundle(i + 1).ldest := dest
597        csBundle(i + 1).uopIdx := i.U
598      }
599      csBundle(numOfUop - 1.U).ldest := dest
600    }
601    is(UopSplitType.VEC_VXM) {
602      /*
603      i to vector move
604       */
605      csBundle(0).srcType(0) := SrcType.reg
606      csBundle(0).srcType(1) := SrcType.imm
607      csBundle(0).srcType(2) := SrcType.imm
608      csBundle(0).lsrc(1) := 0.U
609      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
610      csBundle(0).fuType := FuType.i2v.U
611      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
612      csBundle(0).vecWen := true.B
613      //LMUL
614      csBundle(1).srcType(0) := SrcType.vp
615      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
616      csBundle(1).lsrc(2) := dest
617      csBundle(1).ldest := dest
618      csBundle(1).uopIdx := 0.U
619      for (i <- 1 until MAX_VLMUL) {
620        csBundle(i + 1).srcType(0) := SrcType.vp
621        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
622        csBundle(i + 1).lsrc(1) := src2 + i.U
623        csBundle(i + 1).lsrc(2) := dest
624        csBundle(i + 1).ldest := dest
625        csBundle(i + 1).uopIdx := i.U
626      }
627      csBundle(numOfUop - 1.U).ldest := dest
628    }
629    is(UopSplitType.VEC_SLIDE1UP) {
630      /*
631      i to vector move
632       */
633      csBundle(0).srcType(0) := SrcType.reg
634      csBundle(0).srcType(1) := SrcType.imm
635      csBundle(0).srcType(2) := SrcType.imm
636      csBundle(0).lsrc(1) := 0.U
637      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
638      csBundle(0).fuType := FuType.i2v.U
639      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
640      csBundle(0).vecWen := true.B
641      //LMUL
642      csBundle(1).srcType(0) := SrcType.vp
643      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
644      csBundle(1).lsrc(2) := dest
645      csBundle(1).ldest := dest
646      csBundle(1).uopIdx := 0.U
647      for (i <- 1 until MAX_VLMUL) {
648        csBundle(i + 1).srcType(0) := SrcType.vp
649        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
650        csBundle(i + 1).lsrc(1) := src2 + i.U
651        csBundle(i + 1).lsrc(2) := dest + i.U
652        csBundle(i + 1).ldest := dest + i.U
653        csBundle(i + 1).uopIdx := i.U
654      }
655    }
656    is(UopSplitType.VEC_FSLIDE1UP) {
657      /*
658      f to vector move
659       */
660      csBundle(0).srcType(0) := SrcType.fp
661      csBundle(0).srcType(1) := SrcType.imm
662      csBundle(0).srcType(2) := SrcType.imm
663      csBundle(0).lsrc(1) := 0.U
664      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
665      csBundle(0).fuType := FuType.f2v.U
666      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
667      csBundle(0).rfWen := false.B
668      csBundle(0).fpWen := false.B
669      csBundle(0).vecWen := true.B
670      //LMUL
671      csBundle(1).srcType(0) := SrcType.vp
672      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
673      csBundle(1).lsrc(1) := src2
674      csBundle(1).lsrc(2) := dest
675      csBundle(1).ldest := dest
676      csBundle(1).uopIdx := 0.U
677      for (i <- 1 until MAX_VLMUL) {
678        csBundle(i + 1).srcType(0) := SrcType.vp
679        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
680        csBundle(i + 1).lsrc(1) := src2 + i.U
681        csBundle(i + 1).lsrc(2) := dest + i.U
682        csBundle(i + 1).ldest := dest + i.U
683        csBundle(i + 1).uopIdx := i.U
684      }
685    }
686    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
687      /*
688      i to vector move
689       */
690      csBundle(0).srcType(0) := SrcType.reg
691      csBundle(0).srcType(1) := SrcType.imm
692      csBundle(0).srcType(2) := SrcType.imm
693      csBundle(0).lsrc(1) := 0.U
694      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
695      csBundle(0).fuType := FuType.i2v.U
696      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
697      csBundle(0).vecWen := true.B
698      //LMUL
699      for (i <- 0 until MAX_VLMUL) {
700        csBundle(2 * i + 1).srcType(0) := SrcType.vp
701        csBundle(2 * i + 1).srcType(1) := SrcType.vp
702        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
703        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
704        csBundle(2 * i + 1).lsrc(2) := dest + i.U
705        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
706        csBundle(2 * i + 1).uopIdx := (2 * i).U
707        if (2 * i + 2 < MAX_VLMUL * 2) {
708          csBundle(2 * i + 2).srcType(0) := SrcType.vp
709          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
710          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
711          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
712          csBundle(2 * i + 2).ldest := dest + i.U
713          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
714        }
715      }
716      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
717      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
718      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
719    }
720    is(UopSplitType.VEC_FSLIDE1DOWN) {
721      /*
722      f to vector move
723       */
724      csBundle(0).srcType(0) := SrcType.fp
725      csBundle(0).srcType(1) := SrcType.imm
726      csBundle(0).srcType(2) := SrcType.imm
727      csBundle(0).lsrc(1) := 0.U
728      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
729      csBundle(0).fuType := FuType.f2v.U
730      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
731      csBundle(0).rfWen := false.B
732      csBundle(0).fpWen := false.B
733      csBundle(0).vecWen := true.B
734      //LMUL
735      for (i <- 0 until MAX_VLMUL) {
736        csBundle(2 * i + 1).srcType(0) := SrcType.vp
737        csBundle(2 * i + 1).srcType(1) := SrcType.vp
738        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
739        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
740        csBundle(2 * i + 1).lsrc(2) := dest + i.U
741        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
742        csBundle(2 * i + 1).uopIdx := (2 * i).U
743        if (2 * i + 2 < MAX_VLMUL * 2) {
744          csBundle(2 * i + 2).srcType(0) := SrcType.vp
745          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
746          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
747          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
748          csBundle(2 * i + 2).ldest := dest + i.U
749          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
750        }
751      }
752      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
753      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
754      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
755    }
756    is(UopSplitType.VEC_VRED) {
757      when(vlmulReg === "b001".U) {
758        csBundle(0).srcType(2) := SrcType.DC
759        csBundle(0).lsrc(0) := src2 + 1.U
760        csBundle(0).lsrc(1) := src2
761        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
762        csBundle(0).uopIdx := 0.U
763      }
764      when(vlmulReg === "b010".U) {
765        csBundle(0).srcType(2) := SrcType.DC
766        csBundle(0).lsrc(0) := src2 + 1.U
767        csBundle(0).lsrc(1) := src2
768        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
769        csBundle(0).uopIdx := 0.U
770
771        csBundle(1).srcType(2) := SrcType.DC
772        csBundle(1).lsrc(0) := src2 + 3.U
773        csBundle(1).lsrc(1) := src2 + 2.U
774        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
775        csBundle(1).uopIdx := 1.U
776
777        csBundle(2).srcType(2) := SrcType.DC
778        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
779        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
780        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
781        csBundle(2).uopIdx := 2.U
782      }
783      when(vlmulReg === "b011".U) {
784        for (i <- 0 until MAX_VLMUL) {
785          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
786            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
787            csBundle(i).lsrc(1) := src2 + (i * 2).U
788            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
789          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
790            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
791            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
792            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
793          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
794            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
795            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
796            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
797          }
798          csBundle(i).srcType(2) := SrcType.DC
799          csBundle(i).uopIdx := i.U
800        }
801      }
802      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
803        /*
804         * 2 <= vlmul <= 8
805         */
806        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
807        csBundle(numOfUop - 1.U).lsrc(0) := src1
808        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
809        csBundle(numOfUop - 1.U).lsrc(2) := dest
810        csBundle(numOfUop - 1.U).ldest := dest
811        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
812      }
813    }
814    is(UopSplitType.VEC_VFRED) {
815      val vlmul = vlmulReg
816      val vsew = vsewReg
817      when(vlmul === VLmul.m8){
818        for (i <- 0 until 4) {
819          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
820          csBundle(i).lsrc(1) := src2 + (i * 2).U
821          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
822          csBundle(i).uopIdx := i.U
823        }
824        for (i <- 4 until 6) {
825          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
826          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
827          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
828          csBundle(i).uopIdx := i.U
829        }
830        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
831        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
832        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
833        csBundle(6).uopIdx := 6.U
834        when(vsew === VSew.e64) {
835          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
836          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
837          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
838          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
839          csBundle(7).uopIdx := 7.U
840          csBundle(8).lsrc(0) := src1
841          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
842          csBundle(8).ldest := dest
843          csBundle(8).uopIdx := 8.U
844        }
845        when(vsew === VSew.e32) {
846          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
847          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
848          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
849          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
850          csBundle(7).uopIdx := 7.U
851          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
852          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
853          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
854          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
855          csBundle(8).uopIdx := 8.U
856          csBundle(9).lsrc(0) := src1
857          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
858          csBundle(9).ldest := dest
859          csBundle(9).uopIdx := 9.U
860        }
861        when(vsew === VSew.e16) {
862          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
863          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
864          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
865          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
866          csBundle(7).uopIdx := 7.U
867          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
868          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
869          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
870          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
871          csBundle(8).uopIdx := 8.U
872          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
873          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
874          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
875          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
876          csBundle(9).uopIdx := 9.U
877          csBundle(10).lsrc(0) := src1
878          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
879          csBundle(10).ldest := dest
880          csBundle(10).uopIdx := 10.U
881        }
882      }
883      when(vlmul === VLmul.m4) {
884        for (i <- 0 until 2) {
885          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
886          csBundle(i).lsrc(1) := src2 + (i * 2).U
887          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
888          csBundle(i).uopIdx := i.U
889        }
890        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
891        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
892        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
893        csBundle(2).uopIdx := 2.U
894        when(vsew === VSew.e64) {
895          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
896          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
897          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
898          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
899          csBundle(3).uopIdx := 3.U
900          csBundle(4).lsrc(0) := src1
901          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
902          csBundle(4).ldest := dest
903          csBundle(4).uopIdx := 4.U
904        }
905        when(vsew === VSew.e32) {
906          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
907          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
908          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
909          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
910          csBundle(3).uopIdx := 3.U
911          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
912          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
913          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
914          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
915          csBundle(4).uopIdx := 4.U
916          csBundle(5).lsrc(0) := src1
917          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
918          csBundle(5).ldest := dest
919          csBundle(5).uopIdx := 5.U
920        }
921        when(vsew === VSew.e16) {
922          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
923          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
924          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
925          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
926          csBundle(3).uopIdx := 3.U
927          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
928          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
929          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
930          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
931          csBundle(4).uopIdx := 4.U
932          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
933          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
934          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
935          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
936          csBundle(5).uopIdx := 5.U
937          csBundle(6).lsrc(0) := src1
938          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
939          csBundle(6).ldest := dest
940          csBundle(6).uopIdx := 6.U
941        }
942      }
943      when(vlmul === VLmul.m2) {
944        csBundle(0).lsrc(0) := src2 + 1.U
945        csBundle(0).lsrc(1) := src2 + 0.U
946        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
947        csBundle(0).uopIdx := 0.U
948        when(vsew === VSew.e64) {
949          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
950          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
951          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
952          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
953          csBundle(1).uopIdx := 1.U
954          csBundle(2).lsrc(0) := src1
955          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
956          csBundle(2).ldest := dest
957          csBundle(2).uopIdx := 2.U
958        }
959        when(vsew === VSew.e32) {
960          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
961          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
962          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
963          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
964          csBundle(1).uopIdx := 1.U
965          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
966          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
967          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
968          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
969          csBundle(2).uopIdx := 2.U
970          csBundle(3).lsrc(0) := src1
971          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
972          csBundle(3).ldest := dest
973          csBundle(3).uopIdx := 3.U
974        }
975        when(vsew === VSew.e16) {
976          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
977          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
978          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
979          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
980          csBundle(1).uopIdx := 1.U
981          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
982          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
983          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
984          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
985          csBundle(2).uopIdx := 2.U
986          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
987          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
988          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
989          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
990          csBundle(3).uopIdx := 3.U
991          csBundle(4).lsrc(0) := src1
992          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
993          csBundle(4).ldest := dest
994          csBundle(4).uopIdx := 4.U
995        }
996      }
997      when(vlmul === VLmul.m1) {
998        when(vsew === VSew.e64) {
999          csBundle(0).lsrc(0) := src2
1000          csBundle(0).lsrc(1) := src2
1001          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1002          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1003          csBundle(0).uopIdx := 0.U
1004          csBundle(1).lsrc(0) := src1
1005          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1006          csBundle(1).ldest := dest
1007          csBundle(1).uopIdx := 1.U
1008        }
1009        when(vsew === VSew.e32) {
1010          csBundle(0).lsrc(0) := src2
1011          csBundle(0).lsrc(1) := src2
1012          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1013          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1014          csBundle(0).uopIdx := 0.U
1015          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1016          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1017          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1018          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1019          csBundle(1).uopIdx := 1.U
1020          csBundle(2).lsrc(0) := src1
1021          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1022          csBundle(2).ldest := dest
1023          csBundle(2).uopIdx := 2.U
1024        }
1025        when(vsew === VSew.e16) {
1026          csBundle(0).lsrc(0) := src2
1027          csBundle(0).lsrc(1) := src2
1028          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1029          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1030          csBundle(0).uopIdx := 0.U
1031          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1032          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1033          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1034          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1035          csBundle(1).uopIdx := 1.U
1036          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1037          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1038          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1039          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1040          csBundle(2).uopIdx := 2.U
1041          csBundle(3).lsrc(0) := src1
1042          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1043          csBundle(3).ldest := dest
1044          csBundle(3).uopIdx := 3.U
1045        }
1046      }
1047      when(vlmul === VLmul.mf2) {
1048        when(vsew === VSew.e32) {
1049          csBundle(0).lsrc(0) := src2
1050          csBundle(0).lsrc(1) := src2
1051          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1052          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1053          csBundle(0).uopIdx := 0.U
1054          csBundle(1).lsrc(0) := src1
1055          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1056          csBundle(1).ldest := dest
1057          csBundle(1).uopIdx := 1.U
1058        }
1059        when(vsew === VSew.e16) {
1060          csBundle(0).lsrc(0) := src2
1061          csBundle(0).lsrc(1) := src2
1062          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1063          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1064          csBundle(0).uopIdx := 0.U
1065          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1066          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1067          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1068          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1069          csBundle(1).uopIdx := 1.U
1070          csBundle(2).lsrc(0) := src1
1071          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1072          csBundle(2).ldest := dest
1073          csBundle(2).uopIdx := 2.U
1074        }
1075      }
1076      when(vlmul === VLmul.mf4) {
1077        when(vsew === VSew.e16) {
1078          csBundle(0).lsrc(0) := src2
1079          csBundle(0).lsrc(1) := src2
1080          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1081          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1082          csBundle(0).uopIdx := 0.U
1083          csBundle(1).lsrc(0) := src1
1084          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1085          csBundle(1).ldest := dest
1086          csBundle(1).uopIdx := 1.U
1087        }
1088      }
1089    }
1090
1091    is(UopSplitType.VEC_VFREDOSUM) {
1092      import yunsuan.VfaluType
1093      val vlmul = vlmulReg
1094      val vsew = vsewReg
1095      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1096      when(vlmul === VLmul.m8) {
1097        when(vsew === VSew.e64) {
1098          val vlmax = 16
1099          for (i <- 0 until vlmax) {
1100            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1101            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1102            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1103            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1104            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1105            csBundle(i).uopIdx := i.U
1106          }
1107        }
1108        when(vsew === VSew.e32) {
1109          val vlmax = 32
1110          for (i <- 0 until vlmax) {
1111            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1112            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1113            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1114            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1115            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1116            csBundle(i).uopIdx := i.U
1117          }
1118        }
1119        when(vsew === VSew.e16) {
1120          val vlmax = 64
1121          for (i <- 0 until vlmax) {
1122            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1123            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1124            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1125            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1126            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1127            csBundle(i).uopIdx := i.U
1128          }
1129        }
1130      }
1131      when(vlmul === VLmul.m4) {
1132        when(vsew === VSew.e64) {
1133          val vlmax = 8
1134          for (i <- 0 until vlmax) {
1135            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1137            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1138            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1139            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1140            csBundle(i).uopIdx := i.U
1141          }
1142        }
1143        when(vsew === VSew.e32) {
1144          val vlmax = 16
1145          for (i <- 0 until vlmax) {
1146            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1147            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1149            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1150            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1151            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1152            csBundle(i).uopIdx := i.U
1153          }
1154        }
1155        when(vsew === VSew.e16) {
1156          val vlmax = 32
1157          for (i <- 0 until vlmax) {
1158            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1161            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1162            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1163            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1164            csBundle(i).uopIdx := i.U
1165          }
1166        }
1167      }
1168      when(vlmul === VLmul.m2) {
1169        when(vsew === VSew.e64) {
1170          val vlmax = 4
1171          for (i <- 0 until vlmax) {
1172            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1173            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1174            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1175            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1176            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1177            csBundle(i).uopIdx := i.U
1178          }
1179        }
1180        when(vsew === VSew.e32) {
1181          val vlmax = 8
1182          for (i <- 0 until vlmax) {
1183            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1184            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1185            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1186            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1187            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1188            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1189            csBundle(i).uopIdx := i.U
1190          }
1191        }
1192        when(vsew === VSew.e16) {
1193          val vlmax = 16
1194          for (i <- 0 until vlmax) {
1195            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1197            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1198            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1199            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1200            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1201            csBundle(i).uopIdx := i.U
1202          }
1203        }
1204      }
1205      when(vlmul === VLmul.m1) {
1206        when(vsew === VSew.e64) {
1207          val vlmax = 2
1208          for (i <- 0 until vlmax) {
1209            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1211            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1212            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1213            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1214            csBundle(i).uopIdx := i.U
1215          }
1216        }
1217        when(vsew === VSew.e32) {
1218          val vlmax = 4
1219          for (i <- 0 until vlmax) {
1220            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1223            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1224            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1225            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1226            csBundle(i).uopIdx := i.U
1227          }
1228        }
1229        when(vsew === VSew.e16) {
1230          val vlmax = 8
1231          for (i <- 0 until vlmax) {
1232            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1235            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1236            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1237            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1238            csBundle(i).uopIdx := i.U
1239          }
1240        }
1241      }
1242      when(vlmul === VLmul.mf2) {
1243        when(vsew === VSew.e32) {
1244          val vlmax = 2
1245          for (i <- 0 until vlmax) {
1246            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1248            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1249            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1250            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1251            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1252            csBundle(i).uopIdx := i.U
1253          }
1254        }
1255        when(vsew === VSew.e16) {
1256          val vlmax = 4
1257          for (i <- 0 until vlmax) {
1258            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1260            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1261            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1262            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1263            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1264            csBundle(i).uopIdx := i.U
1265          }
1266        }
1267      }
1268      when(vlmul === VLmul.mf4) {
1269        when(vsew === VSew.e16) {
1270          val vlmax = 2
1271          for (i <- 0 until vlmax) {
1272            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1273            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1274            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1275            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1276            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1277            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1278            csBundle(i).uopIdx := i.U
1279          }
1280        }
1281      }
1282    }
1283
1284    is(UopSplitType.VEC_SLIDEUP) {
1285      // i to vector move
1286      csBundle(0).srcType(0) := SrcType.reg
1287      csBundle(0).srcType(1) := SrcType.imm
1288      csBundle(0).srcType(2) := SrcType.imm
1289      csBundle(0).lsrc(1) := 0.U
1290      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1291      csBundle(0).fuType := FuType.i2v.U
1292      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1293      csBundle(0).vecWen := true.B
1294      // LMUL
1295      for (i <- 0 until MAX_VLMUL)
1296        for (j <- 0 to i) {
1297          val old_vd = if (j == 0) {
1298            dest + i.U
1299          } else (VECTOR_TMP_REG_LMUL + j).U
1300          val vd = if (j == i) {
1301            dest + i.U
1302          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1303          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1304          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1305          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1306          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1307          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1308          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1309        }
1310    }
1311
1312    is(UopSplitType.VEC_SLIDEDOWN) {
1313      // i to vector move
1314      csBundle(0).srcType(0) := SrcType.reg
1315      csBundle(0).srcType(1) := SrcType.imm
1316      csBundle(0).srcType(2) := SrcType.imm
1317      csBundle(0).lsrc(1) := 0.U
1318      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1319      csBundle(0).fuType := FuType.i2v.U
1320      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1321      csBundle(0).vecWen := true.B
1322      // LMUL
1323      for (i <- 0 until MAX_VLMUL)
1324        for (j <- (0 to i).reverse) {
1325          when(i.U < lmul) {
1326            val old_vd = if (j == 0) {
1327              dest + lmul - 1.U - i.U
1328            } else (VECTOR_TMP_REG_LMUL + j).U
1329            val vd = if (j == i) {
1330              dest + lmul - 1.U - i.U
1331            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1332            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1333            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1334            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1335            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1336            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1337            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1338          }
1339        }
1340    }
1341
1342    is(UopSplitType.VEC_M0X) {
1343      // LMUL
1344      for (i <- 0 until MAX_VLMUL) {
1345        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1346        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1347        csBundle(i).srcType(0) := srcType0
1348        csBundle(i).srcType(1) := SrcType.vp
1349        csBundle(i).rfWen := false.B
1350        csBundle(i).fpWen := false.B
1351        csBundle(i).vecWen := true.B
1352        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1353        csBundle(i).lsrc(1) := src2
1354        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1355        csBundle(i).ldest := ldest
1356        csBundle(i).uopIdx := i.U
1357      }
1358      csBundle(lmul - 1.U).rfWen := true.B
1359      csBundle(lmul - 1.U).fpWen := false.B
1360      csBundle(lmul - 1.U).vecWen := false.B
1361      csBundle(lmul - 1.U).ldest := dest
1362    }
1363
1364    is(UopSplitType.VEC_MVV) {
1365      // LMUL
1366      for (i <- 0 until MAX_VLMUL) {
1367        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1368        csBundle(i * 2 + 0).srcType(0) := srcType0
1369        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1370        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1371        csBundle(i * 2 + 0).lsrc(1) := src2
1372        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1373        csBundle(i * 2 + 0).ldest := dest + i.U
1374        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1375
1376        csBundle(i * 2 + 1).srcType(0) := srcType0
1377        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1378        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1379        csBundle(i * 2 + 1).lsrc(1) := src2
1380        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1381        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1382        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1383      }
1384    }
1385
1386    is(UopSplitType.VEC_M0X_VFIRST) {
1387      // LMUL
1388      csBundle(0).rfWen := true.B
1389      csBundle(0).fpWen := false.B
1390      csBundle(0).vecWen := false.B
1391      csBundle(0).ldest := dest
1392    }
1393    is(UopSplitType.VEC_VWW) {
1394      for (i <- 0 until MAX_VLMUL*2) {
1395        when(i.U < lmul){
1396          csBundle(i).srcType(2) := SrcType.DC
1397          csBundle(i).lsrc(0) := src2 + i.U
1398          csBundle(i).lsrc(1) := src2 + i.U
1399          // csBundle(i).lsrc(2) := dest + (2 * i).U
1400          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1401          csBundle(i).uopIdx :=  i.U
1402        } otherwise {
1403          csBundle(i).srcType(2) := SrcType.DC
1404          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1405          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1406          // csBundle(i).lsrc(2) := dest + (2 * i).U
1407          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1408          csBundle(i).uopIdx := i.U
1409        }
1410        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1411        csBundle(numOfUop-1.U).lsrc(0) := src1
1412        csBundle(numOfUop-1.U).lsrc(2) := dest
1413        csBundle(numOfUop-1.U).ldest := dest
1414      }
1415    }
1416    is(UopSplitType.VEC_RGATHER) {
1417      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1418        for (i <- 0 until len)
1419          for (j <- 0 until len) {
1420            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1421            // csBundle(i * len + j).srcType(1) := SrcType.vp
1422            // csBundle(i * len + j).srcType(2) := SrcType.vp
1423            csBundle(i * len + j).lsrc(0) := src1 + i.U
1424            csBundle(i * len + j).lsrc(1) := src2 + j.U
1425            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1426            csBundle(i * len + j).lsrc(2) := vd_old
1427            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1428            csBundle(i * len + j).ldest := vd
1429            csBundle(i * len + j).uopIdx := (i * len + j).U
1430          }
1431      }
1432      switch(vlmulReg) {
1433        is("b001".U ){
1434          genCsBundle_VEC_RGATHER(2)
1435        }
1436        is("b010".U ){
1437          genCsBundle_VEC_RGATHER(4)
1438        }
1439        is("b011".U ){
1440          genCsBundle_VEC_RGATHER(8)
1441        }
1442      }
1443    }
1444    is(UopSplitType.VEC_RGATHER_VX) {
1445      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1446        for (i <- 0 until len)
1447          for (j <- 0 until len) {
1448            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1449            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1450            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1451            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1452            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1453            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1454            csBundle(i * len + j + 1).lsrc(2) := vd_old
1455            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1456            csBundle(i * len + j + 1).ldest := vd
1457            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1458          }
1459      }
1460      // i to vector move
1461      csBundle(0).srcType(0) := SrcType.reg
1462      csBundle(0).srcType(1) := SrcType.imm
1463      csBundle(0).srcType(2) := SrcType.imm
1464      csBundle(0).lsrc(1) := 0.U
1465      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1466      csBundle(0).fuType := FuType.i2v.U
1467      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1468      csBundle(0).rfWen := false.B
1469      csBundle(0).fpWen := false.B
1470      csBundle(0).vecWen := true.B
1471      genCsBundle_RGATHER_VX(1)
1472      switch(vlmulReg) {
1473        is("b001".U ){
1474          genCsBundle_RGATHER_VX(2)
1475        }
1476        is("b010".U ){
1477          genCsBundle_RGATHER_VX(4)
1478        }
1479        is("b011".U ){
1480          genCsBundle_RGATHER_VX(8)
1481        }
1482      }
1483    }
1484    is(UopSplitType.VEC_RGATHEREI16) {
1485      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1486        for (i <- 0 until len)
1487          for (j <- 0 until len) {
1488            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1489            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1490            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1491            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1492            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1493            csBundle((i * len + j)*2+0).ldest := vd0
1494            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1495            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1496            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1497            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1498            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1499            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1500            csBundle((i * len + j)*2+1).ldest := vd1
1501            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1502          }
1503      }
1504      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1505        for (i <- 0 until len)
1506          for (j <- 0 until len) {
1507            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1508            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1509            csBundle(i * len + j).lsrc(0) := src1 + i.U
1510            csBundle(i * len + j).lsrc(1) := src2 + j.U
1511            csBundle(i * len + j).lsrc(2) := vd_old
1512            csBundle(i * len + j).ldest := vd
1513            csBundle(i * len + j).uopIdx := (i * len + j).U
1514          }
1515      }
1516      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1517        for (i <- 0 until len)
1518          for (j <- 0 until len) {
1519            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1520            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1521            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1522            csBundle(i * len + j).lsrc(1) := src2 + j.U
1523            csBundle(i * len + j).lsrc(2) := vd_old
1524            csBundle(i * len + j).ldest := vd
1525            csBundle(i * len + j).uopIdx := (i * len + j).U
1526          }
1527      }
1528      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1529        for (i <- 0 until len)
1530          for (j <- 0 until len) {
1531            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1532            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1533            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1534            csBundle(i * len + j).lsrc(1) := src2 + j.U
1535            csBundle(i * len + j).lsrc(2) := vd_old
1536            csBundle(i * len + j).ldest := vd
1537            csBundle(i * len + j).uopIdx := (i * len + j).U
1538          }
1539      }
1540      when(!vsewReg.orR){
1541        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1542      }.elsewhen(vsewReg === VSew.e32){
1543        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1544      }.elsewhen(vsewReg === VSew.e64){
1545        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1546      }.otherwise{
1547        genCsBundle_VEC_RGATHEREI16(1)
1548      }
1549      switch(vlmulReg) {
1550        is("b001".U) {
1551          when(!vsewReg.orR) {
1552            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1553          }.elsewhen(vsewReg === VSew.e32){
1554            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1555          }.elsewhen(vsewReg === VSew.e64){
1556            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1557          }.otherwise{
1558            genCsBundle_VEC_RGATHEREI16(2)
1559          }
1560        }
1561        is("b010".U) {
1562          when(!vsewReg.orR) {
1563            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1564          }.elsewhen(vsewReg === VSew.e32){
1565            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1566          }.elsewhen(vsewReg === VSew.e64){
1567            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1568          }.otherwise{
1569            genCsBundle_VEC_RGATHEREI16(4)
1570          }
1571        }
1572        is("b011".U) {
1573          when(vsewReg === VSew.e32){
1574            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1575          }.elsewhen(vsewReg === VSew.e64){
1576            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1577          }.otherwise{
1578            genCsBundle_VEC_RGATHEREI16(8)
1579          }
1580        }
1581      }
1582    }
1583    is(UopSplitType.VEC_COMPRESS) {
1584      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1585        for (i <- 0 until len) {
1586          val jlen = if (i == len-1) i+1 else i+2
1587          for (j <- 0 until jlen) {
1588            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1589            val vd = if(i==len-1) (dest + j.U) else {
1590              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1591            }
1592            val src13Type = if (j == i+1) DontCare else SrcType.vp
1593            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1594            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1595            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1596            csBundle(i*(i+3)/2 + j).srcType(3) := SrcType.vp
1597            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1598            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1599            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1600            csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1601            csBundle(i*(i+3)/2 + j).ldest := vd
1602            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1603          }
1604        }
1605      }
1606      switch(vlmulReg) {
1607        is("b001".U ){
1608          genCsBundle_VEC_COMPRESS(2)
1609        }
1610        is("b010".U ){
1611          genCsBundle_VEC_COMPRESS(4)
1612        }
1613        is("b011".U ){
1614          genCsBundle_VEC_COMPRESS(8)
1615        }
1616      }
1617    }
1618    is(UopSplitType.VEC_MVNR) {
1619      for (i <- 0 until MAX_VLMUL) {
1620        csBundle(i).lsrc(0) := src1 + i.U
1621        csBundle(i).lsrc(1) := src2 + i.U
1622        csBundle(i).lsrc(2) := dest + i.U
1623        csBundle(i).ldest := dest + i.U
1624        csBundle(i).uopIdx := i.U
1625      }
1626    }
1627    is(UopSplitType.VEC_US_LDST) {
1628      /*
1629      FMV.D.X
1630       */
1631      csBundle(0).srcType(0) := SrcType.reg
1632      csBundle(0).srcType(1) := SrcType.imm
1633      csBundle(0).lsrc(1) := 0.U
1634      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1635      csBundle(0).fuType := FuType.i2v.U
1636      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1637      csBundle(0).rfWen := false.B
1638      csBundle(0).fpWen := false.B
1639      csBundle(0).vecWen := true.B
1640      csBundle(0).vlsInstr := true.B
1641      //LMUL
1642      for (i <- 0 until MAX_VLMUL) {
1643        csBundle(i + 1).srcType(0) := SrcType.vp
1644        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1645        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1646        csBundle(i + 1).ldest := dest + i.U
1647        csBundle(i + 1).uopIdx := i.U
1648        csBundle(i + 1).vlsInstr := true.B
1649      }
1650      csBundle.head.waitForward := isUsSegment
1651      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1652    }
1653    is(UopSplitType.VEC_S_LDST) {
1654      /*
1655      FMV.D.X
1656       */
1657      csBundle(0).srcType(0) := SrcType.reg
1658      csBundle(0).srcType(1) := SrcType.imm
1659      csBundle(0).lsrc(1) := 0.U
1660      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1661      csBundle(0).fuType := FuType.i2v.U
1662      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1663      csBundle(0).rfWen := false.B
1664      csBundle(0).fpWen := false.B
1665      csBundle(0).vecWen := true.B
1666      csBundle(0).vlsInstr := true.B
1667
1668      csBundle(1).srcType(0) := SrcType.reg
1669      csBundle(1).srcType(1) := SrcType.imm
1670      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1671      csBundle(1).lsrc(1) := 0.U
1672      csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1673      csBundle(1).fuType := FuType.i2v.U
1674      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1675      csBundle(1).rfWen := false.B
1676      csBundle(1).fpWen := false.B
1677      csBundle(1).vecWen := true.B
1678      csBundle(1).vlsInstr := true.B
1679
1680      //LMUL
1681      for (i <- 0 until MAX_VLMUL) {
1682        csBundle(i + 2).srcType(0) := SrcType.vp
1683        csBundle(i + 2).srcType(1) := SrcType.vp
1684        csBundle(i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1685        csBundle(i + 2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1686        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1687        csBundle(i + 2).ldest := dest + i.U
1688        csBundle(i + 2).uopIdx := i.U
1689        csBundle(i + 2).vlsInstr := true.B
1690      }
1691      csBundle.head.waitForward := isSdSegment
1692      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1693    }
1694    is(UopSplitType.VEC_I_LDST) {
1695      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
1696        for (i <- 0 until MAX_VLMUL) {
1697          val vecWen = if (i < lmul * nf) true.B else false.B
1698          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
1699          csBundle(i + 1).srcType(0) := SrcType.vp
1700          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1701          csBundle(i + 1).srcType(1) := SrcType.no
1702          csBundle(i + 1).lsrc(1) := src2 + i.U
1703          csBundle(i + 1).srcType(2) := src2Type
1704          csBundle(i + 1).lsrc(2) := dest + i.U
1705          csBundle(i + 1).ldest := dest + i.U
1706          csBundle(i + 1).rfWen := false.B
1707          csBundle(i + 1).fpWen := false.B
1708          csBundle(i + 1).vecWen := vecWen
1709          csBundle(i + 1).uopIdx := i.U
1710          csBundle(i + 1).vlsInstr := true.B
1711        }
1712      }
1713      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
1714        for (i <- 0 until MAX_VLMUL) {
1715          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1716          csBundle(i + 1).srcType(1) := src1Type
1717          csBundle(i + 1).lsrc(1) := src2 + i.U
1718        }
1719      }
1720
1721      val vlmul = vlmulReg
1722      val vsew = Cat(0.U(1.W), vsewReg)
1723      val veew = Cat(0.U(1.W), width)
1724      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
1725      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
1726        "b001".U -> 1.U,
1727        "b010".U -> 2.U,
1728        "b011".U -> 3.U
1729      ))
1730      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
1731        "b001".U -> 1.U,
1732        "b010".U -> 2.U,
1733        "b011".U -> 3.U
1734      ))
1735      csBundle(0).srcType(0) := SrcType.reg
1736      csBundle(0).srcType(1) := SrcType.imm
1737      csBundle(0).lsrc(1) := 0.U
1738      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1739      csBundle(0).fuType := FuType.i2v.U
1740      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1741      csBundle(0).rfWen := false.B
1742      csBundle(0).fpWen := false.B
1743      csBundle(0).vecWen := true.B
1744      csBundle(0).vlsInstr := true.B
1745
1746      //LMUL
1747      when(nf === 0.U) {
1748        for (i <- 0 until MAX_VLMUL) {
1749          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1750          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1751          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1752          csBundle(i + 1).srcType(0) := SrcType.vp
1753          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1754          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1755          csBundle(i + 1).srcType(2) := SrcType.vp
1756          // lsrc2 is old vd
1757          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1758          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1759          csBundle(i + 1).uopIdx := i.U
1760          csBundle(i + 1).vlsInstr := true.B
1761        }
1762      }.otherwise{
1763        // nf > 1, is segment indexed load/store
1764        // gen src0, vd
1765        switch(simple_lmul) {
1766          is(0.U) {
1767            switch(nf) {
1768              is(1.U) {
1769                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
1770              }
1771              is(2.U) {
1772                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
1773              }
1774              is(3.U) {
1775                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
1776              }
1777              is(4.U) {
1778                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
1779              }
1780              is(5.U) {
1781                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
1782              }
1783              is(6.U) {
1784                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
1785              }
1786              is(7.U) {
1787                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
1788              }
1789            }
1790          }
1791          is(1.U) {
1792            switch(nf) {
1793              is(1.U) {
1794                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
1795              }
1796              is(2.U) {
1797                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
1798              }
1799              is(3.U) {
1800                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
1801              }
1802            }
1803          }
1804          is(2.U) {
1805            switch(nf) {
1806              is(1.U) {
1807                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
1808              }
1809            }
1810          }
1811        }
1812
1813        // gen src1
1814        switch(simple_emul) {
1815          is(0.U) {
1816            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
1817          }
1818          is(1.U) {
1819            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
1820          }
1821          is(2.U) {
1822            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
1823          }
1824          is(3.U) {
1825            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
1826          }
1827        }
1828
1829        // when is vstore instructions, not set vecwen
1830        when(isVstore) {
1831          for (i <- 0 until MAX_VLMUL) {
1832            csBundle(i + 1).vecWen := false.B
1833          }
1834        }
1835      }
1836      csBundle.head.waitForward := isIxSegment
1837      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1838    }
1839  }
1840
1841  //readyFromRename Counter
1842  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1843
1844  // The left uops of the complex inst in ComplexDecoder can be send out this cycle
1845  val thisAllOut = uopRes <= readyCounter
1846
1847  switch(state) {
1848    is(s_idle) {
1849      when (inValid) {
1850        stateNext := s_active
1851        uopResNext := inUopInfo.numOfUop
1852      }
1853    }
1854    is(s_active) {
1855      when (thisAllOut) {
1856        when (inValid) {
1857          stateNext := s_active
1858          uopResNext := inUopInfo.numOfUop
1859        }.otherwise {
1860          stateNext := s_idle
1861          uopResNext := 0.U
1862        }
1863      }.otherwise {
1864        stateNext := s_active
1865        uopResNext := uopRes - readyCounter
1866      }
1867    }
1868  }
1869
1870  state := Mux(io.redirect, s_idle, stateNext)
1871  uopRes := Mux(io.redirect, 0.U, uopResNext)
1872
1873  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1874
1875  for(i <- 0 until RenameWidth) {
1876    outValids(i) := complexNum > i.U
1877    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1878  }
1879
1880  outComplexNum := Mux(state === s_active, complexNum, 0.U)
1881  inReady := state === s_idle || state === s_active && thisAllOut
1882
1883//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1884//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1885//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1886//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1887//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1888//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1889//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1890//
1891//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1892//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1893//    0.U)
1894//  validToRename.zipWithIndex.foreach{
1895//    case(dst, i) =>
1896//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1897//      dst := MuxCase(false.B, Seq(
1898//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1899//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1900//      ).toSeq)
1901//  }
1902//
1903//  readyToIBuf.zipWithIndex.foreach {
1904//    case (dst, i) =>
1905//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1906//      dst := MuxCase(true.B, Seq(
1907//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1908//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1909//      ).toSeq)
1910//  }
1911//
1912//  io.deq.decodedInsts := decodedInsts
1913//  io.deq.complexNum := complexNum
1914//  io.deq.validToRename := validToRename
1915//  io.deq.readyToIBuf := readyToIBuf
1916}
1917