xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 82674533125d3d049f50148b1d9e215e1463f136)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one fixed uop index of a non-segment indexed
  * vector load/store.
  *
  * The 4-bit `src` input is interpreted as `(emul << 2) | lmul`, where both fields are
  * two-bit log2 exponents in the range 0..3 (they are used as shift amounts, `1 << emul`
  * and `1 << lmul`, when the table is generated). For every one of the 16 combinations the
  * table holds the pair of register offsets computed by [[genCsBundle_VEC_INDEXED_LDST]]
  * for this module's `uopIdx`; the pair is packed as `(offsetVs2 << 3) | offsetVd` and
  * minimised into combinational logic with `QMCMinimizer`.
  *
  * @param uopIdx index of the uop this table instance describes (a Scala constant, not hardware)
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  // {emul[1:0], lmul[1:0]} selector
  val src = IO(Input(UInt(4.W)))
  // Upper three bits of the decoded entry
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  // Lower three bits of the decoded entry
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the register offsets used by uop `uopIdx` of a non-segment indexed load/store.
    *
    * The number of uops is `1 << max(lmul, emul)`. The register group belonging to the larger
    * of the two exponents advances by one register per uop, while the other group advances
    * once every `1 << |emul - lmul|` uops.
    *
    * @param lmul   log2 exponent, used as a shift amount
    * @param emul   log2 exponent, used as a shift amount
    * @param uopIdx index of the uop
    * @return `(offsetVs2, offsetVd)`; `(0, 0)` when `uopIdx` is outside `0 until uopNum`
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    val emulIsLarger = lmul < emul
    val (larger, smaller) = if (emulIsLarger) (emul, lmul) else (lmul, emul)
    val uopNum = 1 << larger               // uop count depends on the larger of lmul / emul
    val ratio = 1 << (larger - smaller)    // uops that share one register of the smaller group
    if (uopIdx < 0 || uopIdx >= uopNum) {
      // this uop does not exist for the given lmul / emul pair
      (0, 0)
    } else if (emulIsLarger) {
      (uopIdx, uopIdx / ratio)
    } else {
      (uopIdx / ratio, uopIdx)
    }
  }

  // indexed load/store: one (emul, lmul, offsetVs2, offsetVd) row per exponent pair,
  // emul-major so that the row order matches the `(emul << 2) | lmul` key order.
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // All 16 keys are listed explicitly; BitPat.N(6) (all zeros) is only the formal default.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat(((emul << 2) | lmul).U(4.W)), BitPat(((offsetVs2 << 3) | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Constants mixed into the vector decode logic.
  */
trait VectorConstants {
  /** Bound of the per-register loops that expand one vector instruction into uops. */
  val MAX_VLMUL: Int = 8

  /** Logical index of the first temporary vector register: 33~47  ->  15 */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Presumably the largest uop count of an indexed load/store - TODO confirm against users. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
84
/**
  * Port bundle of the complex-instruction decoder ([[DecodeUnitComp]]).
  *
  * Field order is part of the hardware interface and must not be rearranged.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Single-bit redirect indication (NOTE(review): presumably flushes in-flight splitting - confirm)
  val redirect = Input(Bool())
  // Custom CSR control inputs
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // Bypassed vtype; DecodeUnitComp connects it to the vpu fields of both vset uops
  val vtypeBypass = Input(new VType)

  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    // Result of the simple decoder for that instruction
    val simpleDecodedInst = new DecodedInst
    // Accompanying uop information (numOfUop, numOfWB, lmul are read by DecodeUnitComp)
    val uopInfo = new UopInfo
  }))

  val out = new Bundle {
    // One decoupled port per rename slot carrying the split uops
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }

  // 3-bit count output (NOTE(review): presumably the number of uops offered this cycle - confirm)
  val complexNum = Output(UInt(3.W))
}
99
100/**
101  * @author zly
102  */
103class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
104  val io = IO(new DecodeUnitCompIO)
105
106  // alias
107  private val inReady = io.in.ready
108  private val inValid = io.in.valid
109  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
110  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
111  private val inUopInfo = io.in.bits.uopInfo
112  private val outValids = io.out.complexDecodedInsts.map(_.valid)
113  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
114  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
115  private val outComplexNum = io.complexNum
116
117  val maxUopSize = MaxUopSize
118  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
119    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
120      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
121    }.elsewhen(inInstFields.RS1 === 0.U) {
122      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
123    }
124  }
125
126  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
127  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
128  //input bits
129  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
130
131  val src1 = Cat(0.U(1.W), instFields.RS1)
132  val src2 = Cat(0.U(1.W), instFields.RS2)
133  val dest = Cat(0.U(1.W), instFields.RD)
134
135  val nf    = instFields.NF
136  val width = instFields.WIDTH(1, 0)
137
138  //output of DecodeUnit
139  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
140  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
141  val lmul = Wire(UInt(4.W))
142  val isVsetSimple = Wire(Bool())
143
144  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
145  indexedLSRegOffset.map(_.src := 0.U)
146
147  //pre decode
148  lmul := latchedUopInfo.lmul
149  isVsetSimple := latchedInst.isVset
150  val vlmulReg = latchedInst.vpu.vlmul
151  val vsewReg = latchedInst.vpu.vsew
152
153  //Type of uop Div
154  val typeOfSplit = latchedInst.uopSplitType
155  val src1Type = latchedInst.srcType(0)
156  val src1IsImm = src1Type === SrcType.imm
157  val src1IsFp = src1Type === SrcType.fp
158
159  val isVstore = FuType.isVStore(latchedInst.fuType)
160
161  numOfUop := latchedUopInfo.numOfUop
162  numOfWB := latchedUopInfo.numOfWB
163
164  //uops dispatch
165  val s_idle :: s_active :: Nil = Enum(2)
166  val state = RegInit(s_idle)
167  val stateNext = WireDefault(state)
168  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
169  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
170  val uopResNext = WireInit(uopRes)
171  val e64 = 3.U(2.W)
172  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
173  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
174  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
175
176  //uop div up to maxUopSize
177  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
178  csBundle.foreach { case dst =>
179    dst := latchedInst
180    dst.numUops := latchedUopInfo.numOfUop
181    dst.numWB := latchedUopInfo.numOfWB
182    dst.firstUop := false.B
183    dst.lastUop := false.B
184    dst.vlsInstr := false.B
185  }
186
187  csBundle(0).firstUop := true.B
188  csBundle(numOfUop - 1.U).lastUop := true.B
189
190  switch(typeOfSplit) {
191    is(UopSplitType.VSET) {
192      // In simple decoder, rfWen and vecWen are not set
193      when(isVsetSimple) {
194        // Default
195        // uop0 set rd, never flushPipe
196        csBundle(0).fuType := FuType.vsetiwi.U
197        csBundle(0).flushPipe := false.B
198        csBundle(0).rfWen := true.B
199        // uop1 set vl, vsetvl will flushPipe
200        csBundle(1).ldest := VCONFIG_IDX.U
201        csBundle(1).vecWen := true.B
202        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
203          // write nothing, uop0 is a nop instruction
204          csBundle(0).rfWen := false.B
205          csBundle(0).fpWen := false.B
206          csBundle(0).vecWen := false.B
207          csBundle(1).fuType := FuType.vsetfwf.U
208          csBundle(1).srcType(0) := SrcType.vp
209          csBundle(1).lsrc(0) := VCONFIG_IDX.U
210        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
211          // uop0: mv vtype gpr to vector region
212          csBundle(0).srcType(0) := SrcType.xp
213          csBundle(0).srcType(1) := SrcType.no
214          csBundle(0).lsrc(1) := 0.U
215          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
216          csBundle(0).fuType := FuType.i2v.U
217          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
218          csBundle(0).rfWen := false.B
219          csBundle(0).fpWen := false.B
220          csBundle(0).vecWen := true.B
221          csBundle(0).flushPipe := false.B
222          // uop1: uvsetvcfg_vv
223          csBundle(1).fuType := FuType.vsetfwf.U
224          // vl
225          csBundle(1).srcType(0) := SrcType.vp
226          csBundle(1).lsrc(0) := VCONFIG_IDX.U
227          // vtype
228          csBundle(1).srcType(1) := SrcType.vp
229          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
230          csBundle(1).vecWen := true.B
231          csBundle(1).ldest := VCONFIG_IDX.U
232        }.elsewhen(dest === 0.U) {
233          // write nothing, uop0 is a nop instruction
234          csBundle(0).rfWen := false.B
235          csBundle(0).fpWen := false.B
236          csBundle(0).vecWen := false.B
237        }
238        // use bypass vtype from vtypeGen
239        csBundle(0).vpu.connectVType(io.vtypeBypass)
240        csBundle(1).vpu.connectVType(io.vtypeBypass)
241      }
242    }
243    is(UopSplitType.VEC_VVV) {
244      for (i <- 0 until MAX_VLMUL) {
245        csBundle(i).lsrc(0) := src1 + i.U
246        csBundle(i).lsrc(1) := src2 + i.U
247        csBundle(i).lsrc(2) := dest + i.U
248        csBundle(i).ldest := dest + i.U
249        csBundle(i).uopIdx := i.U
250      }
251    }
252    is(UopSplitType.VEC_VFV) {
253      /*
254      f to vector move
255       */
256      csBundle(0).srcType(0) := SrcType.fp
257      csBundle(0).srcType(1) := SrcType.imm
258      csBundle(0).srcType(2) := SrcType.imm
259      csBundle(0).lsrc(1) := 0.U
260      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
261      csBundle(0).fuType := FuType.f2v.U
262      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
263      csBundle(0).vecWen := true.B
264      csBundle(0).vpu.isReverse := false.B
265      /*
266      LMUL
267       */
268      for (i <- 0 until MAX_VLMUL) {
269        csBundle(i + 1).srcType(0) := SrcType.vp
270        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
271        csBundle(i + 1).lsrc(1) := src2 + i.U
272        csBundle(i + 1).lsrc(2) := dest + i.U
273        csBundle(i + 1).ldest := dest + i.U
274        csBundle(i + 1).uopIdx := i.U
275      }
276    }
277    is(UopSplitType.VEC_EXT2) {
278      for (i <- 0 until MAX_VLMUL / 2) {
279        csBundle(2 * i).lsrc(1) := src2 + i.U
280        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
281        csBundle(2 * i).ldest := dest + (2 * i).U
282        csBundle(2 * i).uopIdx := (2 * i).U
283        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
284        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
285        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
286        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
287      }
288    }
289    is(UopSplitType.VEC_EXT4) {
290      for (i <- 0 until MAX_VLMUL / 4) {
291        csBundle(4 * i).lsrc(1) := src2 + i.U
292        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
293        csBundle(4 * i).ldest := dest + (4 * i).U
294        csBundle(4 * i).uopIdx := (4 * i).U
295        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
296        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
297        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
298        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
299        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
300        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
301        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
302        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
303        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
304        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
305        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
306        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
307      }
308    }
309    is(UopSplitType.VEC_EXT8) {
310      for (i <- 0 until MAX_VLMUL) {
311        csBundle(i).lsrc(1) := src2
312        csBundle(i).lsrc(2) := dest + i.U
313        csBundle(i).ldest := dest + i.U
314        csBundle(i).uopIdx := i.U
315      }
316    }
317    is(UopSplitType.VEC_0XV) {
318      /*
319      i/f to vector move
320       */
321      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
322      csBundle(0).srcType(1) := SrcType.imm
323      csBundle(0).srcType(2) := SrcType.imm
324      csBundle(0).lsrc(1) := 0.U
325      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
326      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
327      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
328      csBundle(0).rfWen := false.B
329      csBundle(0).fpWen := false.B
330      csBundle(0).vecWen := true.B
331      /*
332      vmv.s.x
333       */
334      csBundle(1).srcType(0) := SrcType.vp
335      csBundle(1).srcType(1) := SrcType.imm
336      csBundle(1).srcType(2) := SrcType.vp
337      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
338      csBundle(1).lsrc(1) := 0.U
339      csBundle(1).lsrc(2) := dest
340      csBundle(1).ldest := dest
341      csBundle(1).rfWen := false.B
342      csBundle(1).fpWen := false.B
343      csBundle(1).vecWen := true.B
344      csBundle(1).uopIdx := 0.U
345    }
346    is(UopSplitType.VEC_VXV) {
347      /*
348      i to vector move
349       */
350      csBundle(0).srcType(0) := SrcType.reg
351      csBundle(0).srcType(1) := SrcType.imm
352      csBundle(0).srcType(2) := SrcType.imm
353      csBundle(0).lsrc(1) := 0.U
354      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
355      csBundle(0).fuType := FuType.i2v.U
356      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
357      csBundle(0).vecWen := true.B
358      csBundle(0).vpu.isReverse := false.B
359      /*
360      LMUL
361       */
362      for (i <- 0 until MAX_VLMUL) {
363        csBundle(i + 1).srcType(0) := SrcType.vp
364        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
365        csBundle(i + 1).lsrc(1) := src2 + i.U
366        csBundle(i + 1).lsrc(2) := dest + i.U
367        csBundle(i + 1).ldest := dest + i.U
368        csBundle(i + 1).uopIdx := i.U
369      }
370    }
371    is(UopSplitType.VEC_VVW) {
372      for (i <- 0 until MAX_VLMUL / 2) {
373        csBundle(2 * i).lsrc(0) := src1 + i.U
374        csBundle(2 * i).lsrc(1) := src2 + i.U
375        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
376        csBundle(2 * i).ldest := dest + (2 * i).U
377        csBundle(2 * i).uopIdx := (2 * i).U
378        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
379        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
380        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
381        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
382        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
383      }
384    }
385    is(UopSplitType.VEC_VFW) {
386      /*
387      f to vector move
388       */
389      csBundle(0).srcType(0) := SrcType.fp
390      csBundle(0).srcType(1) := SrcType.imm
391      csBundle(0).srcType(2) := SrcType.imm
392      csBundle(0).lsrc(1) := 0.U
393      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
394      csBundle(0).fuType := FuType.f2v.U
395      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
396      csBundle(0).rfWen := false.B
397      csBundle(0).fpWen := false.B
398      csBundle(0).vecWen := true.B
399
400      for (i <- 0 until MAX_VLMUL / 2) {
401        csBundle(2 * i + 1).srcType(0) := SrcType.vp
402        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
403        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
404        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
405        csBundle(2 * i + 1).ldest := dest + (2 * i).U
406        csBundle(2 * i + 1).uopIdx := (2 * i).U
407        csBundle(2 * i + 2).srcType(0) := SrcType.vp
408        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
409        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
410        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
411        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
412        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
413      }
414    }
415    is(UopSplitType.VEC_WVW) {
416      for (i <- 0 until MAX_VLMUL / 2) {
417        csBundle(2 * i).lsrc(0) := src1 + i.U
418        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
419        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
420        csBundle(2 * i).ldest := dest + (2 * i).U
421        csBundle(2 * i).uopIdx := (2 * i).U
422        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
423        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
424        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
425        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
426        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
427      }
428    }
429    is(UopSplitType.VEC_VXW) {
430      /*
431      i to vector move
432       */
433      csBundle(0).srcType(0) := SrcType.reg
434      csBundle(0).srcType(1) := SrcType.imm
435      csBundle(0).srcType(2) := SrcType.imm
436      csBundle(0).lsrc(1) := 0.U
437      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
438      csBundle(0).fuType := FuType.i2v.U
439      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
440      csBundle(0).vecWen := true.B
441
442      for (i <- 0 until MAX_VLMUL / 2) {
443        csBundle(2 * i + 1).srcType(0) := SrcType.vp
444        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
445        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
446        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
447        csBundle(2 * i + 1).ldest := dest + (2 * i).U
448        csBundle(2 * i + 1).uopIdx := (2 * i).U
449        csBundle(2 * i + 2).srcType(0) := SrcType.vp
450        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
451        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
452        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
453        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
454        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
455      }
456    }
457    is(UopSplitType.VEC_WXW) {
458      /*
459      i to vector move
460       */
461      csBundle(0).srcType(0) := SrcType.reg
462      csBundle(0).srcType(1) := SrcType.imm
463      csBundle(0).srcType(2) := SrcType.imm
464      csBundle(0).lsrc(1) := 0.U
465      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
466      csBundle(0).fuType := FuType.i2v.U
467      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
468      csBundle(0).vecWen := true.B
469
470      for (i <- 0 until MAX_VLMUL / 2) {
471        csBundle(2 * i + 1).srcType(0) := SrcType.vp
472        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
473        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
474        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
475        csBundle(2 * i + 1).ldest := dest + (2 * i).U
476        csBundle(2 * i + 1).uopIdx := (2 * i).U
477        csBundle(2 * i + 2).srcType(0) := SrcType.vp
478        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
479        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
480        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
481        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
482        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
483      }
484    }
485    is(UopSplitType.VEC_WVV) {
486      for (i <- 0 until MAX_VLMUL / 2) {
487
488        csBundle(2 * i).lsrc(0) := src1 + i.U
489        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
490        csBundle(2 * i).lsrc(2) := dest + i.U
491        csBundle(2 * i).ldest := dest + i.U
492        csBundle(2 * i).uopIdx := (2 * i).U
493        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
494        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
495        csBundle(2 * i + 1).lsrc(2) := dest + i.U
496        csBundle(2 * i + 1).ldest := dest + i.U
497        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
498      }
499    }
500    is(UopSplitType.VEC_WFW) {
501      /*
502      f to vector move
503       */
504      csBundle(0).srcType(0) := SrcType.fp
505      csBundle(0).srcType(1) := SrcType.imm
506      csBundle(0).srcType(2) := SrcType.imm
507      csBundle(0).lsrc(1) := 0.U
508      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
509      csBundle(0).fuType := FuType.f2v.U
510      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
511      csBundle(0).rfWen := false.B
512      csBundle(0).fpWen := false.B
513      csBundle(0).vecWen := true.B
514
515      for (i <- 0 until MAX_VLMUL / 2) {
516        csBundle(2 * i + 1).srcType(0) := SrcType.vp
517        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
518        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
519        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
520        csBundle(2 * i + 1).ldest := dest + (2 * i).U
521        csBundle(2 * i + 1).uopIdx := (2 * i).U
522        csBundle(2 * i + 2).srcType(0) := SrcType.vp
523        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
524        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
525        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
526        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
527        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
528      }
529    }
530    is(UopSplitType.VEC_WXV) {
531      /*
532      i to vector move
533       */
534      csBundle(0).srcType(0) := SrcType.reg
535      csBundle(0).srcType(1) := SrcType.imm
536      csBundle(0).srcType(2) := SrcType.imm
537      csBundle(0).lsrc(1) := 0.U
538      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
539      csBundle(0).fuType := FuType.i2v.U
540      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
541      csBundle(0).vecWen := true.B
542
543      for (i <- 0 until MAX_VLMUL / 2) {
544        csBundle(2 * i + 1).srcType(0) := SrcType.vp
545        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
546        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
547        csBundle(2 * i + 1).lsrc(2) := dest + i.U
548        csBundle(2 * i + 1).ldest := dest + i.U
549        csBundle(2 * i + 1).uopIdx := (2 * i).U
550        csBundle(2 * i + 2).srcType(0) := SrcType.vp
551        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
552        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
553        csBundle(2 * i + 2).lsrc(2) := dest + i.U
554        csBundle(2 * i + 2).ldest := dest + i.U
555        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
556      }
557    }
558    is(UopSplitType.VEC_VVM) {
559      csBundle(0).lsrc(2) := dest
560      csBundle(0).ldest := dest
561      csBundle(0).uopIdx := 0.U
562      for (i <- 1 until MAX_VLMUL) {
563        csBundle(i).lsrc(0) := src1 + i.U
564        csBundle(i).lsrc(1) := src2 + i.U
565        csBundle(i).lsrc(2) := dest
566        csBundle(i).ldest := dest
567        csBundle(i).uopIdx := i.U
568      }
569    }
570    is(UopSplitType.VEC_VFM) {
571      /*
572      f to vector move
573       */
574      csBundle(0).srcType(0) := SrcType.fp
575      csBundle(0).srcType(1) := SrcType.imm
576      csBundle(0).srcType(2) := SrcType.imm
577      csBundle(0).lsrc(1) := 0.U
578      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
579      csBundle(0).fuType := FuType.f2v.U
580      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
581      csBundle(0).rfWen := false.B
582      csBundle(0).fpWen := false.B
583      csBundle(0).vecWen := true.B
584      //LMUL
585      csBundle(1).srcType(0) := SrcType.vp
586      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
587      csBundle(1).lsrc(2) := dest
588      csBundle(1).ldest := dest
589      csBundle(1).uopIdx := 0.U
590      for (i <- 1 until MAX_VLMUL) {
591        csBundle(i + 1).srcType(0) := SrcType.vp
592        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
593        csBundle(i + 1).lsrc(1) := src2 + i.U
594        csBundle(i + 1).lsrc(2) := dest
595        csBundle(i + 1).ldest := dest
596        csBundle(i + 1).uopIdx := i.U
597      }
598      csBundle(numOfUop - 1.U).ldest := dest
599    }
600    is(UopSplitType.VEC_VXM) {
601      /*
602      i to vector move
603       */
604      csBundle(0).srcType(0) := SrcType.reg
605      csBundle(0).srcType(1) := SrcType.imm
606      csBundle(0).srcType(2) := SrcType.imm
607      csBundle(0).lsrc(1) := 0.U
608      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
609      csBundle(0).fuType := FuType.i2v.U
610      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
611      csBundle(0).vecWen := true.B
612      //LMUL
613      csBundle(1).srcType(0) := SrcType.vp
614      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
615      csBundle(1).lsrc(2) := dest
616      csBundle(1).ldest := dest
617      csBundle(1).uopIdx := 0.U
618      for (i <- 1 until MAX_VLMUL) {
619        csBundle(i + 1).srcType(0) := SrcType.vp
620        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
621        csBundle(i + 1).lsrc(1) := src2 + i.U
622        csBundle(i + 1).lsrc(2) := dest
623        csBundle(i + 1).ldest := dest
624        csBundle(i + 1).uopIdx := i.U
625      }
626      csBundle(numOfUop - 1.U).ldest := dest
627    }
628    is(UopSplitType.VEC_SLIDE1UP) {
629      /*
630      i to vector move
631       */
632      csBundle(0).srcType(0) := SrcType.reg
633      csBundle(0).srcType(1) := SrcType.imm
634      csBundle(0).srcType(2) := SrcType.imm
635      csBundle(0).lsrc(1) := 0.U
636      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
637      csBundle(0).fuType := FuType.i2v.U
638      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
639      csBundle(0).vecWen := true.B
640      //LMUL
641      csBundle(1).srcType(0) := SrcType.vp
642      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
643      csBundle(1).lsrc(2) := dest
644      csBundle(1).ldest := dest
645      csBundle(1).uopIdx := 0.U
646      for (i <- 1 until MAX_VLMUL) {
647        csBundle(i + 1).srcType(0) := SrcType.vp
648        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
649        csBundle(i + 1).lsrc(1) := src2 + i.U
650        csBundle(i + 1).lsrc(2) := dest + i.U
651        csBundle(i + 1).ldest := dest + i.U
652        csBundle(i + 1).uopIdx := i.U
653      }
654    }
655    is(UopSplitType.VEC_FSLIDE1UP) {
656      /*
657      f to vector move
658       */
659      csBundle(0).srcType(0) := SrcType.fp
660      csBundle(0).srcType(1) := SrcType.imm
661      csBundle(0).srcType(2) := SrcType.imm
662      csBundle(0).lsrc(1) := 0.U
663      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
664      csBundle(0).fuType := FuType.f2v.U
665      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
666      csBundle(0).rfWen := false.B
667      csBundle(0).fpWen := false.B
668      csBundle(0).vecWen := true.B
669      //LMUL
670      csBundle(1).srcType(0) := SrcType.vp
671      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
672      csBundle(1).lsrc(1) := src2
673      csBundle(1).lsrc(2) := dest
674      csBundle(1).ldest := dest
675      csBundle(1).uopIdx := 0.U
676      for (i <- 1 until MAX_VLMUL) {
677        csBundle(i + 1).srcType(0) := SrcType.vp
678        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
679        csBundle(i + 1).lsrc(1) := src2 + i.U
680        csBundle(i + 1).lsrc(2) := dest + i.U
681        csBundle(i + 1).ldest := dest + i.U
682        csBundle(i + 1).uopIdx := i.U
683      }
684    }
685    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
686      /*
687      i to vector move
688       */
689      csBundle(0).srcType(0) := SrcType.reg
690      csBundle(0).srcType(1) := SrcType.imm
691      csBundle(0).srcType(2) := SrcType.imm
692      csBundle(0).lsrc(1) := 0.U
693      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
694      csBundle(0).fuType := FuType.i2v.U
695      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
696      csBundle(0).vecWen := true.B
697      //LMUL
698      for (i <- 0 until MAX_VLMUL) {
699        csBundle(2 * i + 1).srcType(0) := SrcType.vp
700        csBundle(2 * i + 1).srcType(1) := SrcType.vp
701        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
702        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
703        csBundle(2 * i + 1).lsrc(2) := dest + i.U
704        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
705        csBundle(2 * i + 1).uopIdx := (2 * i).U
706        if (2 * i + 2 < MAX_VLMUL * 2) {
707          csBundle(2 * i + 2).srcType(0) := SrcType.vp
708          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
709          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
710          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
711          csBundle(2 * i + 2).ldest := dest + i.U
712          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
713        }
714      }
715      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
716      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
717      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
718    }
719    is(UopSplitType.VEC_FSLIDE1DOWN) {
720      /*
721      f to vector move
722       */
723      csBundle(0).srcType(0) := SrcType.fp
724      csBundle(0).srcType(1) := SrcType.imm
725      csBundle(0).srcType(2) := SrcType.imm
726      csBundle(0).lsrc(1) := 0.U
727      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
728      csBundle(0).fuType := FuType.f2v.U
729      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
730      csBundle(0).rfWen := false.B
731      csBundle(0).fpWen := false.B
732      csBundle(0).vecWen := true.B
733      //LMUL
734      for (i <- 0 until MAX_VLMUL) {
735        csBundle(2 * i + 1).srcType(0) := SrcType.vp
736        csBundle(2 * i + 1).srcType(1) := SrcType.vp
737        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
738        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
739        csBundle(2 * i + 1).lsrc(2) := dest + i.U
740        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
741        csBundle(2 * i + 1).uopIdx := (2 * i).U
742        if (2 * i + 2 < MAX_VLMUL * 2) {
743          csBundle(2 * i + 2).srcType(0) := SrcType.vp
744          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
745          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
746          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
747          csBundle(2 * i + 2).ldest := dest + i.U
748          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
749        }
750      }
751      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
752      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
753      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
754    }
755    is(UopSplitType.VEC_VRED) {
756      when(vlmulReg === "b001".U) {
757        csBundle(0).srcType(2) := SrcType.DC
758        csBundle(0).lsrc(0) := src2 + 1.U
759        csBundle(0).lsrc(1) := src2
760        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
761        csBundle(0).uopIdx := 0.U
762      }
763      when(vlmulReg === "b010".U) {
764        csBundle(0).srcType(2) := SrcType.DC
765        csBundle(0).lsrc(0) := src2 + 1.U
766        csBundle(0).lsrc(1) := src2
767        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
768        csBundle(0).uopIdx := 0.U
769
770        csBundle(1).srcType(2) := SrcType.DC
771        csBundle(1).lsrc(0) := src2 + 3.U
772        csBundle(1).lsrc(1) := src2 + 2.U
773        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
774        csBundle(1).uopIdx := 1.U
775
776        csBundle(2).srcType(2) := SrcType.DC
777        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
778        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
779        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
780        csBundle(2).uopIdx := 2.U
781      }
782      when(vlmulReg === "b011".U) {
783        for (i <- 0 until MAX_VLMUL) {
784          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
785            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
786            csBundle(i).lsrc(1) := src2 + (i * 2).U
787            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
788          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
789            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
790            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
791            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
792          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
793            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
794            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
795            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
796          }
797          csBundle(i).srcType(2) := SrcType.DC
798          csBundle(i).uopIdx := i.U
799        }
800      }
801      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
802        /*
803         * 2 <= vlmul <= 8
804         */
805        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
806        csBundle(numOfUop - 1.U).lsrc(0) := src1
807        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
808        csBundle(numOfUop - 1.U).lsrc(2) := dest
809        csBundle(numOfUop - 1.U).ldest := dest
810        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
811      }
812    }
813    is(UopSplitType.VEC_VFRED) {
814      val vlmul = vlmulReg
815      val vsew = vsewReg
816      when(vlmul === VLmul.m8){
817        for (i <- 0 until 4) {
818          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
819          csBundle(i).lsrc(1) := src2 + (i * 2).U
820          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
821          csBundle(i).uopIdx := i.U
822        }
823        for (i <- 4 until 6) {
824          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
825          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
826          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
827          csBundle(i).uopIdx := i.U
828        }
829        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
830        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
831        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
832        csBundle(6).uopIdx := 6.U
833        when(vsew === VSew.e64) {
834          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
835          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
836          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
837          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
838          csBundle(7).uopIdx := 7.U
839          csBundle(8).lsrc(0) := src1
840          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
841          csBundle(8).ldest := dest
842          csBundle(8).uopIdx := 8.U
843        }
844        when(vsew === VSew.e32) {
845          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
846          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
847          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
848          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
849          csBundle(7).uopIdx := 7.U
850          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
851          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
852          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
853          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
854          csBundle(8).uopIdx := 8.U
855          csBundle(9).lsrc(0) := src1
856          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
857          csBundle(9).ldest := dest
858          csBundle(9).uopIdx := 9.U
859        }
860        when(vsew === VSew.e16) {
861          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
862          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
863          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
864          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
865          csBundle(7).uopIdx := 7.U
866          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
867          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
868          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
869          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
870          csBundle(8).uopIdx := 8.U
871          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
872          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
873          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
874          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
875          csBundle(9).uopIdx := 9.U
876          csBundle(10).lsrc(0) := src1
877          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
878          csBundle(10).ldest := dest
879          csBundle(10).uopIdx := 10.U
880        }
881      }
882      when(vlmul === VLmul.m4) {
883        for (i <- 0 until 2) {
884          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
885          csBundle(i).lsrc(1) := src2 + (i * 2).U
886          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
887          csBundle(i).uopIdx := i.U
888        }
889        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
890        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
891        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
892        csBundle(2).uopIdx := 2.U
893        when(vsew === VSew.e64) {
894          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
895          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
896          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
897          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
898          csBundle(3).uopIdx := 3.U
899          csBundle(4).lsrc(0) := src1
900          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
901          csBundle(4).ldest := dest
902          csBundle(4).uopIdx := 4.U
903        }
904        when(vsew === VSew.e32) {
905          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
906          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
907          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
908          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
909          csBundle(3).uopIdx := 3.U
910          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
911          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
912          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
913          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
914          csBundle(4).uopIdx := 4.U
915          csBundle(5).lsrc(0) := src1
916          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
917          csBundle(5).ldest := dest
918          csBundle(5).uopIdx := 5.U
919        }
920        when(vsew === VSew.e16) {
921          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
922          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
923          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
924          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
925          csBundle(3).uopIdx := 3.U
926          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
927          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
928          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
929          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
930          csBundle(4).uopIdx := 4.U
931          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
932          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
933          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
934          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
935          csBundle(5).uopIdx := 5.U
936          csBundle(6).lsrc(0) := src1
937          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
938          csBundle(6).ldest := dest
939          csBundle(6).uopIdx := 6.U
940        }
941      }
942      when(vlmul === VLmul.m2) {
943        csBundle(0).lsrc(0) := src2 + 1.U
944        csBundle(0).lsrc(1) := src2 + 0.U
945        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
946        csBundle(0).uopIdx := 0.U
947        when(vsew === VSew.e64) {
948          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
949          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
950          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
951          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
952          csBundle(1).uopIdx := 1.U
953          csBundle(2).lsrc(0) := src1
954          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
955          csBundle(2).ldest := dest
956          csBundle(2).uopIdx := 2.U
957        }
958        when(vsew === VSew.e32) {
959          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
960          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
961          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
962          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
963          csBundle(1).uopIdx := 1.U
964          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
965          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
966          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
967          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
968          csBundle(2).uopIdx := 2.U
969          csBundle(3).lsrc(0) := src1
970          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
971          csBundle(3).ldest := dest
972          csBundle(3).uopIdx := 3.U
973        }
974        when(vsew === VSew.e16) {
975          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
976          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
977          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
978          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
979          csBundle(1).uopIdx := 1.U
980          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
981          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
982          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
983          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
984          csBundle(2).uopIdx := 2.U
985          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
986          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
987          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
988          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
989          csBundle(3).uopIdx := 3.U
990          csBundle(4).lsrc(0) := src1
991          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
992          csBundle(4).ldest := dest
993          csBundle(4).uopIdx := 4.U
994        }
995      }
996      when(vlmul === VLmul.m1) {
997        when(vsew === VSew.e64) {
998          csBundle(0).lsrc(0) := src2
999          csBundle(0).lsrc(1) := src2
1000          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1001          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1002          csBundle(0).uopIdx := 0.U
1003          csBundle(1).lsrc(0) := src1
1004          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1005          csBundle(1).ldest := dest
1006          csBundle(1).uopIdx := 1.U
1007        }
1008        when(vsew === VSew.e32) {
1009          csBundle(0).lsrc(0) := src2
1010          csBundle(0).lsrc(1) := src2
1011          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1012          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1013          csBundle(0).uopIdx := 0.U
1014          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1015          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1016          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1017          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1018          csBundle(1).uopIdx := 1.U
1019          csBundle(2).lsrc(0) := src1
1020          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1021          csBundle(2).ldest := dest
1022          csBundle(2).uopIdx := 2.U
1023        }
1024        when(vsew === VSew.e16) {
1025          csBundle(0).lsrc(0) := src2
1026          csBundle(0).lsrc(1) := src2
1027          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1028          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1029          csBundle(0).uopIdx := 0.U
1030          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1031          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1032          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1033          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1034          csBundle(1).uopIdx := 1.U
1035          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1036          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1037          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1038          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1039          csBundle(2).uopIdx := 2.U
1040          csBundle(3).lsrc(0) := src1
1041          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1042          csBundle(3).ldest := dest
1043          csBundle(3).uopIdx := 3.U
1044        }
1045      }
1046      when(vlmul === VLmul.mf2) {
1047        when(vsew === VSew.e32) {
1048          csBundle(0).lsrc(0) := src2
1049          csBundle(0).lsrc(1) := src2
1050          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1051          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1052          csBundle(0).uopIdx := 0.U
1053          csBundle(1).lsrc(0) := src1
1054          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1055          csBundle(1).ldest := dest
1056          csBundle(1).uopIdx := 1.U
1057        }
1058        when(vsew === VSew.e16) {
1059          csBundle(0).lsrc(0) := src2
1060          csBundle(0).lsrc(1) := src2
1061          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1062          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1063          csBundle(0).uopIdx := 0.U
1064          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1065          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1066          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1067          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1068          csBundle(1).uopIdx := 1.U
1069          csBundle(2).lsrc(0) := src1
1070          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1071          csBundle(2).ldest := dest
1072          csBundle(2).uopIdx := 2.U
1073        }
1074      }
1075      when(vlmul === VLmul.mf4) {
1076        when(vsew === VSew.e16) {
1077          csBundle(0).lsrc(0) := src2
1078          csBundle(0).lsrc(1) := src2
1079          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1080          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1081          csBundle(0).uopIdx := 0.U
1082          csBundle(1).lsrc(0) := src1
1083          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1084          csBundle(1).ldest := dest
1085          csBundle(1).uopIdx := 1.U
1086        }
1087      }
1088    }
1089
1090    is(UopSplitType.VEC_VFREDOSUM) {
1091      import yunsuan.VfaluType
1092      val vlmul = vlmulReg
1093      val vsew = vsewReg
1094      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1095      when(vlmul === VLmul.m8) {
1096        when(vsew === VSew.e64) {
1097          val vlmax = 16
1098          for (i <- 0 until vlmax) {
1099            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1100            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1101            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1102            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1103            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1104            csBundle(i).uopIdx := i.U
1105          }
1106        }
1107        when(vsew === VSew.e32) {
1108          val vlmax = 32
1109          for (i <- 0 until vlmax) {
1110            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1111            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1112            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1113            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1114            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1115            csBundle(i).uopIdx := i.U
1116          }
1117        }
1118        when(vsew === VSew.e16) {
1119          val vlmax = 64
1120          for (i <- 0 until vlmax) {
1121            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1122            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1123            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1124            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1125            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1126            csBundle(i).uopIdx := i.U
1127          }
1128        }
1129      }
1130      when(vlmul === VLmul.m4) {
1131        when(vsew === VSew.e64) {
1132          val vlmax = 8
1133          for (i <- 0 until vlmax) {
1134            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1135            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1137            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1138            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1139            csBundle(i).uopIdx := i.U
1140          }
1141        }
1142        when(vsew === VSew.e32) {
1143          val vlmax = 16
1144          for (i <- 0 until vlmax) {
1145            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1146            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1147            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1149            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1150            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1151            csBundle(i).uopIdx := i.U
1152          }
1153        }
1154        when(vsew === VSew.e16) {
1155          val vlmax = 32
1156          for (i <- 0 until vlmax) {
1157            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1158            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1161            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1162            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1163            csBundle(i).uopIdx := i.U
1164          }
1165        }
1166      }
1167      when(vlmul === VLmul.m2) {
1168        when(vsew === VSew.e64) {
1169          val vlmax = 4
1170          for (i <- 0 until vlmax) {
1171            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1172            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1173            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1174            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1175            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1176            csBundle(i).uopIdx := i.U
1177          }
1178        }
1179        when(vsew === VSew.e32) {
1180          val vlmax = 8
1181          for (i <- 0 until vlmax) {
1182            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1183            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1184            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1185            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1186            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1187            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1188            csBundle(i).uopIdx := i.U
1189          }
1190        }
1191        when(vsew === VSew.e16) {
1192          val vlmax = 16
1193          for (i <- 0 until vlmax) {
1194            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1195            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1197            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1198            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1199            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1200            csBundle(i).uopIdx := i.U
1201          }
1202        }
1203      }
1204      when(vlmul === VLmul.m1) {
1205        when(vsew === VSew.e64) {
1206          val vlmax = 2
1207          for (i <- 0 until vlmax) {
1208            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1209            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1211            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1212            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1213            csBundle(i).uopIdx := i.U
1214          }
1215        }
1216        when(vsew === VSew.e32) {
1217          val vlmax = 4
1218          for (i <- 0 until vlmax) {
1219            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1220            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1223            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1224            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1225            csBundle(i).uopIdx := i.U
1226          }
1227        }
1228        when(vsew === VSew.e16) {
1229          val vlmax = 8
1230          for (i <- 0 until vlmax) {
1231            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1232            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1235            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1236            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1237            csBundle(i).uopIdx := i.U
1238          }
1239        }
1240      }
1241      when(vlmul === VLmul.mf2) {
1242        when(vsew === VSew.e32) {
1243          val vlmax = 2
1244          for (i <- 0 until vlmax) {
1245            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1246            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1248            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1249            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1250            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1251            csBundle(i).uopIdx := i.U
1252          }
1253        }
1254        when(vsew === VSew.e16) {
1255          val vlmax = 4
1256          for (i <- 0 until vlmax) {
1257            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1258            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1260            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1261            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1262            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1263            csBundle(i).uopIdx := i.U
1264          }
1265        }
1266      }
1267      when(vlmul === VLmul.mf4) {
1268        when(vsew === VSew.e16) {
1269          val vlmax = 2
1270          for (i <- 0 until vlmax) {
1271            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1272            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1273            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1274            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1275            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1276            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1277            csBundle(i).uopIdx := i.U
1278          }
1279        }
1280      }
1281    }
1282
1283    is(UopSplitType.VEC_SLIDEUP) {
1284      // i to vector move
1285      csBundle(0).srcType(0) := SrcType.reg
1286      csBundle(0).srcType(1) := SrcType.imm
1287      csBundle(0).srcType(2) := SrcType.imm
1288      csBundle(0).lsrc(1) := 0.U
1289      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1290      csBundle(0).fuType := FuType.i2v.U
1291      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1292      csBundle(0).vecWen := true.B
1293      // LMUL
1294      for (i <- 0 until MAX_VLMUL)
1295        for (j <- 0 to i) {
1296          val old_vd = if (j == 0) {
1297            dest + i.U
1298          } else (VECTOR_TMP_REG_LMUL + j).U
1299          val vd = if (j == i) {
1300            dest + i.U
1301          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1302          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1303          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1304          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1305          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1306          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1307          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1308        }
1309    }
1310
1311    is(UopSplitType.VEC_SLIDEDOWN) {
1312      // i to vector move
1313      csBundle(0).srcType(0) := SrcType.reg
1314      csBundle(0).srcType(1) := SrcType.imm
1315      csBundle(0).srcType(2) := SrcType.imm
1316      csBundle(0).lsrc(1) := 0.U
1317      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1318      csBundle(0).fuType := FuType.i2v.U
1319      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1320      csBundle(0).vecWen := true.B
1321      // LMUL
1322      for (i <- 0 until MAX_VLMUL)
1323        for (j <- (0 to i).reverse) {
1324          when(i.U < lmul) {
1325            val old_vd = if (j == 0) {
1326              dest + lmul - 1.U - i.U
1327            } else (VECTOR_TMP_REG_LMUL + j).U
1328            val vd = if (j == i) {
1329              dest + lmul - 1.U - i.U
1330            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1331            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1332            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1333            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1334            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1335            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1336            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1337          }
1338        }
1339    }
1340
1341    is(UopSplitType.VEC_M0X) {
1342      // LMUL
1343      for (i <- 0 until MAX_VLMUL) {
1344        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1345        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1346        csBundle(i).srcType(0) := srcType0
1347        csBundle(i).srcType(1) := SrcType.vp
1348        csBundle(i).rfWen := false.B
1349        csBundle(i).fpWen := false.B
1350        csBundle(i).vecWen := true.B
1351        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1352        csBundle(i).lsrc(1) := src2
1353        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1354        csBundle(i).ldest := ldest
1355        csBundle(i).uopIdx := i.U
1356      }
1357      csBundle(lmul - 1.U).rfWen := true.B
1358      csBundle(lmul - 1.U).fpWen := false.B
1359      csBundle(lmul - 1.U).vecWen := false.B
1360      csBundle(lmul - 1.U).ldest := dest
1361    }
1362
1363    is(UopSplitType.VEC_MVV) {
          // Emits two entries per index i (2*i and 2*i+1). Both read the temporary
          // VECTOR_TMP_REG_LMUL + i - 1 through lsrc(0) and src2 through lsrc(1).
          // The even entry writes dest + i (with dest + i also supplied as lsrc(2));
          // the odd entry writes the temporary VECTOR_TMP_REG_LMUL + i, which the
          // next index reads.
1364      // LMUL
1365      for (i <- 0 until MAX_VLMUL) {
            // Index 0 has no preceding temporary, so the first source is DC there.
1366        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
            // Even entry: produces the destination register dest + i.
1367        csBundle(i * 2 + 0).srcType(0) := srcType0
1368        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1369        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1370        csBundle(i * 2 + 0).lsrc(1) := src2
1371        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1372        csBundle(i * 2 + 0).ldest := dest + i.U
1373        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1374
            // Odd entry: same sources, but the result goes to the temporary
            // VECTOR_TMP_REG_LMUL + i.
1375        csBundle(i * 2 + 1).srcType(0) := srcType0
1376        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1377        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1378        csBundle(i * 2 + 1).lsrc(1) := src2
1379        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1380        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1381        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1382      }
1383    }
1384
1385    is(UopSplitType.VEC_M0X_VFIRST) {
          // Only entry 0 is configured here: it writes the integer register file at
          // `dest` and writes neither the floating-point nor the vector register file.
1386      // LMUL
1387      csBundle(0).rfWen := true.B
1388      csBundle(0).fpWen := false.B
1389      csBundle(0).vecWen := false.B
1390      csBundle(0).ldest := dest
1391    }
1392    is(UopSplitType.VEC_VWW) {
          // Two phases over MAX_VLMUL*2 entries, each writing the temporary
          // VECTOR_TMP_REG_LMUL + i:
          //   - i < lmul : both sources are the register src2 + i;
          //   - otherwise: with k = i - lmul, the sources are the temporaries
          //     VECTOR_TMP_REG_LMUL + 2k + 1 and VECTOR_TMP_REG_LMUL + 2k, i.e. the
          //     destinations of entries 2k + 1 and 2k.
          // The entry at numOfUop - 1 is finally overridden to read src1 and the old
          // `dest` and to write `dest`.
1393      for (i <- 0 until MAX_VLMUL*2) {
1394        when(i.U < lmul){
1395          csBundle(i).srcType(2) := SrcType.DC
1396          csBundle(i).lsrc(0) := src2 + i.U
1397          csBundle(i).lsrc(1) := src2 + i.U
1398          // csBundle(i).lsrc(2) := dest + (2 * i).U
1399          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1400          csBundle(i).uopIdx :=  i.U
1401        } otherwise {
1402          csBundle(i).srcType(2) := SrcType.DC
              // Cat(x, 0.U(1.W)) appends a zero bit, i.e. doubles (i - lmul).
1403          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1404          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1405          // csBundle(i).lsrc(2) := dest + (2 * i).U
1406          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1407          csBundle(i).uopIdx := i.U
1408        }
            // NOTE(review): the four connections below do not depend on i, yet they
            // are emitted on every iteration. Because they follow the per-entry
            // connections of the same iteration, they win for the entry at
            // numOfUop - 1; they look hoistable to after the loop — confirm before
            // moving.
1409        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1410        csBundle(numOfUop-1.U).lsrc(0) := src1
1411        csBundle(numOfUop-1.U).lsrc(2) := dest
1412        csBundle(numOfUop-1.U).ldest := dest
1413      }
1414    }
1415    is(UopSplitType.VEC_RGATHER) {
          // Fills a len x len grid of entries, row i / column j at index i * len + j.
          // Each row reads src1 + i together with every src2 + j in turn. Within a
          // row the partial result is threaded through temporaries: column j takes
          // its old-vd operand (lsrc(2)) from dest + i when j == 0, otherwise from the
          // temporary written by column j - 1; the last column writes dest + i.
1416      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1417        for (i <- 0 until len)
1418          for (j <- 0 until len) {
1419            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1420            // csBundle(i * len + j).srcType(1) := SrcType.vp
1421            // csBundle(i * len + j).srcType(2) := SrcType.vp
1422            csBundle(i * len + j).lsrc(0) := src1 + i.U
1423            csBundle(i * len + j).lsrc(1) := src2 + j.U
1424            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1425            csBundle(i * len + j).lsrc(2) := vd_old
1426            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1427            csBundle(i * len + j).ldest := vd
1428            csBundle(i * len + j).uopIdx := (i * len + j).U
1429          }
1430      }
          // Grid size is chosen from vlmulReg: b001 -> 2, b010 -> 4, b011 -> 8.
          // Other encodings connect nothing here.
          // NOTE(review): presumably defaults set earlier in the file cover those — confirm.
1431      switch(vlmulReg) {
1432        is("b001".U ){
1433          genCsBundle_VEC_RGATHER(2)
1434        }
1435        is("b010".U ){
1436          genCsBundle_VEC_RGATHER(4)
1437        }
1438        is("b011".U ){
1439          genCsBundle_VEC_RGATHER(8)
1440        }
1441      }
1442    }
1443    is(UopSplitType.VEC_RGATHER_VX) {
          // Entry 0 moves the scalar/immediate operand into the temporary
          // VECTOR_TMP_REG_LMUL; the remaining entries form a len x len grid stored
          // one slot later (index i * len + j + 1) that reads that temporary as
          // lsrc(0). The temporaries used for threading partial results start at
          // VECTOR_TMP_REG_LMUL + 1 so they do not collide with the moved operand.
1444      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1445        for (i <- 0 until len)
1446          for (j <- 0 until len) {
1447            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1448            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1449            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1450            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1451            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
                // Old-vd operand: dest + i for the first column, otherwise the
                // temporary written by the previous column.
1452            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1453            csBundle(i * len + j + 1).lsrc(2) := vd_old
1454            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1455            csBundle(i * len + j + 1).ldest := vd
                // uopIdx is numbered from 0 for the grid, i.e. one less than the slot.
1456            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1457          }
1458      }
1459      // i to vector move
1460      csBundle(0).srcType(0) := SrcType.reg
1461      csBundle(0).srcType(1) := SrcType.imm
1462      csBundle(0).srcType(2) := SrcType.imm
1463      csBundle(0).lsrc(1) := 0.U
1464      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1465      csBundle(0).fuType := FuType.i2v.U
          // Operation type: the imm2Vec or i2Vec selector (chosen by src1IsImm)
          // concatenated with vsewReg.
1466      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1467      csBundle(0).rfWen := false.B
1468      csBundle(0).fpWen := false.B
1469      csBundle(0).vecWen := true.B
          // The 1 x 1 grid is connected unconditionally as the default; the switch
          // below overrides it for the vlmulReg encodings b001 / b010 / b011.
1470      genCsBundle_RGATHER_VX(1)
1471      switch(vlmulReg) {
1472        is("b001".U ){
1473          genCsBundle_RGATHER_VX(2)
1474        }
1475        is("b010".U ){
1476          genCsBundle_RGATHER_VX(4)
1477        }
1478        is("b011".U ){
1479          genCsBundle_RGATHER_VX(8)
1480        }
1481      }
1482    }
1483    is(UopSplitType.VEC_RGATHEREI16) {
          // Four generators, one per vsewReg case. They share the grid scheme used
          // for VEC_RGATHER (row i, column j, partial results threaded through
          // temporaries) and differ in how the index register lsrc(0) is derived
          // from src1 and in how many entries each grid cell needs.

          // vsewReg == 0 variant: every grid cell expands into two entries, reading
          // src1 + 2*i and src1 + 2*i + 1. The first of the pair always writes the
          // temporary VECTOR_TMP_REG_LMUL + 2*j, which the second reads as old vd.
1484      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1485        for (i <- 0 until len)
1486          for (j <- 0 until len) {
1487            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1488            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1489            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1490            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1491            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1492            csBundle((i * len + j)*2+0).ldest := vd0
1493            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1494            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1495            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1496            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1497            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1498            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1499            csBundle((i * len + j)*2+1).ldest := vd1
1500            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1501          }
1502      }
          // Fallback variant (used in the .otherwise branches below): one entry per
          // cell, index register src1 + i.
1503      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1504        for (i <- 0 until len)
1505          for (j <- 0 until len) {
1506            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1507            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1508            csBundle(i * len + j).lsrc(0) := src1 + i.U
1509            csBundle(i * len + j).lsrc(1) := src2 + j.U
1510            csBundle(i * len + j).lsrc(2) := vd_old
1511            csBundle(i * len + j).ldest := vd
1512            csBundle(i * len + j).uopIdx := (i * len + j).U
1513          }
1514      }
          // VSew.e32 variant: one entry per cell; two consecutive rows share one
          // index register (src1 + i / 2, integer division).
1515      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1516        for (i <- 0 until len)
1517          for (j <- 0 until len) {
1518            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1519            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1520            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1521            csBundle(i * len + j).lsrc(1) := src2 + j.U
1522            csBundle(i * len + j).lsrc(2) := vd_old
1523            csBundle(i * len + j).ldest := vd
1524            csBundle(i * len + j).uopIdx := (i * len + j).U
1525          }
1526      }
          // VSew.e64 variant: one entry per cell; four consecutive rows share one
          // index register (src1 + i / 4, integer division).
1527      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1528        for (i <- 0 until len)
1529          for (j <- 0 until len) {
1530            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1531            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1532            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1533            csBundle(i * len + j).lsrc(1) := src2 + j.U
1534            csBundle(i * len + j).lsrc(2) := vd_old
1535            csBundle(i * len + j).ldest := vd
1536            csBundle(i * len + j).uopIdx := (i * len + j).U
1537          }
1538      }
          // Default: the 1 x 1 grid for the current vsewReg. `!vsewReg.orR` is true
          // exactly when vsewReg is all zeros. The switch on vlmulReg below
          // overrides this for the encodings b001 / b010 / b011.
1539      when(!vsewReg.orR){
1540        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1541      }.elsewhen(vsewReg === VSew.e32){
1542        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1543      }.elsewhen(vsewReg === VSew.e64){
1544        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1545      }.otherwise{
1546        genCsBundle_VEC_RGATHEREI16(1)
1547      }
1548      switch(vlmulReg) {
1549        is("b001".U) {
1550          when(!vsewReg.orR) {
1551            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1552          }.elsewhen(vsewReg === VSew.e32){
1553            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1554          }.elsewhen(vsewReg === VSew.e64){
1555            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1556          }.otherwise{
1557            genCsBundle_VEC_RGATHEREI16(2)
1558          }
1559        }
1560        is("b010".U) {
1561          when(!vsewReg.orR) {
1562            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1563          }.elsewhen(vsewReg === VSew.e32){
1564            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1565          }.elsewhen(vsewReg === VSew.e64){
1566            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1567          }.otherwise{
1568            genCsBundle_VEC_RGATHEREI16(4)
1569          }
1570        }
            // This case has no vsewReg == 0 branch, so an all-zero vsewReg reaches the
            // fallback generator here.
            // NOTE(review): presumably that combination is not a legal
            // configuration for this instruction — confirm.
1571        is("b011".U) {
1572          when(vsewReg === VSew.e32){
1573            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1574          }.elsewhen(vsewReg === VSew.e64){
1575            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1576          }.otherwise{
1577            genCsBundle_VEC_RGATHEREI16(8)
1578          }
1579        }
1580      }
1581    }
1582    is(UopSplitType.VEC_COMPRESS) {
          // Emits a triangular arrangement of entries. Row i (one per src2 + i)
          // holds i + 2 entries, except the last row, which holds i + 1. Rows are
          // packed back to back, so row i starts at index i * (i + 3) / 2
          // (= 2 + 3 + ... + (i + 1)).
1583      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1584        for (i <- 0 until len) {
1585          val jlen = if (i == len-1) i+1 else i+2
1586          for (j <- 0 until jlen) {
                // Old-vd operand: dest + i on the diagonal (i == j), otherwise the
                // temporary VECTOR_TMP_REG_LMUL + j + 1.
1587            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
                // Destination: the last row writes dest + j. In earlier rows the
                // extra entry (j == i + 1) writes VECTOR_TMP_REG_LMUL itself and
                // every other entry writes VECTOR_TMP_REG_LMUL + j + 1.
1588            val vd = if(i==len-1) (dest + j.U) else {
1589              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1590            }
                // The extra entry of a row leaves source types 0 and 2 as DontCare.
1591            val src13Type = if (j == i+1) DontCare else SrcType.vp
1592            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1593            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1594            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1595            csBundle(i*(i+3)/2 + j).srcType(3) := SrcType.vp
1596            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1597            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1598            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
                // Every entry reads VECTOR_TMP_REG_LMUL as its fourth source.
1599            csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1600            csBundle(i*(i+3)/2 + j).ldest := vd
1601            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1602          }
1603        }
1604      }
          // Row count is chosen from vlmulReg: b001 -> 2, b010 -> 4, b011 -> 8.
          // Other encodings connect nothing here.
1605      switch(vlmulReg) {
1606        is("b001".U ){
1607          genCsBundle_VEC_COMPRESS(2)
1608        }
1609        is("b010".U ){
1610          genCsBundle_VEC_COMPRESS(4)
1611        }
1612        is("b011".U ){
1613          genCsBundle_VEC_COMPRESS(8)
1614        }
1615      }
1616    }
1617    is(UopSplitType.VEC_MVNR) {
          // One entry per register index i: all three sources and the destination
          // are the base register offset by i (src1 + i, src2 + i, dest + i), with
          // dest + i used both as lsrc(2) and as ldest.
1618      for (i <- 0 until MAX_VLMUL) {
1619        csBundle(i).lsrc(0) := src1 + i.U
1620        csBundle(i).lsrc(1) := src2 + i.U
1621        csBundle(i).lsrc(2) := dest + i.U
1622        csBundle(i).ldest := dest + i.U
1623        csBundle(i).uopIdx := i.U
1624      }
1625    }
1626    is(UopSplitType.VEC_US_LDST) {
          // Entry 0 is an i2v move that copies an integer-register source into the
          // temporary VECTOR_TMP_REG_LMUL. Entries 1 .. MAX_VLMUL are the memory
          // entries: each reads that temporary as lsrc(0) and uses dest + i as both
          // old vd (lsrc(2)) and destination. All entries are flagged vlsInstr.
1627      /*
1628      FMV.D.X
1629       */
1630      csBundle(0).srcType(0) := SrcType.reg
1631      csBundle(0).srcType(1) := SrcType.imm
1632      csBundle(0).lsrc(1) := 0.U
1633      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1634      csBundle(0).fuType := FuType.i2v.U
1635      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1636      csBundle(0).rfWen := false.B
1637      csBundle(0).fpWen := false.B
1638      csBundle(0).vecWen := true.B
1639      csBundle(0).vlsInstr := true.B
1640      //LMUL
1641      for (i <- 0 until MAX_VLMUL) {
1642        csBundle(i + 1).srcType(0) := SrcType.vp
1643        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1644        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1645        csBundle(i + 1).ldest := dest + i.U
            // uopIdx counts the memory entries only, so it is one less than the slot.
1646        csBundle(i + 1).uopIdx := i.U
1647        csBundle(i + 1).vlsInstr := true.B
1648      }
          // When isUsSegment is set, the first entry is marked waitForward and the
          // entry at numOfUop - 1 is marked blockBackward.
1649      csBundle.head.waitForward := isUsSegment
1650      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1651    }
1652    is(UopSplitType.VEC_S_LDST) {
          // Entries 0 and 1 are i2v moves that copy two integer-register sources
          // into the temporaries VECTOR_TMP_REG_LMUL and VECTOR_TMP_REG_LMUL + 1.
          // Entries 2 .. MAX_VLMUL + 1 are the memory entries: each reads both
          // temporaries and uses dest + i as old vd (lsrc(2)) and destination.
          // All entries are flagged vlsInstr.
1653      /*
1654      FMV.D.X
1655       */
1656      csBundle(0).srcType(0) := SrcType.reg
1657      csBundle(0).srcType(1) := SrcType.imm
1658      csBundle(0).lsrc(1) := 0.U
1659      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1660      csBundle(0).fuType := FuType.i2v.U
1661      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1662      csBundle(0).rfWen := false.B
1663      csBundle(0).fpWen := false.B
1664      csBundle(0).vecWen := true.B
1665      csBundle(0).vlsInstr := true.B
1666
          // Second move: its source register is the latched instruction's lsrc(1).
1667      csBundle(1).srcType(0) := SrcType.reg
1668      csBundle(1).srcType(1) := SrcType.imm
1669      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1670      csBundle(1).lsrc(1) := 0.U
1671      csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1672      csBundle(1).fuType := FuType.i2v.U
1673      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1674      csBundle(1).rfWen := false.B
1675      csBundle(1).fpWen := false.B
1676      csBundle(1).vecWen := true.B
1677      csBundle(1).vlsInstr := true.B
1678
1679      //LMUL
1680      for (i <- 0 until MAX_VLMUL) {
1681        csBundle(i + 2).srcType(0) := SrcType.vp
1682        csBundle(i + 2).srcType(1) := SrcType.vp
1683        csBundle(i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1684        csBundle(i + 2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1685        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1686        csBundle(i + 2).ldest := dest + i.U
            // uopIdx counts the memory entries only, so it is two less than the slot.
1687        csBundle(i + 2).uopIdx := i.U
1688        csBundle(i + 2).vlsInstr := true.B
1689      }
          // When isSdSegment is set, the first entry is marked waitForward and the
          // entry at numOfUop - 1 is marked blockBackward.
1690      csBundle.head.waitForward := isSdSegment
1691      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1692    }
1693    is(UopSplitType.VEC_I_LDST) {
          // Entry 0 is an i2v move into the temporary VECTOR_TMP_REG_LMUL; entries
          // 1 .. MAX_VLMUL are the memory entries. Their register operands are
          // derived in two different ways depending on nf:
          //   - nf === 0 : offsets for src2 and dest come from the
          //     indexedLSRegOffset lookup modules;
          //   - otherwise: the two local generators below are selected by
          //     (simple_lmul, nf) and by simple_emul respectively.

          // Connects sources 0 and 2, the destination and the write enables of the
          // memory entries. Entries with i < lmul * nf write the vector register
          // file and read old vd; the rest have vecWen cleared and srcType(2) = no.
          // srcType(1) is set to `no` here and is later overridden by the SRC1
          // generator, which is invoked afterwards.
1694      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
1695        for (i <- 0 until MAX_VLMUL) {
1696          val vecWen = if (i < lmul * nf) true.B else false.B
1697          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
1698          csBundle(i + 1).srcType(0) := SrcType.vp
1699          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1700          csBundle(i + 1).srcType(1) := SrcType.no
1701          csBundle(i + 1).lsrc(1) := src2 + i.U
1702          csBundle(i + 1).srcType(2) := src2Type
1703          csBundle(i + 1).lsrc(2) := dest + i.U
1704          csBundle(i + 1).ldest := dest + i.U
1705          csBundle(i + 1).rfWen := false.B
1706          csBundle(i + 1).fpWen := false.B
1707          csBundle(i + 1).vecWen := vecWen
1708          csBundle(i + 1).uopIdx := i.U
1709          csBundle(i + 1).vlsInstr := true.B
1710        }
1711      }
          // Connects source 1 of the memory entries: entries with i < emul read
          // src2 + i as a vector register, the rest get srcType(1) = no.
1712      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
1713        for (i <- 0 until MAX_VLMUL) {
1714          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1715          csBundle(i + 1).srcType(1) := src1Type
1716          csBundle(i + 1).lsrc(1) := src2 + i.U
1717        }
1718      }
1719
1720      val vlmul = vlmulReg
          // vsew and veew are zero-extended by one bit before the arithmetic below.
1721      val vsew = Cat(0.U(1.W), vsewReg)
1722      val veew = Cat(0.U(1.W), width)
          // a + 1 + ~b equals a - b in two's-complement arithmetic, so this is
          // veew - vsew + vlmul (modulo the result width).
1723      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
          // Compress the vlmul / vemul encodings b001, b010, b011 to 1, 2, 3;
          // every other encoding maps to 0.
1724      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
1725        "b001".U -> 1.U,
1726        "b010".U -> 2.U,
1727        "b011".U -> 3.U
1728      ))
1729      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
1730        "b001".U -> 1.U,
1731        "b010".U -> 2.U,
1732        "b011".U -> 3.U
1733      ))
          // Entry 0: i2v move of an integer-register source into VECTOR_TMP_REG_LMUL.
1734      csBundle(0).srcType(0) := SrcType.reg
1735      csBundle(0).srcType(1) := SrcType.imm
1736      csBundle(0).lsrc(1) := 0.U
1737      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1738      csBundle(0).fuType := FuType.i2v.U
1739      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1740      csBundle(0).rfWen := false.B
1741      csBundle(0).fpWen := false.B
1742      csBundle(0).vecWen := true.B
1743      csBundle(0).vlsInstr := true.B
1744
1745      //LMUL
1746      when(nf === 0.U) {
1747        for (i <- 0 until MAX_VLMUL) {
              // The lookup module for slot i is driven with {simple_emul, simple_lmul}
              // and returns the register offsets for src2 and dest.
1748          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1749          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1750          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1751          csBundle(i + 1).srcType(0) := SrcType.vp
1752          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
              // One-hot select of src2 + offsetVs2 among the MAX_VLMUL candidates.
1753          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1754          csBundle(i + 1).srcType(2) := SrcType.vp
1755          // lsrc2 is old vd
1756          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1757          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1758          csBundle(i + 1).uopIdx := i.U
1759          csBundle(i + 1).vlsInstr := true.B
1760        }
1761      }.otherwise{
1762        // nf > 1, is segment indexed load/store
1763        // gen src0, vd
            // The generator is called with lmul = 1 / 2 / 4 for simple_lmul 0 / 1 / 2
            // and with a field count of nf + 1. Only combinations where
            // lmul * (nf + 1) <= 8 are listed; other combinations connect nothing.
1764        switch(simple_lmul) {
1765          is(0.U) {
1766            switch(nf) {
1767              is(1.U) {
1768                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
1769              }
1770              is(2.U) {
1771                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
1772              }
1773              is(3.U) {
1774                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
1775              }
1776              is(4.U) {
1777                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
1778              }
1779              is(5.U) {
1780                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
1781              }
1782              is(6.U) {
1783                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
1784              }
1785              is(7.U) {
1786                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
1787              }
1788            }
1789          }
1790          is(1.U) {
1791            switch(nf) {
1792              is(1.U) {
1793                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
1794              }
1795              is(2.U) {
1796                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
1797              }
1798              is(3.U) {
1799                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
1800              }
1801            }
1802          }
1803          is(2.U) {
1804            switch(nf) {
1805              is(1.U) {
1806                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
1807              }
1808            }
1809          }
1810        }
1811
1812        // gen src1
            // Runs after the switch above, so its srcType(1) / lsrc(1) connections
            // take precedence. simple_emul 0 / 1 / 2 / 3 selects emul = 1 / 2 / 4 / 8.
1813        switch(simple_emul) {
1814          is(0.U) {
1815            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
1816          }
1817          is(1.U) {
1818            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
1819          }
1820          is(2.U) {
1821            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
1822          }
1823          is(3.U) {
1824            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
1825          }
1826        }
1827
1828        // when is vstore instructions, not set vecwen
1829        when(isVstore) {
1830          for (i <- 0 until MAX_VLMUL) {
1831            csBundle(i + 1).vecWen := false.B
1832          }
1833        }
1834      }
          // When isIxSegment is set, the first entry is marked waitForward and the
          // entry at numOfUop - 1 is marked blockBackward.
1835      csBundle.head.waitForward := isIxSegment
1836      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1837    }
1838  }
1839
1840  //readyFromRename Counter
      // Built from the negated outReadys paired with their port indices, with
      // RenameWidth.U as the default.
      // NOTE(review): presumably this yields the index of the first output port
      // that is not ready (i.e. the number of leading ready ports), or RenameWidth
      // when all are ready — confirm against PriorityMuxDefault.
1841  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1842
1843  // The left uops of the complex inst in ComplexDecoder can be send out this cycle
      // True when the remaining count uopRes does not exceed readyCounter.
1844  val thisAllOut = uopRes <= readyCounter
1845
1846  switch(state) {
        // Two-state control for emitting the entries of one instruction.
        // uopRes tracks how many entries remain to be sent.
1847    is(s_idle) {
          // A valid input starts a new instruction: load its entry count.
1848      when (inValid) {
1849        stateNext := s_active
1850        uopResNext := inUopInfo.numOfUop
1851      }
1852    }
1853    is(s_active) {
1854      when (thisAllOut) {
            // Everything that remained goes out this cycle: either start the next
            // instruction immediately or fall back to idle.
1855        when (inValid) {
1856          stateNext := s_active
1857          uopResNext := inUopInfo.numOfUop
1858        }.otherwise {
1859          stateNext := s_idle
1860          uopResNext := 0.U
1861        }
1862      }.otherwise {
            // Only readyCounter entries go out; keep the rest for later cycles.
1863        stateNext := s_active
1864        uopResNext := uopRes - readyCounter
1865      }
1866    }
1867  }
1868
1869  state := Mux(io.redirect, s_idle, stateNext)  // a redirect forces the idle state
1870  uopRes := Mux(io.redirect, 0.U, uopResNext)  // and clears the remaining count
1871
      // Number of entries sent this cycle: the smaller of uopRes and readyCounter.
1872  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1873
1874  for(i <- 0 until RenameWidth) {
        // Output port i is valid when at least i + 1 entries are sent.
1875    outValids(i) := complexNum > i.U
        // numOfUop - uopRes is the number of entries already sent, so port i carries
        // entry i + (numOfUop - uopRes). If that index is not below maxUopSize, the
        // last entry (maxUopSize - 1) is selected instead.
1876    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1877  }
1878
      // The count is reported only in the active state; otherwise 0.
1879  outComplexNum := Mux(state === s_active, complexNum, 0.U)
      // A new instruction is accepted when idle, or when active and the current
      // one finishes this cycle.
1880  inReady := state === s_idle || state === s_active && thisAllOut
1881
1882//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1883//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1884//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1885//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1886//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1887//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1888//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1889//
1890//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1891//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1892//    0.U)
1893//  validToRename.zipWithIndex.foreach{
1894//    case(dst, i) =>
1895//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1896//      dst := MuxCase(false.B, Seq(
1897//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1898//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1899//      ).toSeq)
1900//  }
1901//
1902//  readyToIBuf.zipWithIndex.foreach {
1903//    case (dst, i) =>
1904//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1905//      dst := MuxCase(true.B, Seq(
1906//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1907//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1908//      ).toSeq)
1909//  }
1910//
1911//  io.deq.decodedInsts := decodedInsts
1912//  io.deq.complexNum := complexNum
1913//  io.deq.validToRename := validToRename
1914//  io.deq.readyToIBuf := readyToIBuf
1915}
1916