xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 1bc48dd1fa0af361fd194c65bad3b86349ec2903)
1/***************************************************************************************
2  * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3  * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4  * Copyright (c) 2020-2021 Peng Cheng Laboratory
5  *
6  * XiangShan is licensed under Mulan PSL v2.
7  * You can use this software according to the terms and conditions of the Mulan PSL v2.
8  * You may obtain a copy of Mulan PSL v2 at:
9  *          http://license.coscl.org.cn/MulanPSL2
10  *
11  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14  *
15  * See the Mulan PSL v2 for more details.
16  ***************************************************************************************/
17
18package xiangshan.backend.decode
19
20import org.chipsalliance.cde.config.Parameters
21import chisel3._
22import chisel3.util._
23import freechips.rocketchip.rocket.Instructions
24import freechips.rocketchip.util.uintToBitPat
25import utils._
26import utility._
27import xiangshan.ExceptionNO.illegalInstr
28import xiangshan._
29import xiangshan.backend.fu.fpu.FPU
30import xiangshan.backend.fu.FuType
31import freechips.rocketchip.rocket.Instructions._
32import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
33import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
34import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
35import yunsuan.VpermType
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one uop slot of a non-segment indexed vector load/store.
  *
  * `uopIdx` is a Scala constant fixed when the module is built. The 4-bit `src` input is
  * decoded as `(emul << 2) | lmul`, where both fields take the values 0 to 3 and are used as
  * power-of-two exponents (`1 << emul`, `1 << lmul`). For each of the 16 combinations the
  * module outputs the two 3-bit offsets computed by [[genCsBundle_VEC_INDEXED_LDST]].
  * NOTE(review): presumably the offsets are added to the vs2 / vd base register numbers of
  * the uop by the instantiating module; confirm against the caller.
  *
  * @param uopIdx position of the uop inside the split instruction
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the `(offsetVs2, offsetVd)` pair for uop `uopIdx`.
    *
    * The number of uops is `1 << max(lmul, emul)`. The operand with the larger multiplier
    * advances by one per uop; the other advances once every
    * `1 << |emul - lmul|` uops.
    *
    * @param lmul   log2 of the first multiplier (used as a shift amount)
    * @param emul   log2 of the second multiplier (used as a shift amount)
    * @param uopIdx position of the uop
    * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` is outside the uop range
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    val uopCount = 1 << (lmul max emul)
    if (uopIdx < 0 || uopIdx >= uopCount) {
      // this uop slot does not exist for the given (lmul, emul)
      (0, 0)
    } else if (lmul < emul) {
      // lmul < emul: uop count follows emul, vd advances once per (1 << (emul - lmul)) uops
      (uopIdx, uopIdx / (1 << (emul - lmul)))
    } else {
      // lmul >= emul: uop count follows lmul, vs2 advances once per (1 << (lmul - emul)) uops
      (uopIdx / (1 << (lmul - emul)), uopIdx)
    }
  }

  // indexed load/store: one (emul, lmul, offsetVs2, offsetVd) row per selector value,
  // emul-major so the rows appear in increasing order of the 4-bit selector.
  // Kept as a `var` so the public member stays assignable exactly as before.
  var combVemulNf : Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Minimised combinational decoder: {emul, lmul} -> {offsetVs2, offsetVd}.
  // All 16 selector values are listed, so the all-zero default pattern is never selected.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat(((emul << 2) | lmul).U(4.W)), BitPat(((offsetVs2 << 3) | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Elaboration-time constants mixed into the vector uop-splitting decode logic.
  */
trait VectorConstants {
  /** Upper bound used when unrolling a vector instruction over its register group. */
  val MAX_VLMUL: Int = 8

  /** First logical register number of the temporary vector registers (32~46  ->  15). */
  val VECTOR_TMP_REG_LMUL: Int = 32

  /** Register number reserved for compress (in v0 regfile). */
  val VECTOR_COMPRESS: Int = 1

  /** NOTE(review): presumably the maximum uop count of an indexed load/store; confirm at use sites. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * Port list of the complex-instruction decoder (`DecodeUnitComp`).
  *
  * One already simply-decoded instruction enters through `in`; up to `RenameWidth`
  * decoded uops leave through `out.complexDecodedInsts`, each with its own
  * ready/valid handshake.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // NOTE(review): presumably asserted on a pipeline redirect/flush; usage is outside this bundle.
  val redirect: Bool = Input(Bool())
  // Custom CSR control signals supplied from outside.
  val csrCtrl: CustomCSRCtrlIO = Input(new CustomCSRCtrlIO)
  // Bypassed vtype value supplied from outside.
  val vtypeBypass: VType = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst: DecodedInst = new DecodedInst
    val uopInfo: UopInfo = new UopInfo
  }))
  // Decoded uops, one decoupled port per rename slot.
  val out = new Bundle {
    val complexDecodedInsts: Vec[DecoupledIO[DecodedInst]] =
      Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  // NOTE(review): presumably the number of uops presented on `out` this cycle; confirm in the module.
  val complexNum: UInt = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153  val vstartReg = latchedInst.vpu.vstart
154
155  //Type of uop Div
156  val typeOfSplit = latchedInst.uopSplitType
157  val src1Type = latchedInst.srcType(0)
158  val src1IsImm = src1Type === SrcType.imm
159  val src1IsFp = src1Type === SrcType.fp
160
161  val isVstore = FuType.isVStore(latchedInst.fuType)
162
163  numOfUop := latchedUopInfo.numOfUop
164  numOfWB := latchedUopInfo.numOfWB
165
166  //uops dispatch
167  val s_idle :: s_active :: Nil = Enum(2)
168  val state = RegInit(s_idle)
169  val stateNext = WireDefault(state)
170  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
171  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
172  val uopResNext = WireInit(uopRes)
173  val e64 = 3.U(2.W)
174  val isUsSegment = instFields.MOP === 0.U && ((nf =/= 0.U && instFields.LUMOP === 0.U) || instFields.LUMOP === "b10000".U)
175  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
176  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
177
178  //uop div up to maxUopSize
179  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
180  val fixedDecodedInst = Wire(Vec(maxUopSize, new DecodedInst))
181
182  csBundle.foreach { case dst =>
183    dst := latchedInst
184    dst.numUops := latchedUopInfo.numOfUop
185    dst.numWB := latchedUopInfo.numOfWB
186    dst.firstUop := false.B
187    dst.lastUop := false.B
188    dst.vlsInstr := false.B
189  }
190
191  csBundle(0).firstUop := true.B
192  csBundle(numOfUop - 1.U).lastUop := true.B
193
194  // when vstart is not zero, the last uop will modify vstart to zero
195  // therefore, blockback and flush pipe
196  csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U
197  csBundle(0.U).flushPipe := vstartReg =/= 0.U
198
199  switch(typeOfSplit) {
200    is(UopSplitType.VSET) {
201      // In simple decoder, rfWen and vecWen are not set
202      when(isVsetSimple) {
203        // Default
204        // uop0 set rd, never flushPipe
205        csBundle(0).fuType := FuType.vsetiwi.U
206        csBundle(0).flushPipe := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
207        csBundle(0).blockBackward := false.B
208        csBundle(0).rfWen := true.B
209        // uop1 set vl, vsetvl will flushPipe
210        csBundle(1).ldest := Vl_IDX.U
211        csBundle(1).vecWen := false.B
212        csBundle(1).vlWen := true.B
213        csBundle(1).flushPipe := false.B
214        csBundle(1).blockBackward := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
215        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
216          // write nothing, uop0 is a nop instruction
217          csBundle(0).rfWen := false.B
218          csBundle(0).fpWen := false.B
219          csBundle(0).vecWen := false.B
220          csBundle(0).vlWen := false.B
221          csBundle(1).fuType := FuType.vsetfwf.U
222          csBundle(1).srcType(0) := SrcType.no
223          csBundle(1).srcType(2) := SrcType.no
224          csBundle(1).srcType(3) := SrcType.no
225          csBundle(1).srcType(4) := SrcType.vp
226          csBundle(1).lsrc(4) := Vl_IDX.U
227        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
228          // uop0: mv vtype gpr to vector region
229          csBundle(0).srcType(0) := SrcType.xp
230          csBundle(0).srcType(1) := SrcType.no
231          csBundle(0).lsrc(0) := src2
232          csBundle(0).lsrc(1) := 0.U
233          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
234          csBundle(0).fuType := FuType.i2v.U
235          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
236          csBundle(0).rfWen := false.B
237          csBundle(0).fpWen := false.B
238          csBundle(0).vecWen := true.B
239          csBundle(0).vlWen := false.B
240          // uop1: uvsetvcfg_vv
241          csBundle(1).fuType := FuType.vsetfwf.U
242          // vl
243          csBundle(1).srcType(0) := SrcType.no
244          csBundle(1).srcType(2) := SrcType.no
245          csBundle(1).srcType(3) := SrcType.no
246          csBundle(1).srcType(4) := SrcType.vp
247          csBundle(1).lsrc(4) := Vl_IDX.U
248          // vtype
249          csBundle(1).srcType(1) := SrcType.vp
250          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
251          csBundle(1).vecWen := false.B
252          csBundle(1).vlWen := true.B
253          csBundle(1).ldest := Vl_IDX.U
254        }.elsewhen(dest === 0.U) {
255          // write nothing, uop0 is a nop instruction
256          csBundle(0).rfWen := false.B
257          csBundle(0).fpWen := false.B
258          csBundle(0).vecWen := false.B
259          csBundle(0).vlWen := false.B
260        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) {
261          // because vsetvl may modified src2 when src2 == rd,
262          // we need to modify vd in second uop to avoid dependency
263          // uop0 set vl
264          csBundle(0).fuType := FuType.vsetiwf.U
265          csBundle(0).ldest := Vl_IDX.U
266          csBundle(0).rfWen := false.B
267          csBundle(0).vlWen := true.B
268          // uop1 set rd
269          csBundle(1).fuType := FuType.vsetiwi.U
270          csBundle(1).ldest := dest
271          csBundle(1).rfWen := true.B
272          csBundle(1).vlWen := false.B
273        }
274        // use bypass vtype from vtypeGen
275        csBundle(0).vpu.connectVType(io.vtypeBypass)
276        csBundle(1).vpu.connectVType(io.vtypeBypass)
277      }
278    }
279    is(UopSplitType.VEC_VVV) {
280      for (i <- 0 until MAX_VLMUL) {
281        csBundle(i).lsrc(0) := src1 + i.U
282        csBundle(i).lsrc(1) := src2 + i.U
283        csBundle(i).lsrc(2) := dest + i.U
284        csBundle(i).ldest := dest + i.U
285        csBundle(i).uopIdx := i.U
286      }
287    }
288    is(UopSplitType.VEC_VFV) {
289      /*
290      f to vector move
291       */
292      csBundle(0).srcType(0) := SrcType.fp
293      csBundle(0).srcType(1) := SrcType.imm
294      csBundle(0).srcType(2) := SrcType.imm
295      csBundle(0).lsrc(1) := 0.U
296      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
297      csBundle(0).fuType := FuType.f2v.U
298      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
299      csBundle(0).vecWen := true.B
300      csBundle(0).vpu.isReverse := false.B
301      /*
302      LMUL
303       */
304      for (i <- 0 until MAX_VLMUL) {
305        csBundle(i + 1).srcType(0) := SrcType.vp
306        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
307        csBundle(i + 1).lsrc(1) := src2 + i.U
308        csBundle(i + 1).lsrc(2) := dest + i.U
309        csBundle(i + 1).ldest := dest + i.U
310        csBundle(i + 1).uopIdx := i.U
311      }
312    }
313    is(UopSplitType.VEC_EXT2) {
314      for (i <- 0 until MAX_VLMUL / 2) {
315        csBundle(2 * i).lsrc(1) := src2 + i.U
316        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
317        csBundle(2 * i).ldest := dest + (2 * i).U
318        csBundle(2 * i).uopIdx := (2 * i).U
319        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
320        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
321        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
322        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
323      }
324    }
325    is(UopSplitType.VEC_EXT4) {
326      for (i <- 0 until MAX_VLMUL / 4) {
327        csBundle(4 * i).lsrc(1) := src2 + i.U
328        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
329        csBundle(4 * i).ldest := dest + (4 * i).U
330        csBundle(4 * i).uopIdx := (4 * i).U
331        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
332        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
333        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
334        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
335        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
336        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
337        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
338        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
339        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
340        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
341        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
342        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
343      }
344    }
345    is(UopSplitType.VEC_EXT8) {
346      for (i <- 0 until MAX_VLMUL) {
347        csBundle(i).lsrc(1) := src2
348        csBundle(i).lsrc(2) := dest + i.U
349        csBundle(i).ldest := dest + i.U
350        csBundle(i).uopIdx := i.U
351      }
352    }
353    is(UopSplitType.VEC_0XV) {
354      /*
355      i/f to vector move
356       */
357      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
358      csBundle(0).srcType(1) := SrcType.imm
359      csBundle(0).srcType(2) := SrcType.imm
360      csBundle(0).lsrc(1) := 0.U
361      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
362      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
363      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
364      csBundle(0).rfWen := false.B
365      csBundle(0).fpWen := false.B
366      csBundle(0).vecWen := true.B
367      /*
368      vmv.s.x
369       */
370      csBundle(1).srcType(0) := SrcType.vp
371      csBundle(1).srcType(1) := SrcType.imm
372      csBundle(1).srcType(2) := SrcType.vp
373      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
374      csBundle(1).lsrc(1) := 0.U
375      csBundle(1).lsrc(2) := dest
376      csBundle(1).ldest := dest
377      csBundle(1).rfWen := false.B
378      csBundle(1).fpWen := false.B
379      csBundle(1).vecWen := true.B
380      csBundle(1).uopIdx := 0.U
381    }
382    is(UopSplitType.VEC_VXV) {
383      /*
384      i to vector move
385       */
386      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
387      csBundle(0).srcType(1) := SrcType.imm
388      csBundle(0).srcType(2) := SrcType.imm
389      csBundle(0).lsrc(1) := 0.U
390      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
391      csBundle(0).fuType := FuType.i2v.U
392      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
393      csBundle(0).vecWen := true.B
394      csBundle(0).vpu.isReverse := false.B
395      /*
396      LMUL
397       */
398      for (i <- 0 until MAX_VLMUL) {
399        csBundle(i + 1).srcType(0) := SrcType.vp
400        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
401        csBundle(i + 1).lsrc(1) := src2 + i.U
402        csBundle(i + 1).lsrc(2) := dest + i.U
403        csBundle(i + 1).ldest := dest + i.U
404        csBundle(i + 1).uopIdx := i.U
405      }
406    }
407    is(UopSplitType.VEC_VVW) {
408      for (i <- 0 until MAX_VLMUL / 2) {
409        csBundle(2 * i).lsrc(0) := src1 + i.U
410        csBundle(2 * i).lsrc(1) := src2 + i.U
411        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
412        csBundle(2 * i).ldest := dest + (2 * i).U
413        csBundle(2 * i).uopIdx := (2 * i).U
414        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
415        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
416        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
417        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
418        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
419      }
420    }
421    is(UopSplitType.VEC_VFW) {
422      /*
423      f to vector move
424       */
425      csBundle(0).srcType(0) := SrcType.fp
426      csBundle(0).srcType(1) := SrcType.imm
427      csBundle(0).srcType(2) := SrcType.imm
428      csBundle(0).lsrc(1) := 0.U
429      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
430      csBundle(0).fuType := FuType.f2v.U
431      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
432      csBundle(0).rfWen := false.B
433      csBundle(0).fpWen := false.B
434      csBundle(0).vecWen := true.B
435
436      for (i <- 0 until MAX_VLMUL / 2) {
437        csBundle(2 * i + 1).srcType(0) := SrcType.vp
438        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
439        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
440        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
441        csBundle(2 * i + 1).ldest := dest + (2 * i).U
442        csBundle(2 * i + 1).uopIdx := (2 * i).U
443        csBundle(2 * i + 2).srcType(0) := SrcType.vp
444        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
445        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
446        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
447        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
448        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
449      }
450    }
451    is(UopSplitType.VEC_WVW) {
452      for (i <- 0 until MAX_VLMUL / 2) {
453        csBundle(2 * i).lsrc(0) := src1 + i.U
454        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
455        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
456        csBundle(2 * i).ldest := dest + (2 * i).U
457        csBundle(2 * i).uopIdx := (2 * i).U
458        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
459        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
460        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
461        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
462        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
463      }
464    }
465    is(UopSplitType.VEC_VXW) {
466      /*
467      i to vector move
468       */
469      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
470      csBundle(0).srcType(1) := SrcType.imm
471      csBundle(0).srcType(2) := SrcType.imm
472      csBundle(0).lsrc(1) := 0.U
473      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
474      csBundle(0).fuType := FuType.i2v.U
475      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
476      csBundle(0).vecWen := true.B
477
478      for (i <- 0 until MAX_VLMUL / 2) {
479        csBundle(2 * i + 1).srcType(0) := SrcType.vp
480        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
481        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
482        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
483        csBundle(2 * i + 1).ldest := dest + (2 * i).U
484        csBundle(2 * i + 1).uopIdx := (2 * i).U
485        csBundle(2 * i + 2).srcType(0) := SrcType.vp
486        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
487        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
488        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
489        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
490        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
491      }
492    }
493    is(UopSplitType.VEC_WXW) {
494      /*
495      i to vector move
496       */
497      csBundle(0).srcType(0) := SrcType.reg
498      csBundle(0).srcType(1) := SrcType.imm
499      csBundle(0).srcType(2) := SrcType.imm
500      csBundle(0).lsrc(1) := 0.U
501      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
502      csBundle(0).fuType := FuType.i2v.U
503      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
504      csBundle(0).vecWen := true.B
505
506      for (i <- 0 until MAX_VLMUL / 2) {
507        csBundle(2 * i + 1).srcType(0) := SrcType.vp
508        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
509        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
510        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
511        csBundle(2 * i + 1).ldest := dest + (2 * i).U
512        csBundle(2 * i + 1).uopIdx := (2 * i).U
513        csBundle(2 * i + 2).srcType(0) := SrcType.vp
514        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
515        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
516        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
517        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
518        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
519      }
520    }
521    is(UopSplitType.VEC_WVV) {
522      for (i <- 0 until MAX_VLMUL / 2) {
523
524        csBundle(2 * i).lsrc(0) := src1 + i.U
525        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
526        csBundle(2 * i).lsrc(2) := dest + i.U
527        csBundle(2 * i).ldest := dest + i.U
528        csBundle(2 * i).uopIdx := (2 * i).U
529        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
530        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
531        csBundle(2 * i + 1).lsrc(2) := dest + i.U
532        csBundle(2 * i + 1).ldest := dest + i.U
533        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
534      }
535    }
536    is(UopSplitType.VEC_WFW) {
537      /*
538      f to vector move
539       */
540      csBundle(0).srcType(0) := SrcType.fp
541      csBundle(0).srcType(1) := SrcType.imm
542      csBundle(0).srcType(2) := SrcType.imm
543      csBundle(0).lsrc(1) := 0.U
544      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
545      csBundle(0).fuType := FuType.f2v.U
546      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
547      csBundle(0).rfWen := false.B
548      csBundle(0).fpWen := false.B
549      csBundle(0).vecWen := true.B
550
551      for (i <- 0 until MAX_VLMUL / 2) {
552        csBundle(2 * i + 1).srcType(0) := SrcType.vp
553        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
554        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
555        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
556        csBundle(2 * i + 1).ldest := dest + (2 * i).U
557        csBundle(2 * i + 1).uopIdx := (2 * i).U
558        csBundle(2 * i + 2).srcType(0) := SrcType.vp
559        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
560        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
561        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
562        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
563        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
564      }
565    }
566    is(UopSplitType.VEC_WXV) {
567      /*
568      i to vector move
569       */
570      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
571      csBundle(0).srcType(1) := SrcType.imm
572      csBundle(0).srcType(2) := SrcType.imm
573      csBundle(0).lsrc(1) := 0.U
574      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
575      csBundle(0).fuType := FuType.i2v.U
576      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
577      csBundle(0).vecWen := true.B
578
579      for (i <- 0 until MAX_VLMUL / 2) {
580        csBundle(2 * i + 1).srcType(0) := SrcType.vp
581        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
582        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
583        csBundle(2 * i + 1).lsrc(2) := dest + i.U
584        csBundle(2 * i + 1).ldest := dest + i.U
585        csBundle(2 * i + 1).uopIdx := (2 * i).U
586        csBundle(2 * i + 2).srcType(0) := SrcType.vp
587        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
588        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
589        csBundle(2 * i + 2).lsrc(2) := dest + i.U
590        csBundle(2 * i + 2).ldest := dest + i.U
591        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
592      }
593    }
594    is(UopSplitType.VEC_VVM) {
595      csBundle(0).lsrc(2) := dest
596      csBundle(0).ldest := dest
597      csBundle(0).uopIdx := 0.U
598      for (i <- 1 until MAX_VLMUL) {
599        csBundle(i).lsrc(0) := src1 + i.U
600        csBundle(i).lsrc(1) := src2 + i.U
601        csBundle(i).lsrc(2) := dest
602        csBundle(i).ldest := dest
603        csBundle(i).uopIdx := i.U
604      }
605    }
606    is(UopSplitType.VEC_VFM) {
607      /*
608      f to vector move
609       */
610      csBundle(0).srcType(0) := SrcType.fp
611      csBundle(0).srcType(1) := SrcType.imm
612      csBundle(0).srcType(2) := SrcType.imm
613      csBundle(0).lsrc(1) := 0.U
614      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
615      csBundle(0).fuType := FuType.f2v.U
616      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
617      csBundle(0).rfWen := false.B
618      csBundle(0).fpWen := false.B
619      csBundle(0).vecWen := true.B
620      //LMUL
621      csBundle(1).srcType(0) := SrcType.vp
622      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
623      csBundle(1).lsrc(2) := dest
624      csBundle(1).ldest := dest
625      csBundle(1).uopIdx := 0.U
626      for (i <- 1 until MAX_VLMUL) {
627        csBundle(i + 1).srcType(0) := SrcType.vp
628        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
629        csBundle(i + 1).lsrc(1) := src2 + i.U
630        csBundle(i + 1).lsrc(2) := dest
631        csBundle(i + 1).ldest := dest
632        csBundle(i + 1).uopIdx := i.U
633      }
634      csBundle(numOfUop - 1.U).ldest := dest
635    }
636    is(UopSplitType.VEC_VXM) {
637      /*
638      i to vector move
639       */
640      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
641      csBundle(0).srcType(1) := SrcType.imm
642      csBundle(0).srcType(2) := SrcType.imm
643      csBundle(0).lsrc(1) := 0.U
644      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
645      csBundle(0).fuType := FuType.i2v.U
646      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
647      csBundle(0).vecWen := true.B
648      //LMUL
649      csBundle(1).srcType(0) := SrcType.vp
650      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
651      csBundle(1).lsrc(2) := dest
652      csBundle(1).ldest := dest
653      csBundle(1).uopIdx := 0.U
654      for (i <- 1 until MAX_VLMUL) {
655        csBundle(i + 1).srcType(0) := SrcType.vp
656        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
657        csBundle(i + 1).lsrc(1) := src2 + i.U
658        csBundle(i + 1).lsrc(2) := dest
659        csBundle(i + 1).ldest := dest
660        csBundle(i + 1).uopIdx := i.U
661      }
662      csBundle(numOfUop - 1.U).ldest := dest
663    }
664    is(UopSplitType.VEC_SLIDE1UP) {
665      /*
666      i to vector move
667       */
668      csBundle(0).srcType(0) := SrcType.reg
669      csBundle(0).srcType(1) := SrcType.imm
670      csBundle(0).srcType(2) := SrcType.imm
671      csBundle(0).lsrc(1) := 0.U
672      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
673      csBundle(0).fuType := FuType.i2v.U
674      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
675      csBundle(0).vecWen := true.B
676      //LMUL
677      csBundle(1).srcType(0) := SrcType.vp
678      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
679      csBundle(1).lsrc(2) := dest
680      csBundle(1).ldest := dest
681      csBundle(1).uopIdx := 0.U
682      for (i <- 1 until MAX_VLMUL) {
683        csBundle(i + 1).srcType(0) := SrcType.vp
684        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
685        csBundle(i + 1).lsrc(1) := src2 + i.U
686        csBundle(i + 1).lsrc(2) := dest + i.U
687        csBundle(i + 1).ldest := dest + i.U
688        csBundle(i + 1).uopIdx := i.U
689      }
690    }
691    is(UopSplitType.VEC_FSLIDE1UP) {
692      /*
693      f to vector move
694       */
695      csBundle(0).srcType(0) := SrcType.fp
696      csBundle(0).srcType(1) := SrcType.imm
697      csBundle(0).srcType(2) := SrcType.imm
698      csBundle(0).lsrc(1) := 0.U
699      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
700      csBundle(0).fuType := FuType.f2v.U
701      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
702      csBundle(0).rfWen := false.B
703      csBundle(0).fpWen := false.B
704      csBundle(0).vecWen := true.B
705      //LMUL
706      csBundle(1).srcType(0) := SrcType.vp
707      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
708      csBundle(1).lsrc(1) := src2
709      csBundle(1).lsrc(2) := dest
710      csBundle(1).ldest := dest
711      csBundle(1).uopIdx := 0.U
712      for (i <- 1 until MAX_VLMUL) {
713        csBundle(i + 1).srcType(0) := SrcType.vp
714        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
715        csBundle(i + 1).lsrc(1) := src2 + i.U
716        csBundle(i + 1).lsrc(2) := dest + i.U
717        csBundle(i + 1).ldest := dest + i.U
718        csBundle(i + 1).uopIdx := i.U
719      }
720    }
721    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
722      /*
723      i to vector move
724       */
725      csBundle(0).srcType(0) := SrcType.reg
726      csBundle(0).srcType(1) := SrcType.imm
727      csBundle(0).srcType(2) := SrcType.imm
728      csBundle(0).lsrc(1) := 0.U
729      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
730      csBundle(0).fuType := FuType.i2v.U
731      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
732      csBundle(0).vecWen := true.B
733      //LMUL
734      for (i <- 0 until MAX_VLMUL) {
735        csBundle(2 * i + 1).srcType(0) := SrcType.vp
736        csBundle(2 * i + 1).srcType(1) := SrcType.vp
737        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
738        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
739        csBundle(2 * i + 1).lsrc(2) := dest + i.U
740        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
741        csBundle(2 * i + 1).uopIdx := (2 * i).U
742        if (2 * i + 2 < MAX_VLMUL * 2) {
743          csBundle(2 * i + 2).srcType(0) := SrcType.vp
744          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
745          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
746          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
747          csBundle(2 * i + 2).ldest := dest + i.U
748          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
749        }
750      }
751      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
752      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
753      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
754    }
755    is(UopSplitType.VEC_FSLIDE1DOWN) {
756      /*
757      f to vector move
758       */
759      csBundle(0).srcType(0) := SrcType.fp
760      csBundle(0).srcType(1) := SrcType.imm
761      csBundle(0).srcType(2) := SrcType.imm
762      csBundle(0).lsrc(1) := 0.U
763      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
764      csBundle(0).fuType := FuType.f2v.U
765      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
766      csBundle(0).rfWen := false.B
767      csBundle(0).fpWen := false.B
768      csBundle(0).vecWen := true.B
769      //LMUL
770      for (i <- 0 until MAX_VLMUL) {
771        csBundle(2 * i + 1).srcType(0) := SrcType.vp
772        csBundle(2 * i + 1).srcType(1) := SrcType.vp
773        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
774        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
775        csBundle(2 * i + 1).lsrc(2) := dest + i.U
776        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
777        csBundle(2 * i + 1).uopIdx := (2 * i).U
778        if (2 * i + 2 < MAX_VLMUL * 2) {
779          csBundle(2 * i + 2).srcType(0) := SrcType.vp
780          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
781          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
782          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
783          csBundle(2 * i + 2).ldest := dest + i.U
784          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
785        }
786      }
787      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
788      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
789      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
790    }
791    is(UopSplitType.VEC_VRED) {
792      when(vlmulReg === "b001".U) {
793        csBundle(0).srcType(2) := SrcType.DC
794        csBundle(0).lsrc(0) := src2 + 1.U
795        csBundle(0).lsrc(1) := src2
796        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
797        csBundle(0).uopIdx := 0.U
798      }
799      when(vlmulReg === "b010".U) {
800        csBundle(0).srcType(2) := SrcType.DC
801        csBundle(0).lsrc(0) := src2 + 1.U
802        csBundle(0).lsrc(1) := src2
803        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
804        csBundle(0).uopIdx := 0.U
805
806        csBundle(1).srcType(2) := SrcType.DC
807        csBundle(1).lsrc(0) := src2 + 3.U
808        csBundle(1).lsrc(1) := src2 + 2.U
809        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
810        csBundle(1).uopIdx := 1.U
811
812        csBundle(2).srcType(2) := SrcType.DC
813        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
814        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
815        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
816        csBundle(2).uopIdx := 2.U
817      }
818      when(vlmulReg === "b011".U) {
819        for (i <- 0 until MAX_VLMUL) {
820          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
821            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
822            csBundle(i).lsrc(1) := src2 + (i * 2).U
823            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
824          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
825            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
826            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
827            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
828          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
829            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
830            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
831            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
832          }
833          csBundle(i).srcType(2) := SrcType.DC
834          csBundle(i).uopIdx := i.U
835        }
836      }
837      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
838        /*
839         * 2 <= vlmul <= 8
840         */
841        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
842        csBundle(numOfUop - 1.U).lsrc(0) := src1
843        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
844        csBundle(numOfUop - 1.U).lsrc(2) := dest
845        csBundle(numOfUop - 1.U).ldest := dest
846        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
847      }
848    }
849    is(UopSplitType.VEC_VFRED) {
850      val vlmul = vlmulReg
851      val vsew = vsewReg
852      when(vlmul === VLmul.m8){
853        for (i <- 0 until 4) {
854          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
855          csBundle(i).lsrc(1) := src2 + (i * 2).U
856          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
857          csBundle(i).uopIdx := i.U
858        }
859        for (i <- 4 until 6) {
860          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
861          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
862          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
863          csBundle(i).uopIdx := i.U
864        }
865        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
866        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
867        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
868        csBundle(6).uopIdx := 6.U
869        when(vsew === VSew.e64) {
870          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
871          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
872          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
873          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
874          csBundle(7).uopIdx := 7.U
875          csBundle(8).lsrc(0) := src1
876          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
877          csBundle(8).ldest := dest
878          csBundle(8).uopIdx := 8.U
879        }
880        when(vsew === VSew.e32) {
881          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
882          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
883          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
884          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
885          csBundle(7).uopIdx := 7.U
886          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
887          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
888          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
889          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
890          csBundle(8).uopIdx := 8.U
891          csBundle(9).lsrc(0) := src1
892          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
893          csBundle(9).ldest := dest
894          csBundle(9).uopIdx := 9.U
895        }
896        when(vsew === VSew.e16) {
897          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
898          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
899          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
900          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
901          csBundle(7).uopIdx := 7.U
902          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
903          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
904          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
905          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
906          csBundle(8).uopIdx := 8.U
907          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
908          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
909          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
910          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
911          csBundle(9).uopIdx := 9.U
912          csBundle(10).lsrc(0) := src1
913          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
914          csBundle(10).ldest := dest
915          csBundle(10).uopIdx := 10.U
916        }
917      }
918      when(vlmul === VLmul.m4) {
919        for (i <- 0 until 2) {
920          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
921          csBundle(i).lsrc(1) := src2 + (i * 2).U
922          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
923          csBundle(i).uopIdx := i.U
924        }
925        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
926        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
927        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
928        csBundle(2).uopIdx := 2.U
929        when(vsew === VSew.e64) {
930          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
931          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
932          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
933          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
934          csBundle(3).uopIdx := 3.U
935          csBundle(4).lsrc(0) := src1
936          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
937          csBundle(4).ldest := dest
938          csBundle(4).uopIdx := 4.U
939        }
940        when(vsew === VSew.e32) {
941          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
942          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
943          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
944          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
945          csBundle(3).uopIdx := 3.U
946          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
947          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
948          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
949          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
950          csBundle(4).uopIdx := 4.U
951          csBundle(5).lsrc(0) := src1
952          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
953          csBundle(5).ldest := dest
954          csBundle(5).uopIdx := 5.U
955        }
956        when(vsew === VSew.e16) {
957          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
958          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
959          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
960          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
961          csBundle(3).uopIdx := 3.U
962          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
963          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
964          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
965          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
966          csBundle(4).uopIdx := 4.U
967          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
968          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
969          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
970          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
971          csBundle(5).uopIdx := 5.U
972          csBundle(6).lsrc(0) := src1
973          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
974          csBundle(6).ldest := dest
975          csBundle(6).uopIdx := 6.U
976        }
977      }
978      when(vlmul === VLmul.m2) {
979        csBundle(0).lsrc(0) := src2 + 1.U
980        csBundle(0).lsrc(1) := src2 + 0.U
981        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
982        csBundle(0).uopIdx := 0.U
983        when(vsew === VSew.e64) {
984          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
985          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
986          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
987          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
988          csBundle(1).uopIdx := 1.U
989          csBundle(2).lsrc(0) := src1
990          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
991          csBundle(2).ldest := dest
992          csBundle(2).uopIdx := 2.U
993        }
994        when(vsew === VSew.e32) {
995          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
996          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
997          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
998          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
999          csBundle(1).uopIdx := 1.U
1000          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1001          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1002          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1003          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1004          csBundle(2).uopIdx := 2.U
1005          csBundle(3).lsrc(0) := src1
1006          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1007          csBundle(3).ldest := dest
1008          csBundle(3).uopIdx := 3.U
1009        }
1010        when(vsew === VSew.e16) {
1011          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1012          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1013          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1014          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
1015          csBundle(1).uopIdx := 1.U
1016          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1017          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1018          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1019          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1020          csBundle(2).uopIdx := 2.U
1021          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
1022          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1023          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1024          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
1025          csBundle(3).uopIdx := 3.U
1026          csBundle(4).lsrc(0) := src1
1027          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1028          csBundle(4).ldest := dest
1029          csBundle(4).uopIdx := 4.U
1030        }
1031      }
1032      when(vlmul === VLmul.m1) {
1033        when(vsew === VSew.e64) {
1034          csBundle(0).lsrc(0) := src2
1035          csBundle(0).lsrc(1) := src2
1036          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1037          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1038          csBundle(0).uopIdx := 0.U
1039          csBundle(1).lsrc(0) := src1
1040          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1041          csBundle(1).ldest := dest
1042          csBundle(1).uopIdx := 1.U
1043        }
1044        when(vsew === VSew.e32) {
1045          csBundle(0).lsrc(0) := src2
1046          csBundle(0).lsrc(1) := src2
1047          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1048          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1049          csBundle(0).uopIdx := 0.U
1050          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1051          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1052          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1053          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1054          csBundle(1).uopIdx := 1.U
1055          csBundle(2).lsrc(0) := src1
1056          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1057          csBundle(2).ldest := dest
1058          csBundle(2).uopIdx := 2.U
1059        }
1060        when(vsew === VSew.e16) {
1061          csBundle(0).lsrc(0) := src2
1062          csBundle(0).lsrc(1) := src2
1063          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1064          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1065          csBundle(0).uopIdx := 0.U
1066          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1067          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1068          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1069          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1070          csBundle(1).uopIdx := 1.U
1071          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1072          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1073          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1074          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1075          csBundle(2).uopIdx := 2.U
1076          csBundle(3).lsrc(0) := src1
1077          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1078          csBundle(3).ldest := dest
1079          csBundle(3).uopIdx := 3.U
1080        }
1081      }
1082      when(vlmul === VLmul.mf2) {
1083        when(vsew === VSew.e32) {
1084          csBundle(0).lsrc(0) := src2
1085          csBundle(0).lsrc(1) := src2
1086          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1087          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1088          csBundle(0).uopIdx := 0.U
1089          csBundle(1).lsrc(0) := src1
1090          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1091          csBundle(1).ldest := dest
1092          csBundle(1).uopIdx := 1.U
1093        }
1094        when(vsew === VSew.e16) {
1095          csBundle(0).lsrc(0) := src2
1096          csBundle(0).lsrc(1) := src2
1097          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1098          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1099          csBundle(0).uopIdx := 0.U
1100          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1101          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1102          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1103          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1104          csBundle(1).uopIdx := 1.U
1105          csBundle(2).lsrc(0) := src1
1106          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1107          csBundle(2).ldest := dest
1108          csBundle(2).uopIdx := 2.U
1109        }
1110      }
1111      when(vlmul === VLmul.mf4) {
1112        when(vsew === VSew.e16) {
1113          csBundle(0).lsrc(0) := src2
1114          csBundle(0).lsrc(1) := src2
1115          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1116          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1117          csBundle(0).uopIdx := 0.U
1118          csBundle(1).lsrc(0) := src1
1119          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1120          csBundle(1).ldest := dest
1121          csBundle(1).uopIdx := 1.U
1122        }
1123      }
1124    }
1125
1126    is(UopSplitType.VEC_VFREDOSUM) {
1127      import yunsuan.VfaluType
1128      val vlmul = vlmulReg
1129      val vsew = vsewReg
1130      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1131      when(vlmul === VLmul.m8) {
1132        when(vsew === VSew.e64) {
1133          val vlmax = 16
1134          for (i <- 0 until vlmax) {
1135            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1137            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1138            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1139            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1140            csBundle(i).uopIdx := i.U
1141          }
1142        }
1143        when(vsew === VSew.e32) {
1144          val vlmax = 32
1145          for (i <- 0 until vlmax) {
1146            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1147            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1149            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1150            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1151            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1152            csBundle(i).uopIdx := i.U
1153          }
1154        }
1155        when(vsew === VSew.e16) {
1156          val vlmax = 64
1157          for (i <- 0 until vlmax) {
1158            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1161            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1162            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1163            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1164            csBundle(i).uopIdx := i.U
1165          }
1166        }
1167      }
1168      when(vlmul === VLmul.m4) {
1169        when(vsew === VSew.e64) {
1170          val vlmax = 8
1171          for (i <- 0 until vlmax) {
1172            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1173            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1174            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1175            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1176            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1177            csBundle(i).uopIdx := i.U
1178          }
1179        }
1180        when(vsew === VSew.e32) {
1181          val vlmax = 16
1182          for (i <- 0 until vlmax) {
1183            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1184            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1185            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1186            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1187            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1188            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1189            csBundle(i).uopIdx := i.U
1190          }
1191        }
1192        when(vsew === VSew.e16) {
1193          val vlmax = 32
1194          for (i <- 0 until vlmax) {
1195            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1197            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1198            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1199            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1200            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1201            csBundle(i).uopIdx := i.U
1202          }
1203        }
1204      }
1205      when(vlmul === VLmul.m2) {
1206        when(vsew === VSew.e64) {
1207          val vlmax = 4
1208          for (i <- 0 until vlmax) {
1209            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1211            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1212            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1213            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1214            csBundle(i).uopIdx := i.U
1215          }
1216        }
1217        when(vsew === VSew.e32) {
1218          val vlmax = 8
1219          for (i <- 0 until vlmax) {
1220            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1223            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1224            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1225            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1226            csBundle(i).uopIdx := i.U
1227          }
1228        }
1229        when(vsew === VSew.e16) {
1230          val vlmax = 16
1231          for (i <- 0 until vlmax) {
1232            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1235            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1236            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1237            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1238            csBundle(i).uopIdx := i.U
1239          }
1240        }
1241      }
1242      when(vlmul === VLmul.m1) {
1243        when(vsew === VSew.e64) {
1244          val vlmax = 2
1245          for (i <- 0 until vlmax) {
1246            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1248            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1249            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1250            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1251            csBundle(i).uopIdx := i.U
1252          }
1253        }
1254        when(vsew === VSew.e32) {
1255          val vlmax = 4
1256          for (i <- 0 until vlmax) {
1257            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1258            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1260            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1261            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1262            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1263            csBundle(i).uopIdx := i.U
1264          }
1265        }
1266        when(vsew === VSew.e16) {
1267          val vlmax = 8
1268          for (i <- 0 until vlmax) {
1269            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1270            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1271            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1272            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1273            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1274            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1275            csBundle(i).uopIdx := i.U
1276          }
1277        }
1278      }
1279      when(vlmul === VLmul.mf2) {
1280        when(vsew === VSew.e32) {
1281          val vlmax = 2
1282          for (i <- 0 until vlmax) {
1283            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1284            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1285            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1286            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1287            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1288            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1289            csBundle(i).uopIdx := i.U
1290          }
1291        }
1292        when(vsew === VSew.e16) {
1293          val vlmax = 4
1294          for (i <- 0 until vlmax) {
1295            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1296            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1297            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1298            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1299            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1300            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1301            csBundle(i).uopIdx := i.U
1302          }
1303        }
1304      }
1305      when(vlmul === VLmul.mf4) {
1306        when(vsew === VSew.e16) {
1307          val vlmax = 2
1308          for (i <- 0 until vlmax) {
1309            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1310            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1311            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1312            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1313            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1314            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1315            csBundle(i).uopIdx := i.U
1316          }
1317        }
1318      }
1319    }
1320
1321    is(UopSplitType.VEC_SLIDEUP) {
1322      // i to vector move
1323      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1324      csBundle(0).srcType(1) := SrcType.imm
1325      csBundle(0).srcType(2) := SrcType.imm
1326      csBundle(0).lsrc(1) := 0.U
1327      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1328      csBundle(0).fuType := FuType.i2v.U
1329      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1330      csBundle(0).vecWen := true.B
1331      // LMUL
1332      for (i <- 0 until MAX_VLMUL)
1333        for (j <- 0 to i) {
1334          val old_vd = if (j == 0) {
1335            dest + i.U
1336          } else (VECTOR_TMP_REG_LMUL + j).U
1337          val vd = if (j == i) {
1338            dest + i.U
1339          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1340          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1341          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1342          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1343          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1344          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1345          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1346        }
1347    }
1348
1349    is(UopSplitType.VEC_SLIDEDOWN) {
1350      // i to vector move
1351      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1352      csBundle(0).srcType(1) := SrcType.imm
1353      csBundle(0).srcType(2) := SrcType.imm
1354      csBundle(0).lsrc(1) := 0.U
1355      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1356      csBundle(0).fuType := FuType.i2v.U
1357      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1358      csBundle(0).vecWen := true.B
1359      // LMUL
1360      for (i <- 0 until MAX_VLMUL)
1361        for (j <- (0 to i).reverse) {
1362          when(i.U < lmul) {
1363            val old_vd = if (j == 0) {
1364              dest + lmul - 1.U - i.U
1365            } else (VECTOR_TMP_REG_LMUL + j).U
1366            val vd = if (j == i) {
1367              dest + lmul - 1.U - i.U
1368            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1369            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1370            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1371            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1372            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1373            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1374            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1375          }
1376        }
1377    }
1378
1379    is(UopSplitType.VEC_M0X) {
1380      // LMUL
1381      for (i <- 0 until MAX_VLMUL) {
1382        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1383        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1384        csBundle(i).srcType(0) := srcType0
1385        csBundle(i).srcType(1) := SrcType.vp
1386        csBundle(i).rfWen := false.B
1387        csBundle(i).fpWen := false.B
1388        csBundle(i).vecWen := true.B
1389        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1390        csBundle(i).lsrc(1) := src2
1391        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1392        csBundle(i).ldest := ldest
1393        csBundle(i).uopIdx := i.U
1394      }
1395      csBundle(numOfUop - 1.U).rfWen := Mux(dest === 0.U, false.B, true.B)
1396      csBundle(numOfUop - 1.U).fpWen := false.B
1397      csBundle(numOfUop - 1.U).vecWen := false.B
1398      csBundle(numOfUop - 1.U).ldest := dest
1399    }
1400
1401    is(UopSplitType.VEC_MVV) {
1402      // LMUL
1403      for (i <- 0 until MAX_VLMUL) {
1404        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1405        csBundle(i * 2 + 0).srcType(0) := srcType0
1406        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1407        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1408        csBundle(i * 2 + 0).lsrc(1) := src2
1409        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1410        csBundle(i * 2 + 0).ldest := dest + i.U
1411        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1412
1413        csBundle(i * 2 + 1).srcType(0) := srcType0
1414        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1415        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1416        csBundle(i * 2 + 1).lsrc(1) := src2
1417        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1418        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1419        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1420      }
1421    }
1422    is(UopSplitType.VEC_VWW) {
1423      for (i <- 0 until MAX_VLMUL*2) {
1424        when(i.U < lmul){
1425          csBundle(i).srcType(2) := SrcType.DC
1426          csBundle(i).lsrc(0) := src2 + i.U
1427          csBundle(i).lsrc(1) := src2 + i.U
1428          // csBundle(i).lsrc(2) := dest + (2 * i).U
1429          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1430          csBundle(i).uopIdx :=  i.U
1431        } otherwise {
1432          csBundle(i).srcType(2) := SrcType.DC
1433          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1434          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1435          // csBundle(i).lsrc(2) := dest + (2 * i).U
1436          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1437          csBundle(i).uopIdx := i.U
1438        }
1439        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1440        csBundle(numOfUop-1.U).lsrc(0) := src1
1441        csBundle(numOfUop-1.U).lsrc(2) := dest
1442        csBundle(numOfUop-1.U).ldest := dest
1443      }
1444    }
1445    is(UopSplitType.VEC_RGATHER) {
1446      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1447        for (i <- 0 until len)
1448          for (j <- 0 until len) {
1449            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1450            // csBundle(i * len + j).srcType(1) := SrcType.vp
1451            // csBundle(i * len + j).srcType(2) := SrcType.vp
1452            csBundle(i * len + j).lsrc(0) := src1 + i.U
1453            csBundle(i * len + j).lsrc(1) := src2 + j.U
1454            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1455            csBundle(i * len + j).lsrc(2) := vd_old
1456            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1457            csBundle(i * len + j).ldest := vd
1458            csBundle(i * len + j).uopIdx := (i * len + j).U
1459          }
1460      }
1461      switch(vlmulReg) {
1462        is("b001".U ){
1463          genCsBundle_VEC_RGATHER(2)
1464        }
1465        is("b010".U ){
1466          genCsBundle_VEC_RGATHER(4)
1467        }
1468        is("b011".U ){
1469          genCsBundle_VEC_RGATHER(8)
1470        }
1471      }
1472    }
1473    is(UopSplitType.VEC_RGATHER_VX) {
1474      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1475        for (i <- 0 until len)
1476          for (j <- 0 until len) {
1477            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1478            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1479            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1480            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1481            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1482            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1483            csBundle(i * len + j + 1).lsrc(2) := vd_old
1484            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1485            csBundle(i * len + j + 1).ldest := vd
1486            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1487          }
1488      }
1489      // i to vector move
1490      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1491      csBundle(0).srcType(1) := SrcType.imm
1492      csBundle(0).srcType(2) := SrcType.imm
1493      csBundle(0).lsrc(1) := 0.U
1494      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1495      csBundle(0).fuType := FuType.i2v.U
1496      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1497      csBundle(0).rfWen := false.B
1498      csBundle(0).fpWen := false.B
1499      csBundle(0).vecWen := true.B
1500      genCsBundle_RGATHER_VX(1)
1501      switch(vlmulReg) {
1502        is("b001".U ){
1503          genCsBundle_RGATHER_VX(2)
1504        }
1505        is("b010".U ){
1506          genCsBundle_RGATHER_VX(4)
1507        }
1508        is("b011".U ){
1509          genCsBundle_RGATHER_VX(8)
1510        }
1511      }
1512    }
1513    is(UopSplitType.VEC_RGATHEREI16) {
1514      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1515        for (i <- 0 until len)
1516          for (j <- 0 until len) {
1517            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1518            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1519            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1520            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1521            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1522            csBundle((i * len + j)*2+0).ldest := vd0
1523            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1524            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1525            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1526            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1527            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1528            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1529            csBundle((i * len + j)*2+1).ldest := vd1
1530            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1531          }
1532      }
1533      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1534        for (i <- 0 until len)
1535          for (j <- 0 until len) {
1536            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1537            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1538            csBundle(i * len + j).lsrc(0) := src1 + i.U
1539            csBundle(i * len + j).lsrc(1) := src2 + j.U
1540            csBundle(i * len + j).lsrc(2) := vd_old
1541            csBundle(i * len + j).ldest := vd
1542            csBundle(i * len + j).uopIdx := (i * len + j).U
1543          }
1544      }
1545      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1546        for (i <- 0 until len)
1547          for (j <- 0 until len) {
1548            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1549            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1550            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1551            csBundle(i * len + j).lsrc(1) := src2 + j.U
1552            csBundle(i * len + j).lsrc(2) := vd_old
1553            csBundle(i * len + j).ldest := vd
1554            csBundle(i * len + j).uopIdx := (i * len + j).U
1555          }
1556      }
1557      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1558        for (i <- 0 until len)
1559          for (j <- 0 until len) {
1560            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1561            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1562            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1563            csBundle(i * len + j).lsrc(1) := src2 + j.U
1564            csBundle(i * len + j).lsrc(2) := vd_old
1565            csBundle(i * len + j).ldest := vd
1566            csBundle(i * len + j).uopIdx := (i * len + j).U
1567          }
1568      }
1569      when(!vsewReg.orR){
1570        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1571      }.elsewhen(vsewReg === VSew.e32){
1572        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1573      }.elsewhen(vsewReg === VSew.e64){
1574        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1575      }.otherwise{
1576        genCsBundle_VEC_RGATHEREI16(1)
1577      }
1578      switch(vlmulReg) {
1579        is("b001".U) {
1580          when(!vsewReg.orR) {
1581            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1582          }.elsewhen(vsewReg === VSew.e32){
1583            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1584          }.elsewhen(vsewReg === VSew.e64){
1585            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1586          }.otherwise{
1587            genCsBundle_VEC_RGATHEREI16(2)
1588          }
1589        }
1590        is("b010".U) {
1591          when(!vsewReg.orR) {
1592            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1593          }.elsewhen(vsewReg === VSew.e32){
1594            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1595          }.elsewhen(vsewReg === VSew.e64){
1596            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1597          }.otherwise{
1598            genCsBundle_VEC_RGATHEREI16(4)
1599          }
1600        }
1601        is("b011".U) {
1602          when(vsewReg === VSew.e32){
1603            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1604          }.elsewhen(vsewReg === VSew.e64){
1605            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1606          }.otherwise{
1607            genCsBundle_VEC_RGATHEREI16(8)
1608          }
1609        }
1610      }
1611    }
1612    is(UopSplitType.VEC_COMPRESS) {
1613      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1614        for (i <- 0 until len) {
1615          val jlen = if (i == len-1) i+1 else i+2
1616          for (j <- 0 until jlen) {
1617            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1618            val vd = if(i==len-1) (dest + j.U) else {
1619              if (j == i+1) VECTOR_TMP_REG_LMUL.U  else (VECTOR_TMP_REG_LMUL + j + 1).U
1620            }
1621            csBundle(i*(i+3)/2 + j).vecWen := true.B
1622            csBundle(i*(i+3)/2 + j).v0Wen := false.B
1623            val src13Type = if (j == i+1) DontCare else SrcType.vp
1624            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1625            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1626            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1627            if (i == 0) {
1628              csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1629            } else {
1630              csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1631            }
1632            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1633            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1634            csBundle(i*(i+3)/2 + j).ldest := vd
1635            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1636          }
1637        }
1638      }
1639      switch(vlmulReg) {
1640        is("b001".U ){
1641          genCsBundle_VEC_COMPRESS(2)
1642        }
1643        is("b010".U ){
1644          genCsBundle_VEC_COMPRESS(4)
1645        }
1646        is("b011".U ){
1647          genCsBundle_VEC_COMPRESS(8)
1648        }
1649      }
1650    }
1651    is(UopSplitType.VEC_MVNR) {
1652      for (i <- 0 until MAX_VLMUL) {
1653        csBundle(i).lsrc(0) := src1 + i.U
1654        csBundle(i).lsrc(1) := src2 + i.U
1655        csBundle(i).lsrc(2) := dest + i.U
1656        csBundle(i).ldest := dest + i.U
1657        csBundle(i).uopIdx := i.U
1658      }
1659    }
1660    is(UopSplitType.VEC_US_LDST) {
1661      /*
1662      FMV.D.X
1663       */
1664      csBundle(0).srcType(0) := SrcType.reg
1665      csBundle(0).srcType(1) := SrcType.imm
1666      csBundle(0).lsrc(1) := 0.U
1667      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1668      csBundle(0).fuType := FuType.i2v.U
1669      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1670      csBundle(0).rfWen := false.B
1671      csBundle(0).fpWen := false.B
1672      csBundle(0).vecWen := true.B
1673      csBundle(0).vlsInstr := true.B
1674      //LMUL
1675      for (i <- 0 until MAX_VLMUL) {
1676        csBundle(i + 1).srcType(0) := SrcType.vp
1677        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1678        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1679        csBundle(i + 1).ldest := dest + i.U
1680        csBundle(i + 1).uopIdx := i.U
1681        csBundle(i + 1).vlsInstr := true.B
1682      }
1683      csBundle.head.waitForward := isUsSegment
1684      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1685    }
1686    is(UopSplitType.VEC_US_FF_LD) {
1687      csBundle(0).srcType(0) := SrcType.reg
1688      csBundle(0).srcType(1) := SrcType.imm
1689      csBundle(0).lsrc(1) := 0.U
1690      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1691      csBundle(0).fuType := FuType.i2v.U
1692      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1693      csBundle(0).rfWen := false.B
1694      csBundle(0).fpWen := false.B
1695      csBundle(0).vecWen := true.B
1696      csBundle(0).vlsInstr := true.B
1697      //LMUL
1698      for (i <- 0 until MAX_VLMUL) {
1699        csBundle(i + 1).srcType(0) := SrcType.vp
1700        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1701        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1702        csBundle(i + 1).ldest := dest + i.U
1703        csBundle(i + 1).uopIdx := i.U
1704        csBundle(i + 1).vlsInstr := true.B
1705      }
1706      csBundle.head.waitForward := isUsSegment
1707      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1708      // last uop read vl and write vl
1709      csBundle(numOfUop - 1.U).srcType(0) := SrcType.no
1710      csBundle(numOfUop - 1.U).srcType(1) := SrcType.no
1711      csBundle(numOfUop - 1.U).srcType(2) := SrcType.no
1712      csBundle(numOfUop - 1.U).srcType(3) := SrcType.no
1713      csBundle(numOfUop - 1.U).srcType(4) := SrcType.vp
1714      csBundle(numOfUop - 1.U).lsrc(4) := Vl_IDX.U
1715      // vtype
1716      csBundle(numOfUop - 1.U).vecWen := false.B
1717      csBundle(numOfUop - 1.U).vlWen := true.B
1718      csBundle(numOfUop - 1.U).ldest := Vl_IDX.U
1719    }
1720    is(UopSplitType.VEC_S_LDST) {
1721      /*
1722      FMV.D.X
1723       */
1724      csBundle(0).srcType(0) := SrcType.reg
1725      csBundle(0).srcType(1) := SrcType.imm
1726      csBundle(0).lsrc(1) := 0.U
1727      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1728      csBundle(0).fuType := FuType.i2v.U
1729      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1730      csBundle(0).rfWen := false.B
1731      csBundle(0).fpWen := false.B
1732      csBundle(0).vecWen := true.B
1733      csBundle(0).vlsInstr := true.B
1734
1735      csBundle(1).srcType(0) := SrcType.reg
1736      csBundle(1).srcType(1) := SrcType.imm
1737      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1738      csBundle(1).lsrc(1) := 0.U
1739      csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1740      csBundle(1).fuType := FuType.i2v.U
1741      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1742      csBundle(1).rfWen := false.B
1743      csBundle(1).fpWen := false.B
1744      csBundle(1).vecWen := true.B
1745      csBundle(1).vlsInstr := true.B
1746
1747      //LMUL
1748      for (i <- 0 until MAX_VLMUL) {
1749        csBundle(i + 2).srcType(0) := SrcType.vp
1750        csBundle(i + 2).srcType(1) := SrcType.vp
1751        csBundle(i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1752        csBundle(i + 2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1753        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1754        csBundle(i + 2).ldest := dest + i.U
1755        csBundle(i + 2).uopIdx := i.U
1756        csBundle(i + 2).vlsInstr := true.B
1757      }
1758      csBundle.head.waitForward := isSdSegment
1759      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1760    }
1761    is(UopSplitType.VEC_I_LDST) {
1762      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
1763        for (i <- 0 until MAX_VLMUL) {
1764          val vecWen = if (i < lmul * nf) true.B else false.B
1765          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
1766          csBundle(i + 1).srcType(0) := SrcType.vp
1767          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1768          csBundle(i + 1).srcType(1) := SrcType.no
1769          csBundle(i + 1).lsrc(1) := src2 + i.U
1770          csBundle(i + 1).srcType(2) := src2Type
1771          csBundle(i + 1).lsrc(2) := dest + i.U
1772          csBundle(i + 1).ldest := dest + i.U
1773          csBundle(i + 1).rfWen := false.B
1774          csBundle(i + 1).fpWen := false.B
1775          csBundle(i + 1).vecWen := vecWen
1776          csBundle(i + 1).uopIdx := i.U
1777          csBundle(i + 1).vlsInstr := true.B
1778        }
1779      }
1780      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
1781        for (i <- 0 until MAX_VLMUL) {
1782          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1783          csBundle(i + 1).srcType(1) := src1Type
1784          csBundle(i + 1).lsrc(1) := src2 + i.U
1785        }
1786      }
1787
1788      val vlmul = vlmulReg
1789      val vsew = Cat(0.U(1.W), vsewReg)
1790      val veew = Cat(0.U(1.W), width)
1791      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
1792      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Seq(
1793        "b001".U -> 1.U,
1794        "b010".U -> 2.U,
1795        "b011".U -> 3.U
1796      ))
1797      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Seq(
1798        "b001".U -> 1.U,
1799        "b010".U -> 2.U,
1800        "b011".U -> 3.U
1801      ))
1802      csBundle(0).srcType(0) := SrcType.reg
1803      csBundle(0).srcType(1) := SrcType.imm
1804      csBundle(0).lsrc(1) := 0.U
1805      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1806      csBundle(0).fuType := FuType.i2v.U
1807      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1808      csBundle(0).rfWen := false.B
1809      csBundle(0).fpWen := false.B
1810      csBundle(0).vecWen := true.B
1811      csBundle(0).vlsInstr := true.B
1812
1813      //LMUL
1814      when(nf === 0.U) {
1815        for (i <- 0 until MAX_VLMUL) {
1816          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1817          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1818          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1819          csBundle(i + 1).srcType(0) := SrcType.vp
1820          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1821          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1822          csBundle(i + 1).srcType(2) := SrcType.vp
1823          // lsrc2 is old vd
1824          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1825          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1826          csBundle(i + 1).uopIdx := i.U
1827          csBundle(i + 1).vlsInstr := true.B
1828        }
1829      }.otherwise{
1830        // nf > 1, is segment indexed load/store
1831        // gen src0, vd
1832        switch(simple_lmul) {
1833          is(0.U) {
1834            switch(nf) {
1835              is(1.U) {
1836                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
1837              }
1838              is(2.U) {
1839                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
1840              }
1841              is(3.U) {
1842                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
1843              }
1844              is(4.U) {
1845                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
1846              }
1847              is(5.U) {
1848                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
1849              }
1850              is(6.U) {
1851                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
1852              }
1853              is(7.U) {
1854                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
1855              }
1856            }
1857          }
1858          is(1.U) {
1859            switch(nf) {
1860              is(1.U) {
1861                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
1862              }
1863              is(2.U) {
1864                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
1865              }
1866              is(3.U) {
1867                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
1868              }
1869            }
1870          }
1871          is(2.U) {
1872            switch(nf) {
1873              is(1.U) {
1874                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
1875              }
1876            }
1877          }
1878        }
1879
1880        // gen src1
1881        switch(simple_emul) {
1882          is(0.U) {
1883            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
1884          }
1885          is(1.U) {
1886            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
1887          }
1888          is(2.U) {
1889            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
1890          }
1891          is(3.U) {
1892            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
1893          }
1894        }
1895
1896        // when is vstore instructions, not set vecwen
1897        when(isVstore) {
1898          for (i <- 0 until MAX_VLMUL) {
1899            csBundle(i + 1).vecWen := false.B
1900          }
1901        }
1902      }
1903      csBundle.head.waitForward := isIxSegment
1904      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1905    }
1906  }
1907
1908  //readyFromRename Counter
1909  val readyCounter = Mux(outReadys.head, RenameWidth.U, 0.U)
1910
1911  // The left uops of the complex inst in ComplexDecoder can be send out this cycle
1912  val thisAllOut = uopRes <= readyCounter
1913
1914  switch(state) {
1915    is(s_idle) {
1916      when (inValid) {
1917        stateNext := s_active
1918        uopResNext := inUopInfo.numOfUop
1919      }
1920    }
1921    is(s_active) {
1922      when (thisAllOut) {
1923        when (inValid) {
1924          stateNext := s_active
1925          uopResNext := inUopInfo.numOfUop
1926        }.otherwise {
1927          stateNext := s_idle
1928          uopResNext := 0.U
1929        }
1930      }.otherwise {
1931        stateNext := s_active
1932        uopResNext := uopRes - readyCounter
1933      }
1934    }
1935  }
1936
1937  state := Mux(io.redirect, s_idle, stateNext)
1938  uopRes := Mux(io.redirect, 0.U, uopResNext)
1939
1940  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1941
1942  fixedDecodedInst := csBundle
1943
1944  // when vstart is not zero, the last uop will modify vstart to zero
1945  // therefore, blockback and flush pipe
1946  fixedDecodedInst(numOfUop - 1.U).flushPipe := (vstartReg =/= 0.U) || latchedInst.flushPipe
1947
1948  for(i <- 0 until RenameWidth) {
1949    outValids(i) := complexNum > i.U
1950    outDecodedInsts(i) := fixedDecodedInst(i.U + numOfUop - uopRes)
1951  }
1952
1953  outComplexNum := Mux(state === s_active, complexNum, 0.U)
1954  inReady := state === s_idle || state === s_active && thisAllOut
1955
1956
1957  XSError(inValid && inUopInfo.numOfUop === 0.U,
1958    p"uop number ${inUopInfo.numOfUop} is illegal, cannot be zero")
1959//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1960//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1961//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1962//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1963//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1964//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1965//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1966//
1967//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1968//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1969//    0.U)
1970//  validToRename.zipWithIndex.foreach{
1971//    case(dst, i) =>
1972//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1973//      dst := MuxCase(false.B, Seq(
1974//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1975//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1976//      ).toSeq)
1977//  }
1978//
1979//  readyToIBuf.zipWithIndex.foreach {
1980//    case (dst, i) =>
1981//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1982//      dst := MuxCase(true.B, Seq(
1983//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1984//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1985//      ).toSeq)
1986//  }
1987//
1988//  io.deq.decodedInsts := decodedInsts
1989//  io.deq.complexNum := complexNum
1990//  io.deq.validToRename := validToRename
1991//  io.deq.readyToIBuf := readyToIBuf
1992}
1993