xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 94aa21c6009c2f39c5c5dae9c87260c78887efcc)
1/***************************************************************************************
2  * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3  * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4  * Copyright (c) 2020-2021 Peng Cheng Laboratory
5  *
6  * XiangShan is licensed under Mulan PSL v2.
7  * You can use this software according to the terms and conditions of the Mulan PSL v2.
8  * You may obtain a copy of Mulan PSL v2 at:
9  *          http://license.coscl.org.cn/MulanPSL2
10  *
11  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14  *
15  * See the Mulan PSL v2 for more details.
16  ***************************************************************************************/
17
18package xiangshan.backend.decode
19
20import org.chipsalliance.cde.config.Parameters
21import chisel3._
22import chisel3.util._
23import freechips.rocketchip.rocket.Instructions
24import freechips.rocketchip.util.uintToBitPat
25import utils._
26import utility._
27import xiangshan.ExceptionNO.illegalInstr
28import xiangshan._
29import xiangshan.backend.fu.fpu.FPU
30import xiangshan.backend.fu.FuType
31import freechips.rocketchip.rocket.Instructions._
32import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
33import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
34import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul, Vl}
35import yunsuan.VpermType
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Constant lookup table for one uop of a non-segment indexed vector load/store.
  *
  * For the fixed `uopIdx`, the register offsets of vs2 and vd are computed at
  * elaboration time for every (emul, lmul) combination and then turned into
  * combinational logic by a QMC-minimized decoder.
  *
  * Ports:
  *  - `src`          : 4-bit key laid out as `emul << 2 | lmul`, each field in 0..3
  *  - `outOffsetVs2` : 3-bit register offset to add to vs2 for this uop
  *  - `outOffsetVd`  : 3-bit register offset to add to vd for this uop
  *
  * `emul` and `lmul` are used as shift amounts (`1 << emul`), i.e. they are log2
  * values. NOTE(review): presumably `emul` is the EMUL of the index operand and
  * `lmul` the LMUL of the data operand - confirm against the caller.
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Register offsets of one uop of a non-segment indexed load/store.
    *
    * The instruction is split into `1 << max(lmul, emul)` uops. The operand with
    * the larger multiplier advances by one register per uop; the other operand
    * advances once every `1 << |emul - lmul|` uops.
    *
    * @param lmul   log2 of the vd register-group size
    * @param emul   log2 of the vs2 register-group size
    * @param uopIdx index of the uop (this parameter shadows the class parameter)
    * @return `(offsetVs2, offsetVd)`, or `(0, 0)` when `uopIdx` is not a valid
    *         uop index for this (lmul, emul) pair
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    val uopNum = 1 << math.max(lmul, emul)
    if (uopIdx < 0 || uopIdx >= uopNum) {
      // this uop does not exist for the given multipliers
      (0, 0)
    } else if (lmul < emul) {
      // lmul < emul: the uop count depends on emul, vs2 advances every uop
      (uopIdx, uopIdx / (1 << (emul - lmul)))
    } else {
      // lmul >= emul: the uop count depends on lmul, vd advances every uop
      (uopIdx / (1 << (lmul - emul)), uopIdx)
    }
  }

  // indexed load/store: one (emul, lmul, offsetVs2, offsetVd) row per combination,
  // enumerated with emul as the outer and lmul as the inner index
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // All 16 input patterns are listed, so the BitPat.N(6) default is never selected.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat(((emul << 2) | lmul).U(4.W)), BitPat(((offsetVs2 << 3) | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Integer constants mixed into the vector uop-splitting logic.
  */
trait VectorConstants {
  /** Upper bound of the per-register loops (`0 until MAX_VLMUL`) in the uop splitter. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as destination of the scalar-to-vector move uops. */
  val VECTOR_TMP_REG_LMUL: Int = 32 // 32~46  ->  15

  // NOTE(review): not referenced in the visible part of this file; original note kept.
  val VECTOR_COMPRESS: Int = 1 // in v0 regfile

  // NOTE(review): presumably the maximum uop count of an indexed load/store - confirm at use site.
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * Port list of the complex-instruction decoder (`DecodeUnitComp`).
  *
  *  - `redirect`    : single-bit input
  *  - `csrCtrl`     : CSR control input
  *  - `vtypeBypass` : vtype input, connected to the vset uops inside the decoder
  *  - `in`          : decoupled input carrying the simply-decoded instruction and
  *                    its uop info
  *  - `out`         : `RenameWidth` decoupled outputs, one decoded uop each
  *  - `complexNum`  : 3-bit count output
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  val vtypeBypass = Input(new VType)

  // The complex instruction is passed in here when it is the first
  // instruction of the decode vector.
  val in = Flipped(new DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))

  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, new DecoupledIO(new DecodedInst))
  }

  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153  val vstartReg = latchedInst.vpu.vstart
154
155  //Type of uop Div
156  val typeOfSplit = latchedInst.uopSplitType
157  val src1Type = latchedInst.srcType(0)
158  val src1IsImm = src1Type === SrcType.imm
159  val src1IsFp = src1Type === SrcType.fp
160
161  val isVstore = FuType.isVStore(latchedInst.fuType)
162
163  // exception generator
164  val vecException = Module(new VecExceptionGen)
165  vecException.io.inst := latchedInst.instr
166  vecException.io.decodedInst := latchedInst
167  vecException.io.vtype := latchedInst.vpu.vtype
168  vecException.io.vstart := latchedInst.vpu.vstart
169  val illegalInst = vecException.io.illegalInst
170
171  numOfUop := latchedUopInfo.numOfUop
172  numOfWB := latchedUopInfo.numOfWB
173
174  //uops dispatch
175  val s_idle :: s_active :: Nil = Enum(2)
176  val state = RegInit(s_idle)
177  val stateNext = WireDefault(state)
178  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
179  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
180  val uopResNext = WireInit(uopRes)
181  val e64 = 3.U(2.W)
182  val isUsSegment = instFields.MOP === 0.U && ((nf =/= 0.U && instFields.LUMOP === 0.U) || instFields.LUMOP === "b10000".U)
183  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
184  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
185
186  //uop div up to maxUopSize
187  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
188  val fixedDecodedInst = Wire(Vec(maxUopSize, new DecodedInst))
189
190  csBundle.foreach { case dst =>
191    dst := latchedInst
192    dst.numUops := latchedUopInfo.numOfUop
193    dst.numWB := latchedUopInfo.numOfWB
194    dst.exceptionVec(ExceptionNO.EX_II) := latchedInst.exceptionVec(ExceptionNO.EX_II) || illegalInst
195    dst.firstUop := false.B
196    dst.lastUop := false.B
197    dst.vlsInstr := false.B
198  }
199
200  csBundle(0).firstUop := true.B
201  csBundle(numOfUop - 1.U).lastUop := true.B
202
203  // when vstart is not zero, the last uop will modify vstart to zero
204  // therefore, blockback and flush pipe
205  csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U
206  csBundle(0.U).flushPipe := vstartReg =/= 0.U
207
208  switch(typeOfSplit) {
209    is(UopSplitType.AMO_CAS_W) {
210      csBundle(0).uopIdx := 0.U
211      csBundle(0).fuOpType := Cat(0.U(3.W), LSUOpType.amocas_w)
212      csBundle(0).lsrc(0) := src1
213      csBundle(0).lsrc(1) := dest
214      csBundle(0).waitForward := true.B
215      csBundle(0).blockBackward := false.B
216
217      csBundle(1).uopIdx := 1.U
218      csBundle(1).fuOpType := Cat(1.U(3.W), LSUOpType.amocas_w)
219      csBundle(1).lsrc(0) := src1
220      csBundle(1).lsrc(1) := src2
221      csBundle(1).rfWen := false.B
222      csBundle(1).waitForward := false.B
223      csBundle(1).blockBackward := true.B
224    }
225    is(UopSplitType.AMO_CAS_D) {
226      csBundle(0).uopIdx := 0.U
227      csBundle(0).fuOpType := Cat(0.U(3.W), LSUOpType.amocas_d)
228      csBundle(0).lsrc(0) := src1
229      csBundle(0).lsrc(1) := dest
230      csBundle(0).waitForward := true.B
231      csBundle(0).blockBackward := false.B
232
233      csBundle(1).uopIdx := 1.U
234      csBundle(1).fuOpType := Cat(1.U(3.W), LSUOpType.amocas_d)
235      csBundle(1).lsrc(0) := src1
236      csBundle(1).lsrc(1) := src2
237      csBundle(1).rfWen := false.B
238      csBundle(1).waitForward := false.B
239      csBundle(1).blockBackward := true.B
240    }
241    is(UopSplitType.AMO_CAS_Q) {
242      csBundle(0).uopIdx := 0.U
243      csBundle(0).fuOpType := Cat(0.U(3.W), LSUOpType.amocas_q)
244      csBundle(0).lsrc(0) := src1
245      csBundle(0).lsrc(1) := dest
246      csBundle(0).waitForward := true.B
247      csBundle(0).blockBackward := false.B
248
249      csBundle(1).uopIdx := 1.U
250      csBundle(1).fuOpType := Cat(1.U(3.W), LSUOpType.amocas_q)
251      csBundle(1).lsrc(0) := src1
252      csBundle(1).lsrc(1) := src2
253      csBundle(1).rfWen := false.B
254      csBundle(1).waitForward := false.B
255      csBundle(1).blockBackward := false.B
256
257      csBundle(2).uopIdx := 2.U
258      csBundle(2).fuOpType := Cat(2.U(3.W), LSUOpType.amocas_q)
259      csBundle(2).lsrc(0) := src1
260      csBundle(2).lsrc(1) := Mux(dest === 0.U, 0.U, dest + 1.U)
261      csBundle(2).ldest := Mux(dest === 0.U, 0.U, dest + 1.U)
262      csBundle(2).waitForward := false.B
263      csBundle(2).blockBackward := false.B
264
265      csBundle(3).uopIdx := 3.U
266      csBundle(3).fuOpType := Cat(3.U(3.W), LSUOpType.amocas_q)
267      csBundle(3).lsrc(0) := src1
268      csBundle(3).lsrc(1) := Mux(src2 === 0.U, 0.U, src2 + 1.U)
269      csBundle(3).rfWen := false.B
270      csBundle(3).waitForward := false.B
271      csBundle(3).blockBackward := true.B
272    }
273    is(UopSplitType.VSET) {
274      // In simple decoder, rfWen and vecWen are not set
275      when(isVsetSimple) {
276        // Default
277        // uop0 set rd, never flushPipe
278        csBundle(0).fuType := FuType.vsetiwi.U
279        csBundle(0).flushPipe := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
280        csBundle(0).blockBackward := false.B
281        csBundle(0).rfWen := true.B
282        // uop1 set vl, vsetvl will flushPipe
283        csBundle(1).ldest := Vl_IDX.U
284        csBundle(1).vecWen := false.B
285        csBundle(1).vlWen := true.B
286        csBundle(1).flushPipe := false.B
287        csBundle(1).blockBackward := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
288        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
289          // write nothing, uop0 is a nop instruction
290          csBundle(0).rfWen := false.B
291          csBundle(0).fpWen := false.B
292          csBundle(0).vecWen := false.B
293          csBundle(0).vlWen := false.B
294          csBundle(1).fuType := FuType.vsetfwf.U
295          csBundle(1).srcType(0) := SrcType.no
296          csBundle(1).srcType(2) := SrcType.no
297          csBundle(1).srcType(3) := SrcType.no
298          csBundle(1).srcType(4) := SrcType.vp
299          csBundle(1).lsrc(4) := Vl_IDX.U
300        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
301          // uop0: mv vtype gpr to vector region
302          csBundle(0).srcType(0) := SrcType.xp
303          csBundle(0).srcType(1) := SrcType.no
304          csBundle(0).lsrc(0) := src2
305          csBundle(0).lsrc(1) := 0.U
306          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
307          csBundle(0).fuType := FuType.i2v.U
308          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
309          csBundle(0).rfWen := false.B
310          csBundle(0).fpWen := false.B
311          csBundle(0).vecWen := true.B
312          csBundle(0).vlWen := false.B
313          // uop1: uvsetvcfg_vv
314          csBundle(1).fuType := FuType.vsetfwf.U
315          // vl
316          csBundle(1).srcType(0) := SrcType.no
317          csBundle(1).srcType(2) := SrcType.no
318          csBundle(1).srcType(3) := SrcType.no
319          csBundle(1).srcType(4) := SrcType.vp
320          csBundle(1).lsrc(4) := Vl_IDX.U
321          // vtype
322          csBundle(1).srcType(1) := SrcType.vp
323          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
324          csBundle(1).vecWen := false.B
325          csBundle(1).vlWen := true.B
326          csBundle(1).ldest := Vl_IDX.U
327        }.elsewhen(dest === 0.U) {
328          // write nothing, uop0 is a nop instruction
329          csBundle(0).rfWen := false.B
330          csBundle(0).fpWen := false.B
331          csBundle(0).vecWen := false.B
332          csBundle(0).vlWen := false.B
333        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) {
334          // because vsetvl may modified src2 when src2 == rd,
335          // we need to modify vd in second uop to avoid dependency
336          // uop0 set vl
337          csBundle(0).fuType := FuType.vsetiwf.U
338          csBundle(0).ldest := Vl_IDX.U
339          csBundle(0).rfWen := false.B
340          csBundle(0).vlWen := true.B
341          // uop1 set rd
342          csBundle(1).fuType := FuType.vsetiwi.U
343          csBundle(1).ldest := dest
344          csBundle(1).rfWen := true.B
345          csBundle(1).vlWen := false.B
346        }
347        // use bypass vtype from vtypeGen
348        csBundle(0).vpu.connectVType(io.vtypeBypass)
349        csBundle(1).vpu.connectVType(io.vtypeBypass)
350      }
351    }
352    is(UopSplitType.VEC_VVV) {
353      for (i <- 0 until MAX_VLMUL) {
354        csBundle(i).lsrc(0) := src1 + i.U
355        csBundle(i).lsrc(1) := src2 + i.U
356        csBundle(i).lsrc(2) := dest + i.U
357        csBundle(i).ldest := dest + i.U
358        csBundle(i).uopIdx := i.U
359      }
360    }
361    is(UopSplitType.VEC_VFV) {
362      /*
363      f to vector move
364       */
365      csBundle(0).srcType(0) := SrcType.fp
366      csBundle(0).srcType(1) := SrcType.imm
367      csBundle(0).srcType(2) := SrcType.imm
368      csBundle(0).lsrc(1) := 0.U
369      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
370      csBundle(0).fuType := FuType.f2v.U
371      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
372      csBundle(0).vecWen := true.B
373      csBundle(0).vpu.isReverse := false.B
374      /*
375      LMUL
376       */
377      for (i <- 0 until MAX_VLMUL) {
378        csBundle(i + 1).srcType(0) := SrcType.vp
379        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
380        csBundle(i + 1).lsrc(1) := src2 + i.U
381        csBundle(i + 1).lsrc(2) := dest + i.U
382        csBundle(i + 1).ldest := dest + i.U
383        csBundle(i + 1).uopIdx := i.U
384      }
385    }
386    is(UopSplitType.VEC_EXT2) {
387      for (i <- 0 until MAX_VLMUL / 2) {
388        csBundle(2 * i).lsrc(1) := src2 + i.U
389        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
390        csBundle(2 * i).ldest := dest + (2 * i).U
391        csBundle(2 * i).uopIdx := (2 * i).U
392        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
393        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
394        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
395        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
396      }
397    }
398    is(UopSplitType.VEC_EXT4) {
399      for (i <- 0 until MAX_VLMUL / 4) {
400        csBundle(4 * i).lsrc(1) := src2 + i.U
401        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
402        csBundle(4 * i).ldest := dest + (4 * i).U
403        csBundle(4 * i).uopIdx := (4 * i).U
404        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
405        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
406        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
407        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
408        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
409        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
410        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
411        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
412        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
413        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
414        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
415        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
416      }
417    }
418    is(UopSplitType.VEC_EXT8) {
419      for (i <- 0 until MAX_VLMUL) {
420        csBundle(i).lsrc(1) := src2
421        csBundle(i).lsrc(2) := dest + i.U
422        csBundle(i).ldest := dest + i.U
423        csBundle(i).uopIdx := i.U
424      }
425    }
426    is(UopSplitType.VEC_0XV) {
427      /*
428      i/f to vector move
429       */
430      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
431      csBundle(0).srcType(1) := SrcType.imm
432      csBundle(0).srcType(2) := SrcType.imm
433      csBundle(0).lsrc(1) := 0.U
434      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
435      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
436      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
437      csBundle(0).rfWen := false.B
438      csBundle(0).fpWen := false.B
439      csBundle(0).vecWen := true.B
440      /*
441      vmv.s.x
442       */
443      csBundle(1).srcType(0) := SrcType.vp
444      csBundle(1).srcType(1) := SrcType.imm
445      csBundle(1).srcType(2) := SrcType.vp
446      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
447      csBundle(1).lsrc(1) := 0.U
448      csBundle(1).lsrc(2) := dest
449      csBundle(1).ldest := dest
450      csBundle(1).rfWen := false.B
451      csBundle(1).fpWen := false.B
452      csBundle(1).vecWen := true.B
453      csBundle(1).uopIdx := 0.U
454    }
455    is(UopSplitType.VEC_VXV) {
456      /*
457      i to vector move
458       */
459      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
460      csBundle(0).srcType(1) := SrcType.imm
461      csBundle(0).srcType(2) := SrcType.imm
462      csBundle(0).lsrc(1) := 0.U
463      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
464      csBundle(0).fuType := FuType.i2v.U
465      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
466      csBundle(0).vecWen := true.B
467      csBundle(0).vpu.isReverse := false.B
468      /*
469      LMUL
470       */
471      for (i <- 0 until MAX_VLMUL) {
472        csBundle(i + 1).srcType(0) := SrcType.vp
473        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
474        csBundle(i + 1).lsrc(1) := src2 + i.U
475        csBundle(i + 1).lsrc(2) := dest + i.U
476        csBundle(i + 1).ldest := dest + i.U
477        csBundle(i + 1).uopIdx := i.U
478      }
479    }
480    is(UopSplitType.VEC_VVW) {
481      for (i <- 0 until MAX_VLMUL / 2) {
482        csBundle(2 * i).lsrc(0) := src1 + i.U
483        csBundle(2 * i).lsrc(1) := src2 + i.U
484        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
485        csBundle(2 * i).ldest := dest + (2 * i).U
486        csBundle(2 * i).uopIdx := (2 * i).U
487        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
488        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
489        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
490        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
491        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
492      }
493    }
494    is(UopSplitType.VEC_VFW) {
495      /*
496      f to vector move
497       */
498      csBundle(0).srcType(0) := SrcType.fp
499      csBundle(0).srcType(1) := SrcType.imm
500      csBundle(0).srcType(2) := SrcType.imm
501      csBundle(0).lsrc(1) := 0.U
502      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
503      csBundle(0).fuType := FuType.f2v.U
504      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
505      csBundle(0).rfWen := false.B
506      csBundle(0).fpWen := false.B
507      csBundle(0).vecWen := true.B
508
509      for (i <- 0 until MAX_VLMUL / 2) {
510        csBundle(2 * i + 1).srcType(0) := SrcType.vp
511        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
512        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
513        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
514        csBundle(2 * i + 1).ldest := dest + (2 * i).U
515        csBundle(2 * i + 1).uopIdx := (2 * i).U
516        csBundle(2 * i + 2).srcType(0) := SrcType.vp
517        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
518        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
519        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
520        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
521        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
522      }
523    }
524    is(UopSplitType.VEC_WVW) {
525      for (i <- 0 until MAX_VLMUL / 2) {
526        csBundle(2 * i).lsrc(0) := src1 + i.U
527        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
528        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
529        csBundle(2 * i).ldest := dest + (2 * i).U
530        csBundle(2 * i).uopIdx := (2 * i).U
531        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
532        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
533        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
534        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
535        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
536      }
537    }
538    is(UopSplitType.VEC_VXW) {
539      /*
540      i to vector move
541       */
542      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
543      csBundle(0).srcType(1) := SrcType.imm
544      csBundle(0).srcType(2) := SrcType.imm
545      csBundle(0).lsrc(1) := 0.U
546      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
547      csBundle(0).fuType := FuType.i2v.U
548      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
549      csBundle(0).vecWen := true.B
550
551      for (i <- 0 until MAX_VLMUL / 2) {
552        csBundle(2 * i + 1).srcType(0) := SrcType.vp
553        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
554        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
555        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
556        csBundle(2 * i + 1).ldest := dest + (2 * i).U
557        csBundle(2 * i + 1).uopIdx := (2 * i).U
558        csBundle(2 * i + 2).srcType(0) := SrcType.vp
559        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
560        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
561        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
562        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
563        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
564      }
565    }
566    is(UopSplitType.VEC_WXW) {
567      /*
568      i to vector move
569       */
570      csBundle(0).srcType(0) := SrcType.reg
571      csBundle(0).srcType(1) := SrcType.imm
572      csBundle(0).srcType(2) := SrcType.imm
573      csBundle(0).lsrc(1) := 0.U
574      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
575      csBundle(0).fuType := FuType.i2v.U
576      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
577      csBundle(0).vecWen := true.B
578
579      for (i <- 0 until MAX_VLMUL / 2) {
580        csBundle(2 * i + 1).srcType(0) := SrcType.vp
581        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
582        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
583        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
584        csBundle(2 * i + 1).ldest := dest + (2 * i).U
585        csBundle(2 * i + 1).uopIdx := (2 * i).U
586        csBundle(2 * i + 2).srcType(0) := SrcType.vp
587        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
588        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
589        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
590        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
591        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
592      }
593    }
594    is(UopSplitType.VEC_WVV) {
595      for (i <- 0 until MAX_VLMUL / 2) {
596
597        csBundle(2 * i).lsrc(0) := src1 + i.U
598        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
599        csBundle(2 * i).lsrc(2) := dest + i.U
600        csBundle(2 * i).ldest := dest + i.U
601        csBundle(2 * i).uopIdx := (2 * i).U
602        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
603        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
604        csBundle(2 * i + 1).lsrc(2) := dest + i.U
605        csBundle(2 * i + 1).ldest := dest + i.U
606        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
607      }
608    }
609    is(UopSplitType.VEC_WFW) {
610      /*
611      f to vector move
612       */
613      csBundle(0).srcType(0) := SrcType.fp
614      csBundle(0).srcType(1) := SrcType.imm
615      csBundle(0).srcType(2) := SrcType.imm
616      csBundle(0).lsrc(1) := 0.U
617      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
618      csBundle(0).fuType := FuType.f2v.U
619      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
620      csBundle(0).rfWen := false.B
621      csBundle(0).fpWen := false.B
622      csBundle(0).vecWen := true.B
623
624      for (i <- 0 until MAX_VLMUL / 2) {
625        csBundle(2 * i + 1).srcType(0) := SrcType.vp
626        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
627        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
628        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
629        csBundle(2 * i + 1).ldest := dest + (2 * i).U
630        csBundle(2 * i + 1).uopIdx := (2 * i).U
631        csBundle(2 * i + 2).srcType(0) := SrcType.vp
632        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
633        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
634        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
635        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
636        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
637      }
638    }
639    is(UopSplitType.VEC_WXV) {
640      /*
641      i to vector move
642       */
643      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
644      csBundle(0).srcType(1) := SrcType.imm
645      csBundle(0).srcType(2) := SrcType.imm
646      csBundle(0).lsrc(1) := 0.U
647      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
648      csBundle(0).fuType := FuType.i2v.U
649      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
650      csBundle(0).vecWen := true.B
651
652      for (i <- 0 until MAX_VLMUL / 2) {
653        csBundle(2 * i + 1).srcType(0) := SrcType.vp
654        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
655        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
656        csBundle(2 * i + 1).lsrc(2) := dest + i.U
657        csBundle(2 * i + 1).ldest := dest + i.U
658        csBundle(2 * i + 1).uopIdx := (2 * i).U
659        csBundle(2 * i + 2).srcType(0) := SrcType.vp
660        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
661        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
662        csBundle(2 * i + 2).lsrc(2) := dest + i.U
663        csBundle(2 * i + 2).ldest := dest + i.U
664        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
665      }
666    }
667    is(UopSplitType.VEC_VVM) {
668      csBundle(0).lsrc(2) := dest
669      csBundle(0).ldest := dest
670      csBundle(0).uopIdx := 0.U
671      for (i <- 1 until MAX_VLMUL) {
672        csBundle(i).lsrc(0) := src1 + i.U
673        csBundle(i).lsrc(1) := src2 + i.U
674        csBundle(i).lsrc(2) := dest
675        csBundle(i).ldest := dest
676        csBundle(i).uopIdx := i.U
677      }
678    }
679    is(UopSplitType.VEC_VFM) {
680      /*
681      f to vector move
682       */
683      csBundle(0).srcType(0) := SrcType.fp
684      csBundle(0).srcType(1) := SrcType.imm
685      csBundle(0).srcType(2) := SrcType.imm
686      csBundle(0).lsrc(1) := 0.U
687      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
688      csBundle(0).fuType := FuType.f2v.U
689      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
690      csBundle(0).rfWen := false.B
691      csBundle(0).fpWen := false.B
692      csBundle(0).vecWen := true.B
693      //LMUL
694      csBundle(1).srcType(0) := SrcType.vp
695      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
696      csBundle(1).lsrc(2) := dest
697      csBundle(1).ldest := dest
698      csBundle(1).uopIdx := 0.U
699      for (i <- 1 until MAX_VLMUL) {
700        csBundle(i + 1).srcType(0) := SrcType.vp
701        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
702        csBundle(i + 1).lsrc(1) := src2 + i.U
703        csBundle(i + 1).lsrc(2) := dest
704        csBundle(i + 1).ldest := dest
705        csBundle(i + 1).uopIdx := i.U
706      }
707      csBundle(numOfUop - 1.U).ldest := dest
708    }
709    is(UopSplitType.VEC_VXM) {
710      /*
711      i to vector move
712       */
713      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
714      csBundle(0).srcType(1) := SrcType.imm
715      csBundle(0).srcType(2) := SrcType.imm
716      csBundle(0).lsrc(1) := 0.U
717      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
718      csBundle(0).fuType := FuType.i2v.U
719      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
720      csBundle(0).vecWen := true.B
721      //LMUL
722      csBundle(1).srcType(0) := SrcType.vp
723      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
724      csBundle(1).lsrc(2) := dest
725      csBundle(1).ldest := dest
726      csBundle(1).uopIdx := 0.U
727      for (i <- 1 until MAX_VLMUL) {
728        csBundle(i + 1).srcType(0) := SrcType.vp
729        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
730        csBundle(i + 1).lsrc(1) := src2 + i.U
731        csBundle(i + 1).lsrc(2) := dest
732        csBundle(i + 1).ldest := dest
733        csBundle(i + 1).uopIdx := i.U
734      }
735      csBundle(numOfUop - 1.U).ldest := dest
736    }
737    is(UopSplitType.VEC_SLIDE1UP) {
738      /*
739      i to vector move
740       */
741      csBundle(0).srcType(0) := SrcType.reg
742      csBundle(0).srcType(1) := SrcType.imm
743      csBundle(0).srcType(2) := SrcType.imm
744      csBundle(0).lsrc(1) := 0.U
745      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
746      csBundle(0).fuType := FuType.i2v.U
747      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
748      csBundle(0).vecWen := true.B
749      //LMUL
750      csBundle(1).srcType(0) := SrcType.vp
751      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
752      csBundle(1).lsrc(2) := dest
753      csBundle(1).ldest := dest
754      csBundle(1).uopIdx := 0.U
755      for (i <- 1 until MAX_VLMUL) {
756        csBundle(i + 1).srcType(0) := SrcType.vp
757        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
758        csBundle(i + 1).lsrc(1) := src2 + i.U
759        csBundle(i + 1).lsrc(2) := dest + i.U
760        csBundle(i + 1).ldest := dest + i.U
761        csBundle(i + 1).uopIdx := i.U
762      }
763    }
764    is(UopSplitType.VEC_FSLIDE1UP) {
765      /*
766      f to vector move
767       */
768      csBundle(0).srcType(0) := SrcType.fp
769      csBundle(0).srcType(1) := SrcType.imm
770      csBundle(0).srcType(2) := SrcType.imm
771      csBundle(0).lsrc(1) := 0.U
772      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
773      csBundle(0).fuType := FuType.f2v.U
774      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
775      csBundle(0).rfWen := false.B
776      csBundle(0).fpWen := false.B
777      csBundle(0).vecWen := true.B
778      //LMUL
779      csBundle(1).srcType(0) := SrcType.vp
780      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
781      csBundle(1).lsrc(1) := src2
782      csBundle(1).lsrc(2) := dest
783      csBundle(1).ldest := dest
784      csBundle(1).uopIdx := 0.U
785      for (i <- 1 until MAX_VLMUL) {
786        csBundle(i + 1).srcType(0) := SrcType.vp
787        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
788        csBundle(i + 1).lsrc(1) := src2 + i.U
789        csBundle(i + 1).lsrc(2) := dest + i.U
790        csBundle(i + 1).ldest := dest + i.U
791        csBundle(i + 1).uopIdx := i.U
792      }
793    }
794    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
795      /*
796      i to vector move
797       */
798      csBundle(0).srcType(0) := SrcType.reg
799      csBundle(0).srcType(1) := SrcType.imm
800      csBundle(0).srcType(2) := SrcType.imm
801      csBundle(0).lsrc(1) := 0.U
802      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
803      csBundle(0).fuType := FuType.i2v.U
804      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
805      csBundle(0).vecWen := true.B
806      //LMUL
807      for (i <- 0 until MAX_VLMUL) {
808        csBundle(2 * i + 1).srcType(0) := SrcType.vp
809        csBundle(2 * i + 1).srcType(1) := SrcType.vp
810        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
811        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
812        csBundle(2 * i + 1).lsrc(2) := dest + i.U
813        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
814        csBundle(2 * i + 1).uopIdx := (2 * i).U
815        if (2 * i + 2 < MAX_VLMUL * 2) {
816          csBundle(2 * i + 2).srcType(0) := SrcType.vp
817          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
818          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
819          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
820          csBundle(2 * i + 2).ldest := dest + i.U
821          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
822        }
823      }
824      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
825      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
826      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
827    }
828    is(UopSplitType.VEC_FSLIDE1DOWN) {
829      /*
830      f to vector move
831       */
832      csBundle(0).srcType(0) := SrcType.fp
833      csBundle(0).srcType(1) := SrcType.imm
834      csBundle(0).srcType(2) := SrcType.imm
835      csBundle(0).lsrc(1) := 0.U
836      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
837      csBundle(0).fuType := FuType.f2v.U
838      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
839      csBundle(0).rfWen := false.B
840      csBundle(0).fpWen := false.B
841      csBundle(0).vecWen := true.B
842      //LMUL
843      for (i <- 0 until MAX_VLMUL) {
844        csBundle(2 * i + 1).srcType(0) := SrcType.vp
845        csBundle(2 * i + 1).srcType(1) := SrcType.vp
846        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
847        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
848        csBundle(2 * i + 1).lsrc(2) := dest + i.U
849        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
850        csBundle(2 * i + 1).uopIdx := (2 * i).U
851        if (2 * i + 2 < MAX_VLMUL * 2) {
852          csBundle(2 * i + 2).srcType(0) := SrcType.vp
853          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
854          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
855          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
856          csBundle(2 * i + 2).ldest := dest + i.U
857          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
858        }
859      }
860      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
861      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
862      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
863    }
864    is(UopSplitType.VEC_VRED) {
865      when(vlmulReg === "b001".U) {
866        csBundle(0).srcType(2) := SrcType.DC
867        csBundle(0).lsrc(0) := src2 + 1.U
868        csBundle(0).lsrc(1) := src2
869        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
870        csBundle(0).uopIdx := 0.U
871      }
872      when(vlmulReg === "b010".U) {
873        csBundle(0).srcType(2) := SrcType.DC
874        csBundle(0).lsrc(0) := src2 + 1.U
875        csBundle(0).lsrc(1) := src2
876        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
877        csBundle(0).uopIdx := 0.U
878
879        csBundle(1).srcType(2) := SrcType.DC
880        csBundle(1).lsrc(0) := src2 + 3.U
881        csBundle(1).lsrc(1) := src2 + 2.U
882        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
883        csBundle(1).uopIdx := 1.U
884
885        csBundle(2).srcType(2) := SrcType.DC
886        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
887        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
888        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
889        csBundle(2).uopIdx := 2.U
890      }
891      when(vlmulReg === "b011".U) {
892        for (i <- 0 until MAX_VLMUL) {
893          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
894            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
895            csBundle(i).lsrc(1) := src2 + (i * 2).U
896            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
897          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
898            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
899            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
900            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
901          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
902            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
903            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
904            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
905          }
906          csBundle(i).srcType(2) := SrcType.DC
907          csBundle(i).uopIdx := i.U
908        }
909      }
910      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
911        /*
912         * 2 <= vlmul <= 8
913         */
914        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
915        csBundle(numOfUop - 1.U).lsrc(0) := src1
916        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
917        csBundle(numOfUop - 1.U).lsrc(2) := dest
918        csBundle(numOfUop - 1.U).ldest := dest
919        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
920      }
921    }
922    is(UopSplitType.VEC_VFRED) {
923      val vlmul = vlmulReg
924      val vsew = vsewReg
925      when(vlmul === VLmul.m8){
926        for (i <- 0 until 4) {
927          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
928          csBundle(i).lsrc(1) := src2 + (i * 2).U
929          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
930          csBundle(i).uopIdx := i.U
931        }
932        for (i <- 4 until 6) {
933          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
934          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
935          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
936          csBundle(i).uopIdx := i.U
937        }
938        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
939        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
940        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
941        csBundle(6).uopIdx := 6.U
942        when(vsew === VSew.e64) {
943          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
944          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
945          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
946          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
947          csBundle(7).uopIdx := 7.U
948          csBundle(8).lsrc(0) := src1
949          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
950          csBundle(8).ldest := dest
951          csBundle(8).uopIdx := 8.U
952        }
953        when(vsew === VSew.e32) {
954          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
955          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
956          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
957          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
958          csBundle(7).uopIdx := 7.U
959          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
960          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
961          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
962          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
963          csBundle(8).uopIdx := 8.U
964          csBundle(9).lsrc(0) := src1
965          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
966          csBundle(9).ldest := dest
967          csBundle(9).uopIdx := 9.U
968        }
969        when(vsew === VSew.e16) {
970          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
971          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
972          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
973          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
974          csBundle(7).uopIdx := 7.U
975          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
976          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
977          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
978          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
979          csBundle(8).uopIdx := 8.U
980          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
981          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
982          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
983          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
984          csBundle(9).uopIdx := 9.U
985          csBundle(10).lsrc(0) := src1
986          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
987          csBundle(10).ldest := dest
988          csBundle(10).uopIdx := 10.U
989        }
990      }
991      when(vlmul === VLmul.m4) {
992        for (i <- 0 until 2) {
993          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
994          csBundle(i).lsrc(1) := src2 + (i * 2).U
995          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
996          csBundle(i).uopIdx := i.U
997        }
998        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
999        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1000        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1001        csBundle(2).uopIdx := 2.U
1002        when(vsew === VSew.e64) {
1003          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
1004          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1005          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1006          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
1007          csBundle(3).uopIdx := 3.U
1008          csBundle(4).lsrc(0) := src1
1009          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1010          csBundle(4).ldest := dest
1011          csBundle(4).uopIdx := 4.U
1012        }
1013        when(vsew === VSew.e32) {
1014          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
1015          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1016          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1017          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
1018          csBundle(3).uopIdx := 3.U
1019          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
1020          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1021          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
1022          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
1023          csBundle(4).uopIdx := 4.U
1024          csBundle(5).lsrc(0) := src1
1025          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
1026          csBundle(5).ldest := dest
1027          csBundle(5).uopIdx := 5.U
1028        }
1029        when(vsew === VSew.e16) {
1030          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
1031          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1032          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1033          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
1034          csBundle(3).uopIdx := 3.U
1035          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
1036          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1037          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
1038          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
1039          csBundle(4).uopIdx := 4.U
1040          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
1041          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
1042          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
1043          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
1044          csBundle(5).uopIdx := 5.U
1045          csBundle(6).lsrc(0) := src1
1046          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
1047          csBundle(6).ldest := dest
1048          csBundle(6).uopIdx := 6.U
1049        }
1050      }
1051      when(vlmul === VLmul.m2) {
1052        csBundle(0).lsrc(0) := src2 + 1.U
1053        csBundle(0).lsrc(1) := src2 + 0.U
1054        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1055        csBundle(0).uopIdx := 0.U
1056        when(vsew === VSew.e64) {
1057          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1058          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1059          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1060          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
1061          csBundle(1).uopIdx := 1.U
1062          csBundle(2).lsrc(0) := src1
1063          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1064          csBundle(2).ldest := dest
1065          csBundle(2).uopIdx := 2.U
1066        }
1067        when(vsew === VSew.e32) {
1068          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1069          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1070          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1071          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
1072          csBundle(1).uopIdx := 1.U
1073          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1074          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1075          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1076          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1077          csBundle(2).uopIdx := 2.U
1078          csBundle(3).lsrc(0) := src1
1079          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1080          csBundle(3).ldest := dest
1081          csBundle(3).uopIdx := 3.U
1082        }
1083        when(vsew === VSew.e16) {
1084          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1085          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1086          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1087          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
1088          csBundle(1).uopIdx := 1.U
1089          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1090          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1091          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1092          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1093          csBundle(2).uopIdx := 2.U
1094          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
1095          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1096          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1097          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
1098          csBundle(3).uopIdx := 3.U
1099          csBundle(4).lsrc(0) := src1
1100          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1101          csBundle(4).ldest := dest
1102          csBundle(4).uopIdx := 4.U
1103        }
1104      }
1105      when(vlmul === VLmul.m1) {
1106        when(vsew === VSew.e64) {
1107          csBundle(0).lsrc(0) := src2
1108          csBundle(0).lsrc(1) := src2
1109          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1110          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1111          csBundle(0).uopIdx := 0.U
1112          csBundle(1).lsrc(0) := src1
1113          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1114          csBundle(1).ldest := dest
1115          csBundle(1).uopIdx := 1.U
1116        }
1117        when(vsew === VSew.e32) {
1118          csBundle(0).lsrc(0) := src2
1119          csBundle(0).lsrc(1) := src2
1120          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1121          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1122          csBundle(0).uopIdx := 0.U
1123          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1124          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1125          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1126          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1127          csBundle(1).uopIdx := 1.U
1128          csBundle(2).lsrc(0) := src1
1129          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1130          csBundle(2).ldest := dest
1131          csBundle(2).uopIdx := 2.U
1132        }
1133        when(vsew === VSew.e16) {
1134          csBundle(0).lsrc(0) := src2
1135          csBundle(0).lsrc(1) := src2
1136          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1137          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1138          csBundle(0).uopIdx := 0.U
1139          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1140          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1141          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1142          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1143          csBundle(1).uopIdx := 1.U
1144          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1145          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1146          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1147          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1148          csBundle(2).uopIdx := 2.U
1149          csBundle(3).lsrc(0) := src1
1150          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1151          csBundle(3).ldest := dest
1152          csBundle(3).uopIdx := 3.U
1153        }
1154      }
1155      when(vlmul === VLmul.mf2) {
1156        when(vsew === VSew.e32) {
1157          csBundle(0).lsrc(0) := src2
1158          csBundle(0).lsrc(1) := src2
1159          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1160          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1161          csBundle(0).uopIdx := 0.U
1162          csBundle(1).lsrc(0) := src1
1163          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1164          csBundle(1).ldest := dest
1165          csBundle(1).uopIdx := 1.U
1166        }
1167        when(vsew === VSew.e16) {
1168          csBundle(0).lsrc(0) := src2
1169          csBundle(0).lsrc(1) := src2
1170          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1171          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1172          csBundle(0).uopIdx := 0.U
1173          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1174          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1175          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1176          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1177          csBundle(1).uopIdx := 1.U
1178          csBundle(2).lsrc(0) := src1
1179          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1180          csBundle(2).ldest := dest
1181          csBundle(2).uopIdx := 2.U
1182        }
1183      }
1184      when(vlmul === VLmul.mf4) {
1185        when(vsew === VSew.e16) {
1186          csBundle(0).lsrc(0) := src2
1187          csBundle(0).lsrc(1) := src2
1188          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1189          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1190          csBundle(0).uopIdx := 0.U
1191          csBundle(1).lsrc(0) := src1
1192          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1193          csBundle(1).ldest := dest
1194          csBundle(1).uopIdx := 1.U
1195        }
1196      }
1197    }
1198
1199    is(UopSplitType.VEC_VFREDOSUM) {
1200      import yunsuan.VfaluType
1201      val vlmul = vlmulReg
1202      val vsew = vsewReg
1203      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1204      when(vlmul === VLmul.m8) {
1205        when(vsew === VSew.e64) {
1206          val vlmax = 16
1207          for (i <- 0 until vlmax) {
1208            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1209            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1211            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1212            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1213            csBundle(i).uopIdx := i.U
1214          }
1215        }
1216        when(vsew === VSew.e32) {
1217          val vlmax = 32
1218          for (i <- 0 until vlmax) {
1219            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1220            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1222            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1223            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1224            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1225            csBundle(i).uopIdx := i.U
1226          }
1227        }
1228        when(vsew === VSew.e16) {
1229          val vlmax = 64
1230          for (i <- 0 until vlmax) {
1231            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1232            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1234            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1235            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1236            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1237            csBundle(i).uopIdx := i.U
1238          }
1239        }
1240      }
1241      when(vlmul === VLmul.m4) {
1242        when(vsew === VSew.e64) {
1243          val vlmax = 8
1244          for (i <- 0 until vlmax) {
1245            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1246            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1248            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1249            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1250            csBundle(i).uopIdx := i.U
1251          }
1252        }
1253        when(vsew === VSew.e32) {
1254          val vlmax = 16
1255          for (i <- 0 until vlmax) {
1256            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1257            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1258            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1259            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1260            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1261            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1262            csBundle(i).uopIdx := i.U
1263          }
1264        }
1265        when(vsew === VSew.e16) {
1266          val vlmax = 32
1267          for (i <- 0 until vlmax) {
1268            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1269            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1270            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1271            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1272            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1273            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1274            csBundle(i).uopIdx := i.U
1275          }
1276        }
1277      }
1278      when(vlmul === VLmul.m2) {
1279        when(vsew === VSew.e64) {
1280          val vlmax = 4
1281          for (i <- 0 until vlmax) {
1282            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1283            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1284            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1285            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1286            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1287            csBundle(i).uopIdx := i.U
1288          }
1289        }
1290        when(vsew === VSew.e32) {
1291          val vlmax = 8
1292          for (i <- 0 until vlmax) {
1293            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1294            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1295            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1296            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1297            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1298            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1299            csBundle(i).uopIdx := i.U
1300          }
1301        }
1302        when(vsew === VSew.e16) {
1303          val vlmax = 16
1304          for (i <- 0 until vlmax) {
1305            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1306            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1307            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1308            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1309            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1310            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1311            csBundle(i).uopIdx := i.U
1312          }
1313        }
1314      }
1315      when(vlmul === VLmul.m1) {
1316        when(vsew === VSew.e64) {
1317          val vlmax = 2
1318          for (i <- 0 until vlmax) {
1319            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1320            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1321            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1322            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1323            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1324            csBundle(i).uopIdx := i.U
1325          }
1326        }
1327        when(vsew === VSew.e32) {
1328          val vlmax = 4
1329          for (i <- 0 until vlmax) {
1330            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1331            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1332            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1333            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1334            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1335            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1336            csBundle(i).uopIdx := i.U
1337          }
1338        }
1339        when(vsew === VSew.e16) {
1340          val vlmax = 8
1341          for (i <- 0 until vlmax) {
1342            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1343            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1344            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1345            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1346            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1347            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1348            csBundle(i).uopIdx := i.U
1349          }
1350        }
1351      }
1352      when(vlmul === VLmul.mf2) {
1353        when(vsew === VSew.e32) {
1354          val vlmax = 2
1355          for (i <- 0 until vlmax) {
1356            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1357            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1358            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1359            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1360            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1361            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1362            csBundle(i).uopIdx := i.U
1363          }
1364        }
1365        when(vsew === VSew.e16) {
1366          val vlmax = 4
1367          for (i <- 0 until vlmax) {
1368            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1369            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1370            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1371            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1372            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1373            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1374            csBundle(i).uopIdx := i.U
1375          }
1376        }
1377      }
1378      when(vlmul === VLmul.mf4) {
1379        when(vsew === VSew.e16) {
1380          val vlmax = 2
1381          for (i <- 0 until vlmax) {
1382            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1383            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1384            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1385            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1386            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1387            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1388            csBundle(i).uopIdx := i.U
1389          }
1390        }
1391      }
1392    }
1393
is(UopSplitType.VEC_SLIDEUP) {
  // uop 0 is an int-to-vector move (FuType.i2v): its first source is either the immediate or a
  // scalar register, chosen by src1IsImm, and it writes the temporary vector register
  // VECTOR_TMP_REG_LMUL that every following uop reads back through lsrc(0).
  val moveUop = csBundle(0)
  val moveKind = Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0))
  moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(moveKind, vsewReg)
  moveUop.vecWen := true.B

  // Triangular expansion: destination register dstIdx gets dstIdx + 1 uops, one per source
  // register vs2 + 0 .. vs2 + dstIdx. The first uop of a row takes dest + dstIdx as its old-vd
  // input, the last one writes dest + dstIdx, and the partial results in between are handed over
  // through VECTOR_TMP_REG_LMUL + srcIdx + 1.
  for {
    dstIdx <- 0 until MAX_VLMUL
    srcIdx <- 0 to dstIdx
  } {
    // Position of this uop among the slide uops; the csBundle slot is shifted by one for the move.
    val seqNo = dstIdx * (dstIdx + 1) / 2 + srcIdx
    val slideUop = csBundle(seqNo + 1)
    val dstReg = dest + dstIdx.U
    val prevVd = if (srcIdx == 0) dstReg else (VECTOR_TMP_REG_LMUL + srcIdx).U
    val nextVd = if (srcIdx == dstIdx) dstReg else (VECTOR_TMP_REG_LMUL + srcIdx + 1).U

    slideUop.srcType(0) := SrcType.vp
    slideUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    slideUop.lsrc(1) := src2 + srcIdx.U
    slideUop.lsrc(2) := prevVd
    slideUop.ldest := nextVd
    slideUop.uopIdx := seqNo.U
  }
}
1421
is(UopSplitType.VEC_SLIDEDOWN) {
  // uop 0 is an int-to-vector move (FuType.i2v): its first source is either the immediate or a
  // scalar register, chosen by src1IsImm, and it writes the temporary vector register
  // VECTOR_TMP_REG_LMUL that every following uop reads back through lsrc(0).
  val moveUop = csBundle(0)
  val moveKind = Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0))
  moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(moveKind, vsewReg)
  moveUop.vecWen := true.B

  // The slide uops are laid out backwards from the end of the uop list: row `row` occupies the
  // slots numOfUop - (row*(row+1)/2 + 1) down to numOfUop - (row+1)*(row+2)/2, and only rows
  // below the run-time lmul are generated. Rows are visited in ascending order and the steps of
  // a row in descending order, exactly as the connections were originally issued.
  for {
    row  <- 0 until MAX_VLMUL
    step <- row to 0 by -1
  } {
    when(row.U < lmul) {
      // Distance of this uop's slot from numOfUop.
      val fromEnd = row * (row + 1) / 2 + row - step + 1
      val slideUop = csBundle(numOfUop - fromEnd.U)
      // Rows count down from the highest destination register of the group.
      val dstReg = dest + lmul - 1.U - row.U
      val prevVd = if (step == 0) dstReg else (VECTOR_TMP_REG_LMUL + step).U
      val nextVd = if (step == row) dstReg else (VECTOR_TMP_REG_LMUL + step + 1).U

      slideUop.srcType(0) := SrcType.vp
      slideUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      slideUop.lsrc(1) := src2 + lmul - 1.U - step.U
      slideUop.lsrc(2) := prevVd
      slideUop.ldest := nextVd
      // uopIdx is one less than the slot index because slot 0 holds the move uop.
      slideUop.uopIdx := numOfUop - (fromEnd + 1).U
    }
  }
}
1451
is(UopSplitType.VEC_M0X) {
  // One uop per possible register of the group. Uop k reads vs2 and the previous uop's temporary
  // (VECTOR_TMP_REG_LMUL + k - 1) and writes VECTOR_TMP_REG_LMUL + k; uop 0 has no predecessor,
  // so its first source type is SrcType.DC.
  (0 until MAX_VLMUL).foreach { step =>
    val uop = csBundle(step)
    uop.srcType(0) := (if (step == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + step - 1).U
    uop.lsrc(1) := src2
    // lsrc(2) is deliberately not driven here.
    uop.ldest := (VECTOR_TMP_REG_LMUL + step).U
    uop.uopIdx := step.U
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
  }

  // The final uop delivers the result to the integer register `dest` instead of a vector
  // temporary. These connections come after the loop so that they override the defaults above;
  // the integer write is suppressed when dest is register 0.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.rfWen := (dest =/= 0.U)
  lastUop.fpWen := false.B
  lastUop.vecWen := false.B
  lastUop.ldest := dest
}
1473
is(UopSplitType.VEC_MVV) {
  // Two uops per register of the group. Both read vs2 and the temporary written by the previous
  // group (VECTOR_TMP_REG_LMUL + group - 1; SrcType.DC for group 0, which has no predecessor).
  // The even uop updates dest + group, the odd uop produces the temporary for the next group.
  for (group <- 0 until MAX_VLMUL) {
    val carrySrcType = if (group == 0) SrcType.DC else SrcType.vp
    val carryReg = (VECTOR_TMP_REG_LMUL + group - 1).U
    val pair = Seq(csBundle(group * 2), csBundle(group * 2 + 1))

    // Fields shared by both uops of the pair.
    pair.zipWithIndex.foreach { case (uop, half) =>
      uop.srcType(0) := carrySrcType
      uop.srcType(1) := SrcType.vp
      uop.lsrc(0) := carryReg
      uop.lsrc(1) := src2
      uop.uopIdx := (group * 2 + half).U
    }

    // Even uop: old vd in, vd out.
    pair.head.lsrc(2) := dest + group.U
    pair.head.ldest := dest + group.U
    // Odd uop: only the carry temporary is written; its lsrc(2) is not driven here.
    pair.last.ldest := (VECTOR_TMP_REG_LMUL + group).U
  }
}
is(UopSplitType.VEC_VWW) {
  // Slots below the run-time lmul read vs2 + i on both source ports and write
  // VECTOR_TMP_REG_LMUL + i. Every later slot combines a pair of earlier temporaries:
  // slot i reads VECTOR_TMP_REG_LMUL + 2*(i - lmul) and the register right after it.
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    // Identical on both sides of the lmul boundary, so connected unconditionally.
    uop.srcType(2) := SrcType.DC
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    // lsrc(2) is not driven for these slots.
    when(i.U < lmul) {
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    }.otherwise {
      // Cat(x, 0) doubles (i - lmul): the index of the even temporary of the pair.
      val pairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
      uop.lsrc(0) := pairBase + 1.U
      uop.lsrc(1) := pairBase
    }
  }

  // Fix-up of the last uop, issued once after the loop. It used to be repeated inside every loop
  // iteration; because a later connection overrides an earlier one, only the final copy ever
  // took effect, so a single copy placed last yields the same hardware. The last uop takes vs1
  // on port 0, reads the old vd and writes the architectural destination.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.srcType(2) := SrcType.vp
  lastUop.lsrc(0) := src1
  lastUop.lsrc(2) := dest
  lastUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  /** Emits len * len uops: every destination register dest + d is built from all `len` source
    * registers vs2 + 0 .. vs2 + (len - 1), always using the index register vs1 + d.
    * Within one destination the partial result travels through the temporaries
    * VECTOR_TMP_REG_LMUL + s; the first step reads the old dest + d, the last step writes it.
    *
    * @param len number of registers in the group
    */
  def genCsBundle_VEC_RGATHER(len: Int): Unit = {
    for {
      dstIdx <- 0 until len
      srcIdx <- 0 until len
    } {
      val slot = dstIdx * len + srcIdx
      val uop = csBundle(slot)
      val isFirstStep = srcIdx == 0
      val isLastStep = srcIdx == len - 1

      // srcType(0..2) are left at their defaults.
      uop.lsrc(0) := src1 + dstIdx.U
      uop.lsrc(1) := src2 + srcIdx.U
      uop.lsrc(2) := (if (isFirstStep) dest + dstIdx.U else (VECTOR_TMP_REG_LMUL + srcIdx - 1).U)
      uop.ldest := (if (isLastStep) dest + dstIdx.U else (VECTOR_TMP_REG_LMUL + srcIdx).U)
      uop.uopIdx := slot.U
    }
  }

  // vlmulReg encoding -> registers per group. Other encodings generate nothing here.
  val groupSizes = Seq("b001" -> 2, "b010" -> 4, "b011" -> 8)
  groupSizes.foreach { case (encoding, regs) =>
    when(vlmulReg === encoding.U) {
      genCsBundle_VEC_RGATHER(regs)
    }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  /** Emits len * len gather uops into slots 1 .. len*len (slot 0 holds the move uop below).
    * Every uop reads the moved scalar from VECTOR_TMP_REG_LMUL on port 0 and vs2 + s on port 1.
    * Within one destination the partial result travels through VECTOR_TMP_REG_LMUL + s + 1;
    * the first step reads the old dest + d and the last step writes dest + d.
    *
    * @param len number of registers in the group
    */
  def genCsBundle_RGATHER_VX(len: Int): Unit = {
    for {
      dstIdx <- 0 until len
      srcIdx <- 0 until len
    } {
      val seqNo = dstIdx * len + srcIdx
      val uop = csBundle(seqNo + 1)
      val isFirstStep = srcIdx == 0
      val isLastStep = srcIdx == len - 1

      uop.srcType(0) := SrcType.vp
      // srcType(1) and srcType(2) are left at their defaults.
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + srcIdx.U
      uop.lsrc(2) := (if (isFirstStep) dest + dstIdx.U else (VECTOR_TMP_REG_LMUL + srcIdx).U)
      uop.ldest := (if (isLastStep) dest + dstIdx.U else (VECTOR_TMP_REG_LMUL + srcIdx + 1).U)
      uop.uopIdx := seqNo.U
    }
  }

  // uop 0: int-to-vector move of the immediate / scalar register into VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  val moveKind = Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0))
  moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(moveKind, vsewReg)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B

  // Single-register layout first, as the default; the larger groups below are connected later
  // and therefore take precedence when their vlmulReg encoding matches.
  genCsBundle_RGATHER_VX(1)
  val groupSizes = Seq("b001" -> 2, "b010" -> 4, "b011" -> 8)
  groupSizes.foreach { case (encoding, regs) =>
    when(vlmulReg === encoding.U) {
      genCsBundle_RGATHER_VX(regs)
    }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  /** Layout used when vsewReg is zero (selected by `!vsewReg.orR`): every (destination, source)
    * pair needs two uops, one per index register vs1 + 2*d and vs1 + 2*d + 1.
    * The partial result is chained through VECTOR_TMP_REG_LMUL + 2*s (between the two halves)
    * and VECTOR_TMP_REG_LMUL + 2*s + 1 (on to the next source register).
    *
    * @param len number of registers in the destination / source group
    */
  def genCsBundle_VEC_RGATHEREI16_SEW8(len: Int): Unit = {
    for {
      dstIdx <- 0 until len
      srcIdx <- 0 until len
    } {
      val firstSlot = (dstIdx * len + srcIdx) * 2
      val lowUop = csBundle(firstSlot)
      val highUop = csBundle(firstSlot + 1)
      val betweenHalves = (VECTOR_TMP_REG_LMUL + srcIdx * 2).U

      lowUop.lsrc(0) := src1 + (dstIdx * 2).U
      lowUop.lsrc(1) := src2 + srcIdx.U
      lowUop.lsrc(2) := (if (srcIdx == 0) dest + dstIdx.U else (VECTOR_TMP_REG_LMUL + srcIdx * 2 - 1).U)
      lowUop.ldest := betweenHalves
      lowUop.uopIdx := firstSlot.U

      highUop.lsrc(0) := src1 + (dstIdx * 2 + 1).U
      highUop.lsrc(1) := src2 + srcIdx.U
      highUop.lsrc(2) := betweenHalves
      highUop.ldest := (if (srcIdx == len - 1) dest + dstIdx.U else (VECTOR_TMP_REG_LMUL + srcIdx * 2 + 1).U)
      highUop.uopIdx := (firstSlot + 1).U
    }
  }

  /** One uop per (destination, source) pair, chained through VECTOR_TMP_REG_LMUL + s.
    * This single generator replaces three former copies that differed only in how the index
    * register was chosen: vs1 + d, vs1 + d/2 and vs1 + d/4.
    *
    * @param len       number of registers in the destination / source group
    * @param vdPerVs1  how many consecutive destination registers share one index register
    *                  (1, 2 or 4 at the call sites below)
    */
  def genCsBundle_VEC_RGATHEREI16(len: Int, vdPerVs1: Int = 1): Unit = {
    for {
      dstIdx <- 0 until len
      srcIdx <- 0 until len
    } {
      val slot = dstIdx * len + srcIdx
      val uop = csBundle(slot)

      uop.lsrc(0) := src1 + (dstIdx / vdPerVs1).U
      uop.lsrc(1) := src2 + srcIdx.U
      uop.lsrc(2) := (if (srcIdx == 0) dest + dstIdx.U else (VECTOR_TMP_REG_LMUL + srcIdx - 1).U)
      uop.ldest := (if (srcIdx == len - 1) dest + dstIdx.U else (VECTOR_TMP_REG_LMUL + srcIdx).U)
      uop.uopIdx := slot.U
    }
  }

  /** Picks the layout for a group of `len` registers from the element width in vsewReg.
    *
    * @param len       number of registers in the group
    * @param splitSew8 whether the two-uop layout is offered for vsewReg == 0. When false that
    *                  case falls through to the plain one-uop layout and the split generator is
    *                  not elaborated at all (it would address twice as many csBundle slots).
    */
  def genBySew(len: Int, splitSew8: Boolean): Unit = {
    def widerElements(): Unit = {
      when(vsewReg === VSew.e32) {
        genCsBundle_VEC_RGATHEREI16(len, vdPerVs1 = 2)
      }.elsewhen(vsewReg === VSew.e64) {
        genCsBundle_VEC_RGATHEREI16(len, vdPerVs1 = 4)
      }.otherwise {
        genCsBundle_VEC_RGATHEREI16(len)
      }
    }
    if (splitSew8) {
      when(!vsewReg.orR) {
        genCsBundle_VEC_RGATHEREI16_SEW8(len)
      }.otherwise {
        widerElements()
      }
    } else {
      widerElements()
    }
  }

  // Single-register layout first, as the default; the group layouts below are connected later
  // and therefore take precedence when their vlmulReg encoding matches.
  genBySew(len = 1, splitSew8 = true)

  // (vlmulReg encoding, registers per group, two-uop layout available).
  // The 8-register group never had a split entry.
  // NOTE(review): presumably because 16-bit indices for 8-bit elements would need a
  // 16-register index group there - confirm against the decoder's legality checks.
  val groupLayouts = Seq(("b001", 2, true), ("b010", 4, true), ("b011", 8, false))
  groupLayouts.foreach { case (encoding, regs, splitSew8) =>
    when(vlmulReg === encoding.U) {
      genBySew(regs, splitSew8)
    }
  }
}
is(UopSplitType.VEC_COMPRESS) {
  /** Emits the uop table for a group of `len` registers.
    *
    * Row `row` handles source register vs2 + row and starts at slot row*(row+3)/2.
    * Every row except the last has row + 2 uops: row + 1 of them update a running copy of
    * destination registers (kept in VECTOR_TMP_REG_LMUL + col + 1), and one extra uop
    * (col == row + 1) writes VECTOR_TMP_REG_LMUL, which later rows read on port 0 in place of
    * vs1. The last row has row + 1 uops and writes the architectural registers dest + col.
    *
    * @param len number of registers in the group
    */
  def genCsBundle_VEC_COMPRESS(len: Int): Unit = {
    for {
      row <- 0 until len
      col <- 0 until (if (row == len - 1) row + 1 else row + 2)
    } {
      val slot = row * (row + 3) / 2 + col
      val uop = csBundle(slot)
      val isLastRow = row == len - 1
      val isExtraUop = col == row + 1

      // The diagonal uop is the first to touch destination register `row`, so it reads the
      // architectural old value; all others continue from a temporary.
      val prevVd = if (row == col) dest + row.U else (VECTOR_TMP_REG_LMUL + col + 1).U
      val nextVd =
        if (isLastRow) dest + col.U
        else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
        else (VECTOR_TMP_REG_LMUL + col + 1).U
      // Ports 0 and 2 are don't-care for the extra uop of a row.
      val port02Type = if (isExtraUop) DontCare else SrcType.vp

      uop.vecWen := true.B
      uop.v0Wen := false.B
      uop.srcType(0) := port02Type
      uop.srcType(1) := SrcType.vp
      uop.srcType(2) := port02Type
      uop.lsrc(0) := (if (row == 0) src1 else VECTOR_TMP_REG_LMUL.U)
      uop.lsrc(1) := src2 + row.U
      uop.lsrc(2) := prevVd
      uop.ldest := nextVd
      uop.uopIdx := slot.U
    }
  }

  // vlmulReg encoding -> registers per group. Other encodings generate nothing here.
  val groupSizes = Seq("b001" -> 2, "b010" -> 4, "b011" -> 8)
  groupSizes.foreach { case (encoding, regs) =>
    when(vlmulReg === encoding.U) {
      genCsBundle_VEC_COMPRESS(regs)
    }
  }
}
is(UopSplitType.VEC_MVNR) {
  // One uop per register: uop k works on vs1 + k, vs2 + k and (as old vd and result) dest + k.
  (0 until MAX_VLMUL).foreach { regIdx =>
    val uop = csBundle(regIdx)
    val offset = regIdx.U
    val vdReg = dest + offset
    uop.lsrc(0) := src1 + offset
    uop.lsrc(1) := src2 + offset
    uop.lsrc(2) := vdReg
    uop.ldest := vdReg
    uop.uopIdx := offset
  }
}
is(UopSplitType.VEC_US_LDST) {
  // uop 0: int-to-vector move (FuType.i2v, 64-bit element) of a scalar register into the
  // temporary vector register VECTOR_TMP_REG_LMUL. lsrc(0) keeps its default value.
  // NOTE(review): the scalar is presumably the base address - confirm against the load/store unit.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  // One memory uop per register of the group, in slots 1 .. MAX_VLMUL. Each reads the moved
  // scalar on port 0 and dest + k as old vd, and writes dest + k. uopIdx counts from zero,
  // i.e. it does not include the move uop.
  (0 until MAX_VLMUL).foreach { regIdx =>
    val memUop = csBundle(regIdx + 1)
    val vdReg = dest + regIdx.U
    memUop.srcType(0) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(2) := vdReg
    memUop.ldest := vdReg
    memUop.uopIdx := regIdx.U
    memUop.vlsInstr := true.B
  }

  // Segment accesses are fenced: the first uop waits for older instructions and the last uop
  // blocks younger ones.
  csBundle.head.waitForward := isUsSegment
  csBundle(numOfUop - 1.U).blockBackward := isUsSegment
}
is(UopSplitType.VEC_US_FF_LD) {
  // uop 0: int-to-vector move (FuType.i2v, 64-bit element) of a scalar register into the
  // temporary vector register VECTOR_TMP_REG_LMUL. lsrc(0) keeps its default value.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  // One load uop per register of the group, in slots 1 .. MAX_VLMUL. Each reads the moved scalar
  // on port 0 and dest + k as old vd, and writes dest + k. uopIdx does not count the move uop.
  (0 until MAX_VLMUL).foreach { regIdx =>
    val memUop = csBundle(regIdx + 1)
    val vdReg = dest + regIdx.U
    memUop.srcType(0) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(2) := vdReg
    memUop.ldest := vdReg
    memUop.uopIdx := regIdx.U
    memUop.vlsInstr := true.B
  }

  csBundle.head.waitForward := isUsSegment

  // The last uop does not load data: it reads vl (Vl_IDX on source port 4) and writes vl back.
  // These connections come after the loop so that they override the per-register defaults above.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.blockBackward := isUsSegment
  (0 to 3).foreach { port => lastUop.srcType(port) := SrcType.no }
  lastUop.srcType(4) := SrcType.vp
  lastUop.lsrc(4) := Vl_IDX.U
  lastUop.vecWen := false.B
  lastUop.vlWen := true.B
  lastUop.ldest := Vl_IDX.U
}
is(UopSplitType.VEC_S_LDST) {
  // uops 0 and 1: int-to-vector moves (FuType.i2v, 64-bit element) of two scalar registers into
  // the temporaries VECTOR_TMP_REG_LMUL and VECTOR_TMP_REG_LMUL + 1.
  // NOTE(review): presumably base address and stride - confirm against the load/store unit.
  for (moveIdx <- 0 until 2) {
    val moveUop = csBundle(moveIdx)
    moveUop.srcType(0) := SrcType.reg
    moveUop.srcType(1) := SrcType.imm
    moveUop.lsrc(1) := 0.U
    moveUop.ldest := (VECTOR_TMP_REG_LMUL + moveIdx).U
    moveUop.fuType := FuType.i2v.U
    moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
    moveUop.rfWen := false.B
    moveUop.fpWen := false.B
    moveUop.vecWen := true.B
    moveUop.vlsInstr := true.B
  }
  // The first move keeps its default lsrc(0); the second one takes the instruction's second
  // source register instead.
  csBundle(1).lsrc(0) := latchedInst.lsrc(1)

  // One memory uop per register of the group, in slots 2 .. MAX_VLMUL + 1. Each reads both moved
  // scalars and dest + k as old vd, and writes dest + k. uopIdx does not count the two moves.
  (0 until MAX_VLMUL).foreach { regIdx =>
    val memUop = csBundle(regIdx + 2)
    val vdReg = dest + regIdx.U
    memUop.srcType(0) := SrcType.vp
    memUop.srcType(1) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
    memUop.lsrc(2) := vdReg
    memUop.ldest := vdReg
    memUop.uopIdx := regIdx.U
    memUop.vlsInstr := true.B
  }

  // Segment accesses are fenced: the first uop waits for older instructions and the last uop
  // blocks younger ones.
  csBundle.head.waitForward := isSdSegment
  csBundle(numOfUop - 1.U).blockBackward := isSdSegment
}
is(UopSplitType.VEC_I_LDST) {
  /** Segment form, data side: fills slots 1 .. MAX_VLMUL with one uop per data register.
    * The first lmul * nf uops are live (old vd is read, the vector write is enabled); the
    * remaining ones get SrcType.no on port 2 and no vector write. Port 1 is disabled here and
    * set up afterwards by [[genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1]].
    *
    * @param lmul registers per field (compile-time value, unrelated to the run-time `lmul` signal)
    * @param nf   number of fields (compile-time value, i.e. the instruction's nf field plus one)
    */
  def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul: Int, nf: Int): Unit = {
    val liveRegs = lmul * nf
    (0 until MAX_VLMUL).foreach { regIdx =>
      val memUop = csBundle(regIdx + 1)
      val isLive = regIdx < liveRegs
      val vdReg = dest + regIdx.U
      memUop.srcType(0) := SrcType.vp
      memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      memUop.srcType(1) := SrcType.no
      memUop.lsrc(1) := src2 + regIdx.U
      memUop.srcType(2) := (if (isLive) SrcType.vp else SrcType.no)
      memUop.lsrc(2) := vdReg
      memUop.ldest := vdReg
      memUop.rfWen := false.B
      memUop.fpWen := false.B
      memUop.vecWen := isLive.B
      memUop.uopIdx := regIdx.U
      memUop.vlsInstr := true.B
    }
  }

  /** Segment form, index side: enables port 1 (vs2 + k) on the first `emul` uops only.
    *
    * @param emul number of index registers
    */
  def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul: Int): Unit = {
    (0 until MAX_VLMUL).foreach { regIdx =>
      val memUop = csBundle(regIdx + 1)
      memUop.srcType(1) := (if (regIdx < emul) SrcType.vp else SrcType.no)
      memUop.lsrc(1) := src2 + regIdx.U
    }
  }

  // Maps a 3-bit multiplier encoding to log2 of the register count; encodings other than
  // 001/010/011 map to 0, i.e. a single register.
  def log2Regs(encoding: UInt): UInt = MuxLookup(encoding, 0.U(2.W))(Seq(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  val sewEnc = Cat(0.U(1.W), vsewReg)
  val eewEnc = Cat(0.U(1.W), width)
  // eew + lmul - sew in the 3-bit encodings; the subtraction is written as "+ 1 + ~sew".
  val emulEnc: UInt = eewEnc + 1.U + vlmulReg + ~sewEnc
  val simple_lmul = log2Regs(vlmulReg)
  val simple_emul = log2Regs(emulEnc)

  // uop 0: int-to-vector move (FuType.i2v, 64-bit element) of a scalar register into the
  // temporary vector register VECTOR_TMP_REG_LMUL. lsrc(0) keeps its default value.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  when(nf === 0.U) {
    // Non-segment form: a per-uop lookup table turns (emul, lmul) into the register offsets of
    // the index register and of the data register used by that uop.
    val tableKey = Cat(simple_emul, simple_lmul)
    def selectReg(offset: UInt, base: UInt): UInt =
      Mux1H(UIntToOH(offset, MAX_VLMUL), (0 until MAX_VLMUL).map(k => base + k.U))

    (0 until MAX_VLMUL).foreach { uopNo =>
      val offsetTable = indexedLSRegOffset(uopNo)
      offsetTable.src := tableKey
      val memUop = csBundle(uopNo + 1)
      // The same register serves as old vd (port 2) and as destination.
      val vdReg = selectReg(offsetTable.outOffsetVd, dest)

      memUop.srcType(0) := SrcType.vp
      memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      memUop.lsrc(1) := selectReg(offsetTable.outOffsetVs2, src2)
      memUop.srcType(2) := SrcType.vp
      memUop.lsrc(2) := vdReg
      memUop.ldest := vdReg
      memUop.uopIdx := uopNo.U
      memUop.vlsInstr := true.B
    }
  }.otherwise {
    // nf field non-zero: segment indexed load/store.

    // Data side. Supported shapes, as (simple_lmul value, largest nf field value):
    // 1 register x 2..8 fields, 2 registers x 2..4 fields, 4 registers x 2 fields.
    val segmentShapes = Seq(0 -> 7, 1 -> 3, 2 -> 1)
    for {
      (lmulLog2, maxNfField) <- segmentShapes
      nfField <- 1 to maxNfField
    } {
      when((simple_lmul === lmulLog2.U) && (nf === nfField.U)) {
        genCsBundle_SEGMENT_INDEXED_LOADSTORE(1 << lmulLog2, nfField + 1)
      }
    }

    // Index side, connected after the data side so that it overrides port 1.
    for (emulLog2 <- 0 to 3) {
      when(simple_emul === emulLog2.U) {
        genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1 << emulLog2)
      }
    }

    // Stores never write a vector register.
    when(isVstore) {
      (0 until MAX_VLMUL).foreach { regIdx =>
        csBundle(regIdx + 1).vecWen := false.B
      }
    }
  }

  // Segment accesses are fenced: the first uop waits for older instructions and the last uop
  // blocks younger ones.
  csBundle.head.waitForward := isIxSegment
  csBundle(numOfUop - 1.U).blockBackward := isIxSegment
}
1979  }
1980
// Number of uops rename can accept this cycle: outReadys.head gates the whole group, so it is
// either RenameWidth or zero.
val readyCounter = Mux(outReadys.head, RenameWidth.U, 0.U)

// True when all uops still pending for the current complex instruction fit into this cycle.
val thisAllOut = uopRes <= readyCounter

// Counts the RenameWidth-sized groups already handed out for the current instruction; it
// selects which group of the uop table is presented on the outputs below.
val count = RegInit(0.U(log2Up(maxUopSize/RenameWidth + 1).W))
val countNext = WireInit(count)

// Next-state logic. Outside the cases below stateNext / uopResNext keep the defaults given at
// their declaration (not part of this excerpt).
when(state === s_idle) {
  when(inValid) {
    // A new complex instruction arrives: load its uop count and start from group 0.
    stateNext := s_active
    uopResNext := inUopInfo.numOfUop
    countNext := 0.U
  }
}.elsewhen(state === s_active) {
  when(thisAllOut) {
    // The current instruction finishes this cycle; accept the next one back-to-back if present.
    stateNext := Mux(inValid, s_active, s_idle)
    uopResNext := Mux(inValid, inUopInfo.numOfUop, 0.U)
    countNext := 0.U
  }.otherwise {
    // More uops remain: subtract what was sent and advance to the next group if it was taken.
    stateNext := s_active
    uopResNext := uopRes - readyCounter
    countNext := count + outReadys.head.asUInt
  }
}

// Register update; a redirect overrides the computed next values and returns to idle.
state := stateNext
uopRes := uopResNext
count := countNext
when(io.redirect) {
  state := s_idle
  uopRes := 0.U
  count := 0.U
}

// Uops actually issued this cycle: min(uopRes, readyCounter).
val complexNum = Mux(thisAllOut, uopRes, readyCounter)

fixedDecodedInst := csBundle

// When vstart is non-zero, the last uop additionally requests a pipeline flush (on top of any
// flush the instruction already asks for). This connection must follow the bulk copy above.
fixedDecodedInst(numOfUop - 1.U).flushPipe := (vstartReg =/= 0.U) || latchedInst.flushPipe

// uopsSeq(lane)(g) is uop number g * RenameWidth + lane, so indexing every lane with `count`
// yields the current group of RenameWidth consecutive uops.
val uopsSeq = (0 until RenameWidth).map { lane =>
  VecInit(fixedDecodedInst.zipWithIndex.collect {
    case (uop, uopNo) if uopNo % RenameWidth == lane => uop
  })
}
(0 until RenameWidth).foreach { lane =>
  outValids(lane) := complexNum > lane.U
  outDecodedInsts(lane) := uopsSeq(lane)(count)
}

outComplexNum := Mux(state === s_active, complexNum, 0.U)
// A new instruction can be taken when idle, or in the cycle the current one drains completely.
inReady := (state === s_idle) || ((state === s_active) && thisAllOut)


XSError(inValid && inUopInfo.numOfUop === 0.U,
  p"uop number ${inUopInfo.numOfUop} is illegal, cannot be zero")
2039//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
2040//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
2041//  val notInf = Wire(Vec(DecodeWidth, Bool()))
2042//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
2043//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
2044//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
2045//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
2046//
2047//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
2048//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
2049//    0.U)
2050//  validToRename.zipWithIndex.foreach{
2051//    case(dst, i) =>
2052//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
2053//      dst := MuxCase(false.B, Seq(
2054//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
2055//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
2056//      ).toSeq)
2057//  }
2058//
2059//  readyToIBuf.zipWithIndex.foreach {
2060//    case (dst, i) =>
2061//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
2062//      dst := MuxCase(true.B, Seq(
2063//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
2064//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
2065//      ).toSeq)
2066//  }
2067//
2068//  io.deq.decodedInsts := decodedInsts
2069//  io.deq.complexNum := complexNum
2070//  io.deq.validToRename := validToRename
2071//  io.deq.readyToIBuf := readyToIBuf
2072}
2073