xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 039cdc35f5f3b68b6295ec5ace90f22a77322e02)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for non-segment indexed vector load/store.
  *
  * One instance is built per uop index. For that fixed `uopIdx` it maps the
  * 4-bit key `src = {emul[1:0], lmul[1:0]}` to the register-number offsets that
  * the uop adds to vs2 (`outOffsetVs2`) and to vd (`outOffsetVd`).
  * Both key fields are log2 encoded: the table is generated with `1 << emul`
  * and `1 << lmul`, so the values 0..3 stand for 1, 2, 4 and 8 registers.
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx: Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair for one uop.
    *
    * The number of uops is `1 << max(lmul, emul)`. The larger register group
    * advances by one register per uop; the smaller one advances once every
    * `1 << |emul - lmul|` uops.
    *
    * @param lmul   log2 of LMUL
    * @param emul   log2 of EMUL
    * @param uopIdx index of the uop within the instruction
    * @return `(offsetVs2, offsetVd)`; `(0, 0)` when `uopIdx` is outside
    *         `[0, 1 << max(lmul, emul))`
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul: Int, emul: Int, uopIdx: Int): (Int, Int) = {
    // only consider non segment indexed load/store
    val uopNum = 1 << (lmul max emul)
    if (uopIdx < 0 || uopIdx >= uopNum) {
      // this uop does not exist for the given lmul/emul combination
      (0, 0)
    } else if (lmul < emul) {
      // lmul < emul: uop count follows emul, vd advances more slowly than vs2
      (uopIdx, uopIdx / (1 << (emul - lmul)))
    } else {
      // lmul >= emul: uop count follows lmul, vs2 advances more slowly than vd
      (uopIdx / (1 << (lmul - emul)), uopIdx)
    }
  }

  // indexed load/store: one row per (emul, lmul) pair, emul-major so the row
  // order matches the key encoding used below
  val combVemulNf: Seq[(Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
  } yield {
    val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
    (emul, lmul, offsetVs2, offsetVd)
  }

  // key = {emul, lmul} (4 bits), value = {offsetVs2, offsetVd} (3 bits each)
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/** Numeric constants mixed into the vector uop-splitting decode logic. */
trait VectorConstants {
  /** Largest number of per-register uops that the split loops unroll. */
  val MAX_VLMUL: Int = 8

  /** Logical index of the first vector temporary register (33~47  ->  15). */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Register index used for compress; located in the v0 regfile. */
  val VECTOR_COMPRESS: Int = 1

  /** Maximum uop count of an indexed load/store. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * Port bundle of the complex-instruction decoder (`DecodeUnitComp`).
  *
  * One already simple-decoded instruction plus its uop information enters
  * through `in`; the resulting uops leave through `out.complexDecodedInsts`.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // 1-bit redirect indication (presumably a pipeline flush request; confirm against the driver)
  val redirect = Input(Bool())
  // custom CSR control signals
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // bypassed vtype; DecodeUnitComp connects it to the vpu fields of the vset uops
  val vtypeBypass = Input(new VType)
  // When the first instruction in the decode vector is a complex instruction, it is passed in here
  val in = Flipped(DecoupledIO(new Bundle {
    // the instruction as produced by the simple decoder
    val simpleDecodedInst = new DecodedInst
    // uop information (uop count, write-back count, lmul) that accompanies the instruction
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    // RenameWidth decoupled output slots, each carrying one decoded uop
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  // 3-bit count output (presumably the number of uops presented on `out` this cycle; confirm)
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153  val vstartReg = latchedInst.vpu.vstart
154
155  //Type of uop Div
156  val typeOfSplit = latchedInst.uopSplitType
157  val src1Type = latchedInst.srcType(0)
158  val src1IsImm = src1Type === SrcType.imm
159  val src1IsFp = src1Type === SrcType.fp
160
161  val isVstore = FuType.isVStore(latchedInst.fuType)
162
163  numOfUop := latchedUopInfo.numOfUop
164  numOfWB := latchedUopInfo.numOfWB
165
166  //uops dispatch
167  val s_idle :: s_active :: Nil = Enum(2)
168  val state = RegInit(s_idle)
169  val stateNext = WireDefault(state)
170  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
171  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
172  val uopResNext = WireInit(uopRes)
173  val e64 = 3.U(2.W)
174  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
175  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
176  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
177
178  //uop div up to maxUopSize
179  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
180  csBundle.foreach { case dst =>
181    dst := latchedInst
182    dst.numUops := latchedUopInfo.numOfUop
183    dst.numWB := latchedUopInfo.numOfWB
184    dst.firstUop := false.B
185    dst.lastUop := false.B
186    dst.vlsInstr := false.B
187  }
188
189  csBundle(0).firstUop := true.B
190  csBundle(numOfUop - 1.U).lastUop := true.B
191
  // When vstart is not zero, the last uop resets vstart to zero.
  // Therefore the last uop blocks backward and the first uop flushes the pipe.
194  csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U
195  csBundle(0.U).flushPipe := vstartReg =/= 0.U
196
197  switch(typeOfSplit) {
198    is(UopSplitType.VSET) {
199      // In simple decoder, rfWen and vecWen are not set
200      when(isVsetSimple) {
201        // Default
202        // uop0 set rd, never flushPipe
203        csBundle(0).fuType := FuType.vsetiwi.U
204        csBundle(0).flushPipe := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
205        csBundle(0).blockBackward := false.B
206        csBundle(0).rfWen := true.B
207        // uop1 set vl, vsetvl will flushPipe
208        csBundle(1).ldest := Vl_IDX.U
209        csBundle(1).vecWen := false.B
210        csBundle(1).vlWen := true.B
211        // vsetvl flush pipe and block backward
212        csBundle(1).flushPipe := false.B
213        csBundle(1).blockBackward := Mux(VSETOpType.isVsetvl(latchedInst.fuOpType), true.B, vstartReg =/= 0.U)
214        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
215          // write nothing, uop0 is a nop instruction
216          csBundle(0).rfWen := false.B
217          csBundle(0).fpWen := false.B
218          csBundle(0).vecWen := false.B
219          csBundle(0).vlWen := false.B
220          csBundle(1).fuType := FuType.vsetfwf.U
221          csBundle(1).srcType(0) := SrcType.no
222          csBundle(1).srcType(2) := SrcType.no
223          csBundle(1).srcType(3) := SrcType.no
224          csBundle(1).srcType(4) := SrcType.vp
225          csBundle(1).lsrc(4) := Vl_IDX.U
226        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
227          // uop0: mv vtype gpr to vector region
228          csBundle(0).srcType(0) := SrcType.xp
229          csBundle(0).srcType(1) := SrcType.no
230          csBundle(0).lsrc(0) := src2
231          csBundle(0).lsrc(1) := 0.U
232          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
233          csBundle(0).fuType := FuType.i2v.U
234          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
235          csBundle(0).rfWen := false.B
236          csBundle(0).fpWen := false.B
237          csBundle(0).vecWen := true.B
238          csBundle(0).vlWen := false.B
239          // uop1: uvsetvcfg_vv
240          csBundle(1).fuType := FuType.vsetfwf.U
241          // vl
242          csBundle(1).srcType(0) := SrcType.no
243          csBundle(1).srcType(2) := SrcType.no
244          csBundle(1).srcType(3) := SrcType.no
245          csBundle(1).srcType(4) := SrcType.vp
246          csBundle(1).lsrc(4) := Vl_IDX.U
247          // vtype
248          csBundle(1).srcType(1) := SrcType.vp
249          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
250          csBundle(1).vecWen := false.B
251          csBundle(1).vlWen := true.B
252          csBundle(1).ldest := Vl_IDX.U
253        }.elsewhen(dest === 0.U) {
254          // write nothing, uop0 is a nop instruction
255          csBundle(0).rfWen := false.B
256          csBundle(0).fpWen := false.B
257          csBundle(0).vecWen := false.B
258          csBundle(0).vlWen := false.B
259        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) {
          // because vsetvl may modify src2 when src2 == rd,
          // we need to write rd in the second uop to avoid the dependency
262          // uop0 set vl
263          csBundle(0).fuType := FuType.vsetiwf.U
264          csBundle(0).ldest := Vl_IDX.U
265          csBundle(0).rfWen := false.B
266          csBundle(0).vlWen := true.B
267          // uop1 set rd
268          csBundle(1).fuType := FuType.vsetiwi.U
269          csBundle(1).ldest := dest
270          csBundle(1).rfWen := true.B
271          csBundle(1).vlWen := false.B
272        }
273        // use bypass vtype from vtypeGen
274        csBundle(0).vpu.connectVType(io.vtypeBypass)
275        csBundle(1).vpu.connectVType(io.vtypeBypass)
276      }
277    }
278    is(UopSplitType.VEC_VVV) {
279      for (i <- 0 until MAX_VLMUL) {
280        csBundle(i).lsrc(0) := src1 + i.U
281        csBundle(i).lsrc(1) := src2 + i.U
282        csBundle(i).lsrc(2) := dest + i.U
283        csBundle(i).ldest := dest + i.U
284        csBundle(i).uopIdx := i.U
285      }
286    }
287    is(UopSplitType.VEC_VFV) {
288      /*
289      f to vector move
290       */
291      csBundle(0).srcType(0) := SrcType.fp
292      csBundle(0).srcType(1) := SrcType.imm
293      csBundle(0).srcType(2) := SrcType.imm
294      csBundle(0).lsrc(1) := 0.U
295      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
296      csBundle(0).fuType := FuType.f2v.U
297      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
298      csBundle(0).vecWen := true.B
299      csBundle(0).vpu.isReverse := false.B
300      /*
301      LMUL
302       */
303      for (i <- 0 until MAX_VLMUL) {
304        csBundle(i + 1).srcType(0) := SrcType.vp
305        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
306        csBundle(i + 1).lsrc(1) := src2 + i.U
307        csBundle(i + 1).lsrc(2) := dest + i.U
308        csBundle(i + 1).ldest := dest + i.U
309        csBundle(i + 1).uopIdx := i.U
310      }
311    }
312    is(UopSplitType.VEC_EXT2) {
313      for (i <- 0 until MAX_VLMUL / 2) {
314        csBundle(2 * i).lsrc(1) := src2 + i.U
315        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
316        csBundle(2 * i).ldest := dest + (2 * i).U
317        csBundle(2 * i).uopIdx := (2 * i).U
318        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
319        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
320        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
321        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
322      }
323    }
324    is(UopSplitType.VEC_EXT4) {
325      for (i <- 0 until MAX_VLMUL / 4) {
326        csBundle(4 * i).lsrc(1) := src2 + i.U
327        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
328        csBundle(4 * i).ldest := dest + (4 * i).U
329        csBundle(4 * i).uopIdx := (4 * i).U
330        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
331        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
332        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
333        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
334        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
335        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
336        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
337        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
338        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
339        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
340        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
341        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
342      }
343    }
344    is(UopSplitType.VEC_EXT8) {
345      for (i <- 0 until MAX_VLMUL) {
346        csBundle(i).lsrc(1) := src2
347        csBundle(i).lsrc(2) := dest + i.U
348        csBundle(i).ldest := dest + i.U
349        csBundle(i).uopIdx := i.U
350      }
351    }
352    is(UopSplitType.VEC_0XV) {
353      /*
354      i/f to vector move
355       */
356      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
357      csBundle(0).srcType(1) := SrcType.imm
358      csBundle(0).srcType(2) := SrcType.imm
359      csBundle(0).lsrc(1) := 0.U
360      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
361      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
362      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
363      csBundle(0).rfWen := false.B
364      csBundle(0).fpWen := false.B
365      csBundle(0).vecWen := true.B
366      /*
367      vmv.s.x
368       */
369      csBundle(1).srcType(0) := SrcType.vp
370      csBundle(1).srcType(1) := SrcType.imm
371      csBundle(1).srcType(2) := SrcType.vp
372      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
373      csBundle(1).lsrc(1) := 0.U
374      csBundle(1).lsrc(2) := dest
375      csBundle(1).ldest := dest
376      csBundle(1).rfWen := false.B
377      csBundle(1).fpWen := false.B
378      csBundle(1).vecWen := true.B
379      csBundle(1).uopIdx := 0.U
380    }
381    is(UopSplitType.VEC_VXV) {
382      /*
383      i to vector move
384       */
385      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
386      csBundle(0).srcType(1) := SrcType.imm
387      csBundle(0).srcType(2) := SrcType.imm
388      csBundle(0).lsrc(1) := 0.U
389      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
390      csBundle(0).fuType := FuType.i2v.U
391      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
392      csBundle(0).vecWen := true.B
393      csBundle(0).vpu.isReverse := false.B
394      /*
395      LMUL
396       */
397      for (i <- 0 until MAX_VLMUL) {
398        csBundle(i + 1).srcType(0) := SrcType.vp
399        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
400        csBundle(i + 1).lsrc(1) := src2 + i.U
401        csBundle(i + 1).lsrc(2) := dest + i.U
402        csBundle(i + 1).ldest := dest + i.U
403        csBundle(i + 1).uopIdx := i.U
404      }
405    }
406    is(UopSplitType.VEC_VVW) {
407      for (i <- 0 until MAX_VLMUL / 2) {
408        csBundle(2 * i).lsrc(0) := src1 + i.U
409        csBundle(2 * i).lsrc(1) := src2 + i.U
410        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
411        csBundle(2 * i).ldest := dest + (2 * i).U
412        csBundle(2 * i).uopIdx := (2 * i).U
413        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
414        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
415        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
416        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
417        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
418      }
419    }
420    is(UopSplitType.VEC_VFW) {
421      /*
422      f to vector move
423       */
424      csBundle(0).srcType(0) := SrcType.fp
425      csBundle(0).srcType(1) := SrcType.imm
426      csBundle(0).srcType(2) := SrcType.imm
427      csBundle(0).lsrc(1) := 0.U
428      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
429      csBundle(0).fuType := FuType.f2v.U
430      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
431      csBundle(0).rfWen := false.B
432      csBundle(0).fpWen := false.B
433      csBundle(0).vecWen := true.B
434
435      for (i <- 0 until MAX_VLMUL / 2) {
436        csBundle(2 * i + 1).srcType(0) := SrcType.vp
437        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
438        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
439        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
440        csBundle(2 * i + 1).ldest := dest + (2 * i).U
441        csBundle(2 * i + 1).uopIdx := (2 * i).U
442        csBundle(2 * i + 2).srcType(0) := SrcType.vp
443        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
444        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
445        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
446        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
447        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
448      }
449    }
450    is(UopSplitType.VEC_WVW) {
451      for (i <- 0 until MAX_VLMUL / 2) {
452        csBundle(2 * i).lsrc(0) := src1 + i.U
453        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
454        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
455        csBundle(2 * i).ldest := dest + (2 * i).U
456        csBundle(2 * i).uopIdx := (2 * i).U
457        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
458        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
459        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
460        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
461        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
462      }
463    }
464    is(UopSplitType.VEC_VXW) {
465      /*
466      i to vector move
467       */
468      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
469      csBundle(0).srcType(1) := SrcType.imm
470      csBundle(0).srcType(2) := SrcType.imm
471      csBundle(0).lsrc(1) := 0.U
472      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
473      csBundle(0).fuType := FuType.i2v.U
474      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
475      csBundle(0).vecWen := true.B
476
477      for (i <- 0 until MAX_VLMUL / 2) {
478        csBundle(2 * i + 1).srcType(0) := SrcType.vp
479        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
480        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
481        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
482        csBundle(2 * i + 1).ldest := dest + (2 * i).U
483        csBundle(2 * i + 1).uopIdx := (2 * i).U
484        csBundle(2 * i + 2).srcType(0) := SrcType.vp
485        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
486        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
487        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
488        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
489        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
490      }
491    }
492    is(UopSplitType.VEC_WXW) {
493      /*
494      i to vector move
495       */
496      csBundle(0).srcType(0) := SrcType.reg
497      csBundle(0).srcType(1) := SrcType.imm
498      csBundle(0).srcType(2) := SrcType.imm
499      csBundle(0).lsrc(1) := 0.U
500      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
501      csBundle(0).fuType := FuType.i2v.U
502      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
503      csBundle(0).vecWen := true.B
504
505      for (i <- 0 until MAX_VLMUL / 2) {
506        csBundle(2 * i + 1).srcType(0) := SrcType.vp
507        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
508        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
509        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
510        csBundle(2 * i + 1).ldest := dest + (2 * i).U
511        csBundle(2 * i + 1).uopIdx := (2 * i).U
512        csBundle(2 * i + 2).srcType(0) := SrcType.vp
513        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
514        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
515        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
516        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
517        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
518      }
519    }
520    is(UopSplitType.VEC_WVV) {
521      for (i <- 0 until MAX_VLMUL / 2) {
522
523        csBundle(2 * i).lsrc(0) := src1 + i.U
524        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
525        csBundle(2 * i).lsrc(2) := dest + i.U
526        csBundle(2 * i).ldest := dest + i.U
527        csBundle(2 * i).uopIdx := (2 * i).U
528        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
529        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
530        csBundle(2 * i + 1).lsrc(2) := dest + i.U
531        csBundle(2 * i + 1).ldest := dest + i.U
532        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
533      }
534    }
535    is(UopSplitType.VEC_WFW) {
536      /*
537      f to vector move
538       */
539      csBundle(0).srcType(0) := SrcType.fp
540      csBundle(0).srcType(1) := SrcType.imm
541      csBundle(0).srcType(2) := SrcType.imm
542      csBundle(0).lsrc(1) := 0.U
543      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
544      csBundle(0).fuType := FuType.f2v.U
545      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
546      csBundle(0).rfWen := false.B
547      csBundle(0).fpWen := false.B
548      csBundle(0).vecWen := true.B
549
550      for (i <- 0 until MAX_VLMUL / 2) {
551        csBundle(2 * i + 1).srcType(0) := SrcType.vp
552        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
553        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
554        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
555        csBundle(2 * i + 1).ldest := dest + (2 * i).U
556        csBundle(2 * i + 1).uopIdx := (2 * i).U
557        csBundle(2 * i + 2).srcType(0) := SrcType.vp
558        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
559        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
560        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
561        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
562        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
563      }
564    }
565    is(UopSplitType.VEC_WXV) {
566      /*
567      i to vector move
568       */
569      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
570      csBundle(0).srcType(1) := SrcType.imm
571      csBundle(0).srcType(2) := SrcType.imm
572      csBundle(0).lsrc(1) := 0.U
573      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
574      csBundle(0).fuType := FuType.i2v.U
575      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
576      csBundle(0).vecWen := true.B
577
578      for (i <- 0 until MAX_VLMUL / 2) {
579        csBundle(2 * i + 1).srcType(0) := SrcType.vp
580        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
581        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
582        csBundle(2 * i + 1).lsrc(2) := dest + i.U
583        csBundle(2 * i + 1).ldest := dest + i.U
584        csBundle(2 * i + 1).uopIdx := (2 * i).U
585        csBundle(2 * i + 2).srcType(0) := SrcType.vp
586        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
587        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
588        csBundle(2 * i + 2).lsrc(2) := dest + i.U
589        csBundle(2 * i + 2).ldest := dest + i.U
590        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
591      }
592    }
593    is(UopSplitType.VEC_VVM) {
594      csBundle(0).lsrc(2) := dest
595      csBundle(0).ldest := dest
596      csBundle(0).uopIdx := 0.U
597      for (i <- 1 until MAX_VLMUL) {
598        csBundle(i).lsrc(0) := src1 + i.U
599        csBundle(i).lsrc(1) := src2 + i.U
600        csBundle(i).lsrc(2) := dest
601        csBundle(i).ldest := dest
602        csBundle(i).uopIdx := i.U
603      }
604    }
605    is(UopSplitType.VEC_VFM) {
606      /*
607      f to vector move
608       */
609      csBundle(0).srcType(0) := SrcType.fp
610      csBundle(0).srcType(1) := SrcType.imm
611      csBundle(0).srcType(2) := SrcType.imm
612      csBundle(0).lsrc(1) := 0.U
613      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
614      csBundle(0).fuType := FuType.f2v.U
615      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
616      csBundle(0).rfWen := false.B
617      csBundle(0).fpWen := false.B
618      csBundle(0).vecWen := true.B
619      //LMUL
620      csBundle(1).srcType(0) := SrcType.vp
621      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
622      csBundle(1).lsrc(2) := dest
623      csBundle(1).ldest := dest
624      csBundle(1).uopIdx := 0.U
625      for (i <- 1 until MAX_VLMUL) {
626        csBundle(i + 1).srcType(0) := SrcType.vp
627        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
628        csBundle(i + 1).lsrc(1) := src2 + i.U
629        csBundle(i + 1).lsrc(2) := dest
630        csBundle(i + 1).ldest := dest
631        csBundle(i + 1).uopIdx := i.U
632      }
633      csBundle(numOfUop - 1.U).ldest := dest
634    }
635    is(UopSplitType.VEC_VXM) {
636      /*
637      i to vector move
638       */
639      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
640      csBundle(0).srcType(1) := SrcType.imm
641      csBundle(0).srcType(2) := SrcType.imm
642      csBundle(0).lsrc(1) := 0.U
643      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
644      csBundle(0).fuType := FuType.i2v.U
645      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
646      csBundle(0).vecWen := true.B
647      //LMUL
648      csBundle(1).srcType(0) := SrcType.vp
649      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
650      csBundle(1).lsrc(2) := dest
651      csBundle(1).ldest := dest
652      csBundle(1).uopIdx := 0.U
653      for (i <- 1 until MAX_VLMUL) {
654        csBundle(i + 1).srcType(0) := SrcType.vp
655        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
656        csBundle(i + 1).lsrc(1) := src2 + i.U
657        csBundle(i + 1).lsrc(2) := dest
658        csBundle(i + 1).ldest := dest
659        csBundle(i + 1).uopIdx := i.U
660      }
661      csBundle(numOfUop - 1.U).ldest := dest
662    }
663    is(UopSplitType.VEC_SLIDE1UP) {
664      /*
665      i to vector move
666       */
667      csBundle(0).srcType(0) := SrcType.reg
668      csBundle(0).srcType(1) := SrcType.imm
669      csBundle(0).srcType(2) := SrcType.imm
670      csBundle(0).lsrc(1) := 0.U
671      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
672      csBundle(0).fuType := FuType.i2v.U
673      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
674      csBundle(0).vecWen := true.B
675      //LMUL
676      csBundle(1).srcType(0) := SrcType.vp
677      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
678      csBundle(1).lsrc(2) := dest
679      csBundle(1).ldest := dest
680      csBundle(1).uopIdx := 0.U
681      for (i <- 1 until MAX_VLMUL) {
682        csBundle(i + 1).srcType(0) := SrcType.vp
683        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
684        csBundle(i + 1).lsrc(1) := src2 + i.U
685        csBundle(i + 1).lsrc(2) := dest + i.U
686        csBundle(i + 1).ldest := dest + i.U
687        csBundle(i + 1).uopIdx := i.U
688      }
689    }
690    is(UopSplitType.VEC_FSLIDE1UP) {
691      /*
692      f to vector move
693       */
694      csBundle(0).srcType(0) := SrcType.fp
695      csBundle(0).srcType(1) := SrcType.imm
696      csBundle(0).srcType(2) := SrcType.imm
697      csBundle(0).lsrc(1) := 0.U
698      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
699      csBundle(0).fuType := FuType.f2v.U
700      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
701      csBundle(0).rfWen := false.B
702      csBundle(0).fpWen := false.B
703      csBundle(0).vecWen := true.B
704      //LMUL
705      csBundle(1).srcType(0) := SrcType.vp
706      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
707      csBundle(1).lsrc(1) := src2
708      csBundle(1).lsrc(2) := dest
709      csBundle(1).ldest := dest
710      csBundle(1).uopIdx := 0.U
711      for (i <- 1 until MAX_VLMUL) {
712        csBundle(i + 1).srcType(0) := SrcType.vp
713        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
714        csBundle(i + 1).lsrc(1) := src2 + i.U
715        csBundle(i + 1).lsrc(2) := dest + i.U
716        csBundle(i + 1).ldest := dest + i.U
717        csBundle(i + 1).uopIdx := i.U
718      }
719    }
720    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
721      /*
722      i to vector move
723       */
724      csBundle(0).srcType(0) := SrcType.reg
725      csBundle(0).srcType(1) := SrcType.imm
726      csBundle(0).srcType(2) := SrcType.imm
727      csBundle(0).lsrc(1) := 0.U
728      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
729      csBundle(0).fuType := FuType.i2v.U
730      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
731      csBundle(0).vecWen := true.B
732      //LMUL
733      for (i <- 0 until MAX_VLMUL) {
734        csBundle(2 * i + 1).srcType(0) := SrcType.vp
735        csBundle(2 * i + 1).srcType(1) := SrcType.vp
736        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
737        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
738        csBundle(2 * i + 1).lsrc(2) := dest + i.U
739        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
740        csBundle(2 * i + 1).uopIdx := (2 * i).U
741        if (2 * i + 2 < MAX_VLMUL * 2) {
742          csBundle(2 * i + 2).srcType(0) := SrcType.vp
743          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
744          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
745          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
746          csBundle(2 * i + 2).ldest := dest + i.U
747          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
748        }
749      }
750      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
751      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
752      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
753    }
754    is(UopSplitType.VEC_FSLIDE1DOWN) {
       // Same uop layout as the VEC_SLIDE1DOWN case, except that uop 0 is an f2v move:
       // source 0 is typed SrcType.fp, and rfWen/fpWen are explicitly cleared.
       //   uop 0       : f2v move writing VECTOR_TMP_REG_LMUL;
       //   uop 2*i + 1 : reads src2+i+1, src2+i and dest+i, writes VECTOR_TMP_REG_LMUL+1;
       //   uop 2*i + 2 : reads VECTOR_TMP_REG_LMUL and VECTOR_TMP_REG_LMUL+1, writes dest+i.
755      /*
756      f to vector move
757       */
758      csBundle(0).srcType(0) := SrcType.fp
759      csBundle(0).srcType(1) := SrcType.imm
760      csBundle(0).srcType(2) := SrcType.imm
761      csBundle(0).lsrc(1) := 0.U
762      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
763      csBundle(0).fuType := FuType.f2v.U
         // fuOpType = bits 2..0 of IF2VectorType.fDup2Vec concatenated with vsewReg.
764      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
         // Only the vector write enable is asserted for the move uop.
765      csBundle(0).rfWen := false.B
766      csBundle(0).fpWen := false.B
767      csBundle(0).vecWen := true.B
768      //LMUL
         // The loop bound is the elaboration-time constant MAX_VLMUL, so slots are generated
         // for every possible register of the group, not only for the current lmul.
769      for (i <- 0 until MAX_VLMUL) {
770        csBundle(2 * i + 1).srcType(0) := SrcType.vp
771        csBundle(2 * i + 1).srcType(1) := SrcType.vp
772        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
773        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
774        csBundle(2 * i + 1).lsrc(2) := dest + i.U
775        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
776        csBundle(2 * i + 1).uopIdx := (2 * i).U
           // Elaboration-time guard: for the last i the even slot would have index
           // 2 * MAX_VLMUL, so that slot is not generated.
777        if (2 * i + 2 < MAX_VLMUL * 2) {
778          csBundle(2 * i + 2).srcType(0) := SrcType.vp
779          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
780          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
781          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
782          csBundle(2 * i + 2).ldest := dest + i.U
783          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
784        }
785      }
         // Runtime-indexed connects placed after the loop: under Chisel's last-connect
         // semantics they override the loop's srcType(0)/lsrc(0)/ldest for the uop at
         // numOfUop-1, which then reads VECTOR_TMP_REG_LMUL and writes dest+lmul-1.
786      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
787      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
788      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
789    }
790    is(UopSplitType.VEC_VRED) {
       // Uop layout depends on vlmulReg. For encodings b001 / b010 / b011 the src2 register
       // group is read in pairs (src2+2k+1, src2+2k) into VECTOR_TMP_REG_LMUL+k, and those
       // temporaries are paired again until a single one is left. The final `when` then
       // patches the uop at numOfUop-1 so that it reads src1, VECTOR_TMP_REG_LMUL+numOfUop-2
       // and dest, and writes dest. No other vlmulReg value is assigned in this case.
         // b001: one pair uop (src2+1, src2) -> VECTOR_TMP_REG_LMUL.
791      when(vlmulReg === "b001".U) {
792        csBundle(0).srcType(2) := SrcType.DC
793        csBundle(0).lsrc(0) := src2 + 1.U
794        csBundle(0).lsrc(1) := src2
795        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
796        csBundle(0).uopIdx := 0.U
797      }
         // b010: two pair uops over src2..src2+3, then one uop joining the two temporaries.
798      when(vlmulReg === "b010".U) {
799        csBundle(0).srcType(2) := SrcType.DC
800        csBundle(0).lsrc(0) := src2 + 1.U
801        csBundle(0).lsrc(1) := src2
802        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
803        csBundle(0).uopIdx := 0.U
804
805        csBundle(1).srcType(2) := SrcType.DC
806        csBundle(1).lsrc(0) := src2 + 3.U
807        csBundle(1).lsrc(1) := src2 + 2.U
808        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
809        csBundle(1).uopIdx := 1.U
810
811        csBundle(2).srcType(2) := SrcType.DC
812        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
813        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
814        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
815        csBundle(2).uopIdx := 2.U
816      }
         // b011: the same tree generated by an elaboration-time loop over MAX_VLMUL slots.
817      when(vlmulReg === "b011".U) {
818        for (i <- 0 until MAX_VLMUL) {
             // First level: pairs of src2 registers.
819          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
820            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
821            csBundle(i).lsrc(1) := src2 + (i * 2).U
822            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
             // Second level: pairs of first-level temporaries.
823          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
824            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
825            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
826            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
             // NOTE(review): this branch hard-codes uop 6 and temporaries +4/+5/+6 instead of
             // deriving them from i; it lines up with the loop only if MAX_VLMUL is 8 — confirm.
827          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
828            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
829            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
830            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
831          }
             // Applied to every slot, including the last one that gets no lsrc/ldest above.
832          csBundle(i).srcType(2) := SrcType.DC
833          csBundle(i).uopIdx := i.U
834        }
835      }
         // Condition: vlmulReg(2) is 0 and vlmulReg(1, 0) is non-zero, i.e. the three
         // encodings handled above. Being the last connects in this case, these assignments
         // override any earlier values for the uop at numOfUop-1.
836      when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
837        /*
838         * 2 <= vlmul <= 8
839         */
840        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
841        csBundle(numOfUop - 1.U).lsrc(0) := src1
842        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
843        csBundle(numOfUop - 1.U).lsrc(2) := dest
844        csBundle(numOfUop - 1.U).ldest := dest
845        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
846      }
847    }
848    is(UopSplitType.VEC_VFRED) {
       // Uop layout is selected by the pair (vlmul, vsew). Every populated branch follows
       // the same three stages:
       //   1. (m2 / m4 / m8 only) uops read register pairs (src2+2k+1, src2+2k) into
       //      VECTOR_TMP_REG_LMUL+k, then pairs of those temporaries, until one is left;
       //   2. "fold" uops read the same register on lsrc(0) and lsrc(1) and set exactly one
       //      of isFoldTo1_2 / isFoldTo1_4 / isFoldTo1_8, each writing the next temporary;
       //   3. the last uop reads src1 and the final temporary and writes dest.
       // (vlmul, vsew) pairs that have no `when` branch below are not assigned in this case.
849      val vlmul = vlmulReg
850      val vsew = vsewReg
         // m8: 4 + 2 + 1 pair uops (0..6), then 1 / 2 / 3 fold uops for e64 / e32 / e16.
851      when(vlmul === VLmul.m8){
852        for (i <- 0 until 4) {
853          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
854          csBundle(i).lsrc(1) := src2 + (i * 2).U
855          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
856          csBundle(i).uopIdx := i.U
857        }
858        for (i <- 4 until 6) {
859          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
860          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
861          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
862          csBundle(i).uopIdx := i.U
863        }
864        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
865        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
866        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
867        csBundle(6).uopIdx := 6.U
           // e64: one fold (1_2), final uop is 8.
868        when(vsew === VSew.e64) {
869          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
870          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
871          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
872          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
873          csBundle(7).uopIdx := 7.U
874          csBundle(8).lsrc(0) := src1
875          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
876          csBundle(8).ldest := dest
877          csBundle(8).uopIdx := 8.U
878        }
           // e32: two folds (1_2, 1_4), final uop is 9.
879        when(vsew === VSew.e32) {
880          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
881          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
882          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
883          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
884          csBundle(7).uopIdx := 7.U
885          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
886          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
887          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
888          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
889          csBundle(8).uopIdx := 8.U
890          csBundle(9).lsrc(0) := src1
891          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
892          csBundle(9).ldest := dest
893          csBundle(9).uopIdx := 9.U
894        }
           // e16: three folds (1_2, 1_4, 1_8), final uop is 10.
895        when(vsew === VSew.e16) {
896          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
897          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
898          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
899          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
900          csBundle(7).uopIdx := 7.U
901          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
902          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
903          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
904          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
905          csBundle(8).uopIdx := 8.U
906          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
907          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
908          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
909          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
910          csBundle(9).uopIdx := 9.U
911          csBundle(10).lsrc(0) := src1
912          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
913          csBundle(10).ldest := dest
914          csBundle(10).uopIdx := 10.U
915        }
916      }
         // m4: 2 + 1 pair uops (0..2), then 1 / 2 / 3 fold uops for e64 / e32 / e16.
917      when(vlmul === VLmul.m4) {
918        for (i <- 0 until 2) {
919          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
920          csBundle(i).lsrc(1) := src2 + (i * 2).U
921          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
922          csBundle(i).uopIdx := i.U
923        }
924        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
925        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
926        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
927        csBundle(2).uopIdx := 2.U
928        when(vsew === VSew.e64) {
929          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
930          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
931          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
932          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
933          csBundle(3).uopIdx := 3.U
934          csBundle(4).lsrc(0) := src1
935          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
936          csBundle(4).ldest := dest
937          csBundle(4).uopIdx := 4.U
938        }
939        when(vsew === VSew.e32) {
940          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
941          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
942          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
943          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
944          csBundle(3).uopIdx := 3.U
945          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
946          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
947          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
948          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
949          csBundle(4).uopIdx := 4.U
950          csBundle(5).lsrc(0) := src1
951          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
952          csBundle(5).ldest := dest
953          csBundle(5).uopIdx := 5.U
954        }
955        when(vsew === VSew.e16) {
956          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
957          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
958          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
959          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
960          csBundle(3).uopIdx := 3.U
961          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
962          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
963          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
964          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
965          csBundle(4).uopIdx := 4.U
966          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
967          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
968          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
969          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
970          csBundle(5).uopIdx := 5.U
971          csBundle(6).lsrc(0) := src1
972          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
973          csBundle(6).ldest := dest
974          csBundle(6).uopIdx := 6.U
975        }
976      }
         // m2: a single pair uop (0), then 1 / 2 / 3 fold uops for e64 / e32 / e16.
977      when(vlmul === VLmul.m2) {
978        csBundle(0).lsrc(0) := src2 + 1.U
979        csBundle(0).lsrc(1) := src2 + 0.U
980        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
981        csBundle(0).uopIdx := 0.U
982        when(vsew === VSew.e64) {
983          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
984          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
985          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
986          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
987          csBundle(1).uopIdx := 1.U
988          csBundle(2).lsrc(0) := src1
989          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
990          csBundle(2).ldest := dest
991          csBundle(2).uopIdx := 2.U
992        }
993        when(vsew === VSew.e32) {
994          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
995          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
996          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
997          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
998          csBundle(1).uopIdx := 1.U
999          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1000          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1001          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1002          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1003          csBundle(2).uopIdx := 2.U
1004          csBundle(3).lsrc(0) := src1
1005          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1006          csBundle(3).ldest := dest
1007          csBundle(3).uopIdx := 3.U
1008        }
1009        when(vsew === VSew.e16) {
1010          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1011          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1012          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1013          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
1014          csBundle(1).uopIdx := 1.U
1015          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1016          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1017          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1018          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
1019          csBundle(2).uopIdx := 2.U
1020          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
1021          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1022          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
1023          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
1024          csBundle(3).uopIdx := 3.U
1025          csBundle(4).lsrc(0) := src1
1026          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
1027          csBundle(4).ldest := dest
1028          csBundle(4).uopIdx := 4.U
1029        }
1030      }
          // m1: no pair stage; the fold uops start directly from src2
          // (1 / 2 / 3 folds for e64 / e32 / e16, beginning with isFoldTo1_2).
1031      when(vlmul === VLmul.m1) {
1032        when(vsew === VSew.e64) {
1033          csBundle(0).lsrc(0) := src2
1034          csBundle(0).lsrc(1) := src2
1035          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1036          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1037          csBundle(0).uopIdx := 0.U
1038          csBundle(1).lsrc(0) := src1
1039          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1040          csBundle(1).ldest := dest
1041          csBundle(1).uopIdx := 1.U
1042        }
1043        when(vsew === VSew.e32) {
1044          csBundle(0).lsrc(0) := src2
1045          csBundle(0).lsrc(1) := src2
1046          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1047          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1048          csBundle(0).uopIdx := 0.U
1049          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1050          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1051          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1052          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1053          csBundle(1).uopIdx := 1.U
1054          csBundle(2).lsrc(0) := src1
1055          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1056          csBundle(2).ldest := dest
1057          csBundle(2).uopIdx := 2.U
1058        }
1059        when(vsew === VSew.e16) {
1060          csBundle(0).lsrc(0) := src2
1061          csBundle(0).lsrc(1) := src2
1062          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1063          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
1064          csBundle(0).uopIdx := 0.U
1065          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1066          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1067          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1068          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
1069          csBundle(1).uopIdx := 1.U
1070          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
1071          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1072          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
1073          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
1074          csBundle(2).uopIdx := 2.U
1075          csBundle(3).lsrc(0) := src1
1076          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
1077          csBundle(3).ldest := dest
1078          csBundle(3).uopIdx := 3.U
1079        }
1080      }
          // mf2: only e32 and e16 are handled; the first fold uses isFoldTo1_4.
1081      when(vlmul === VLmul.mf2) {
1082        when(vsew === VSew.e32) {
1083          csBundle(0).lsrc(0) := src2
1084          csBundle(0).lsrc(1) := src2
1085          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1086          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1087          csBundle(0).uopIdx := 0.U
1088          csBundle(1).lsrc(0) := src1
1089          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1090          csBundle(1).ldest := dest
1091          csBundle(1).uopIdx := 1.U
1092        }
1093        when(vsew === VSew.e16) {
1094          csBundle(0).lsrc(0) := src2
1095          csBundle(0).lsrc(1) := src2
1096          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1097          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
1098          csBundle(0).uopIdx := 0.U
1099          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
1100          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1101          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1102          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
1103          csBundle(1).uopIdx := 1.U
1104          csBundle(2).lsrc(0) := src1
1105          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1106          csBundle(2).ldest := dest
1107          csBundle(2).uopIdx := 2.U
1108        }
1109      }
          // mf4: only e16 is handled; a single fold using isFoldTo1_8.
1110      when(vlmul === VLmul.mf4) {
1111        when(vsew === VSew.e16) {
1112          csBundle(0).lsrc(0) := src2
1113          csBundle(0).lsrc(1) := src2
1114          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
1115          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
1116          csBundle(0).uopIdx := 0.U
1117          csBundle(1).lsrc(0) := src1
1118          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
1119          csBundle(1).ldest := dest
1120          csBundle(1).uopIdx := 1.U
1121        }
1122      }
1123    }
1124
1125    is(UopSplitType.VEC_VFREDOSUM) {
        // For each handled (vlmul, vsew) pair, an elaboration-time constant number of uops
        // (the local `vlmax`) is generated as one serial chain through VECTOR_TMP_REG_LMUL.
        // With k = 2 / 4 / 8 for e64 / e32 / e16, uop i is wired as follows:
        //   lsrc(0) : src1 for uop 0, otherwise VECTOR_TMP_REG_LMUL;
        //   lsrc(1) : src2 + i/k when i % k == 0, otherwise VECTOR_TMP_REG_LMUL;
        //   lsrc(2) : src2 + i/k when i % k == 0; dest for the last uop; otherwise
        //             VECTOR_TMP_REG_LMUL (branches that use isWiden select src2 + i/k
        //             instead when i % k == 1 and isWiden is set);
        //   ldest   : dest for the last uop, otherwise VECTOR_TMP_REG_LMUL;
        //   fold    : the per-width isFoldTo1_* flag is set when i % k != 0.
        // In branches that use isWiden, the flag one step coarser is driven when isWiden is
        // set (1_2 in place of 1_4 for e32, 1_4 in place of 1_8 for e16).
        // The m8 branches and every e64 branch do not consult isWiden.
        // (vlmul, vsew) pairs without a `when` branch below are not assigned in this case.
1126      import yunsuan.VfaluType
1127      val vlmul = vlmulReg
1128      val vsew = vsewReg
          // True when the latched instruction's fuOpType equals VfaluType.vfwredosum.
1129      val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
1130      when(vlmul === VLmul.m8) {
1131        when(vsew === VSew.e64) {
1132          val vlmax = 16
1133          for (i <- 0 until vlmax) {
1134            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1135            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1137            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1138            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1139            csBundle(i).uopIdx := i.U
1140          }
1141        }
1142        when(vsew === VSew.e32) {
1143          val vlmax = 32
1144          for (i <- 0 until vlmax) {
1145            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1146            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1147            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1148            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1149            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
1150            csBundle(i).uopIdx := i.U
1151          }
1152        }
1153        when(vsew === VSew.e16) {
1154          val vlmax = 64
1155          for (i <- 0 until vlmax) {
1156            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1157            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1158            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1159            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1160            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
1161            csBundle(i).uopIdx := i.U
1162          }
1163        }
1164      }
          // m4: the e32 and e16 branches are isWiden-aware from here on.
1165      when(vlmul === VLmul.m4) {
1166        when(vsew === VSew.e64) {
1167          val vlmax = 8
1168          for (i <- 0 until vlmax) {
1169            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1170            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1171            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1172            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1173            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1174            csBundle(i).uopIdx := i.U
1175          }
1176        }
1177        when(vsew === VSew.e32) {
1178          val vlmax = 16
1179          for (i <- 0 until vlmax) {
1180            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1181            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1182            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1183            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1184            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1185            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1186            csBundle(i).uopIdx := i.U
1187          }
1188        }
1189        when(vsew === VSew.e16) {
1190          val vlmax = 32
1191          for (i <- 0 until vlmax) {
1192            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1193            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1194            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1195            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1196            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1197            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1198            csBundle(i).uopIdx := i.U
1199          }
1200        }
1201      }
1202      when(vlmul === VLmul.m2) {
1203        when(vsew === VSew.e64) {
1204          val vlmax = 4
1205          for (i <- 0 until vlmax) {
1206            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1207            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1208            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1209            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1210            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1211            csBundle(i).uopIdx := i.U
1212          }
1213        }
1214        when(vsew === VSew.e32) {
1215          val vlmax = 8
1216          for (i <- 0 until vlmax) {
1217            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1218            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1219            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1220            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1221            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1222            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1223            csBundle(i).uopIdx := i.U
1224          }
1225        }
1226        when(vsew === VSew.e16) {
1227          val vlmax = 16
1228          for (i <- 0 until vlmax) {
1229            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1230            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1231            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1232            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1233            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1234            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1235            csBundle(i).uopIdx := i.U
1236          }
1237        }
1238      }
1239      when(vlmul === VLmul.m1) {
1240        when(vsew === VSew.e64) {
1241          val vlmax = 2
1242          for (i <- 0 until vlmax) {
1243            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1244            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1245            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1246            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1247            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1248            csBundle(i).uopIdx := i.U
1249          }
1250        }
1251        when(vsew === VSew.e32) {
1252          val vlmax = 4
1253          for (i <- 0 until vlmax) {
1254            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1255            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1256            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1257            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1258            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1259            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1260            csBundle(i).uopIdx := i.U
1261          }
1262        }
1263        when(vsew === VSew.e16) {
1264          val vlmax = 8
1265          for (i <- 0 until vlmax) {
1266            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1267            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1268            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1269            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1270            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1271            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1272            csBundle(i).uopIdx := i.U
1273          }
1274        }
1275      }
          // mf2: only e32 and e16 are handled.
1276      when(vlmul === VLmul.mf2) {
1277        when(vsew === VSew.e32) {
1278          val vlmax = 2
1279          for (i <- 0 until vlmax) {
1280            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1281            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1282            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1283            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1284            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1285            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1286            csBundle(i).uopIdx := i.U
1287          }
1288        }
1289        when(vsew === VSew.e16) {
1290          val vlmax = 4
1291          for (i <- 0 until vlmax) {
1292            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1293            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1294            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1295            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1296            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1297            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1298            csBundle(i).uopIdx := i.U
1299          }
1300        }
1301      }
          // mf4: only e16 is handled.
1302      when(vlmul === VLmul.mf4) {
1303        when(vsew === VSew.e16) {
1304          val vlmax = 2
1305          for (i <- 0 until vlmax) {
1306            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1307            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1308            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1309            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1310            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1311            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1312            csBundle(i).uopIdx := i.U
1313          }
1314        }
1315      }
1316    }
1317
1318    is(UopSplitType.VEC_SLIDEUP) {
        // Uop layout built by this case:
        //   uop 0 : i2v move of source 0 (immediate or register, chosen by src1IsImm) into
        //           VECTOR_TMP_REG_LMUL;
        //   then, for each i in 0 until MAX_VLMUL, a group of i + 1 uops (j = 0..i) placed at
        //   csBundle index i*(i+1)/2 + j + 1. Each reads VECTOR_TMP_REG_LMUL (lsrc 0) and
        //   src2+j (lsrc 1); lsrc(2)/ldest chain through VECTOR_TMP_REG_LMUL+j /
        //   VECTOR_TMP_REG_LMUL+j+1, starting from dest+i (j == 0) and ending in dest+i (j == i).
1319      // i to vector move
1320      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1321      csBundle(0).srcType(1) := SrcType.imm
1322      csBundle(0).srcType(2) := SrcType.imm
1323      csBundle(0).lsrc(1) := 0.U
1324      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1325      csBundle(0).fuType := FuType.i2v.U
          // fuOpType = bits 2..0 of imm2Vec or i2Vec (by src1IsImm) concatenated with vsewReg.
1326      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1327      csBundle(0).vecWen := true.B
1328      // LMUL
1329      for (i <- 0 until MAX_VLMUL)
1330        for (j <- 0 to i) {
              // Third source: dest+i for the first uop of the group, else the temporary
              // written by the previous uop of the group.
1331          val old_vd = if (j == 0) {
1332            dest + i.U
1333          } else (VECTOR_TMP_REG_LMUL + j).U
              // Destination: dest+i for the last uop of the group, else the next temporary.
1334          val vd = if (j == i) {
1335            dest + i.U
1336          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1337          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1338          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1339          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1340          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1341          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
              // uopIdx is one less than the csBundle index because slot 0 holds the move uop.
1342          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1343        }
1344    }
1345
1346    is(UopSplitType.VEC_SLIDEDOWN) {
        // Uop layout built by this case:
        //   uop 0 : i2v move of source 0 (immediate or register, chosen by src1IsImm) into
        //           VECTOR_TMP_REG_LMUL;
        //   then, for each i in 0 until MAX_VLMUL with i < lmul, a group of i + 1 uops whose
        //   csBundle slots are addressed from the end of the sequence
        //   (numOfUop - (i*(i+1)/2 + i - j + 1)). The group targets dest+lmul-1-i; each uop
        //   reads VECTOR_TMP_REG_LMUL (lsrc 0) and src2+lmul-1-j (lsrc 1), with lsrc(2)/ldest
        //   chained through VECTOR_TMP_REG_LMUL+j / VECTOR_TMP_REG_LMUL+j+1.
1347      // i to vector move
1348      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1349      csBundle(0).srcType(1) := SrcType.imm
1350      csBundle(0).srcType(2) := SrcType.imm
1351      csBundle(0).lsrc(1) := 0.U
1352      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1353      csBundle(0).fuType := FuType.i2v.U
          // fuOpType = bits 2..0 of imm2Vec or i2Vec (by src1IsImm) concatenated with vsewReg.
1354      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1355      csBundle(0).vecWen := true.B
1356      // LMUL
1357      for (i <- 0 until MAX_VLMUL)
1358        for (j <- (0 to i).reverse) {
              // Hardware guard: the slot index depends on the runtime numOfUop, so groups with
              // i >= lmul are disabled here instead of being skipped at elaboration time.
1359          when(i.U < lmul) {
                // Third source: the group's destination register for j == 0, else temporary +j.
1360            val old_vd = if (j == 0) {
1361              dest + lmul - 1.U - i.U
1362            } else (VECTOR_TMP_REG_LMUL + j).U
                // Destination: the group's destination register for j == i, else temporary +j+1.
1363            val vd = if (j == i) {
1364              dest + lmul - 1.U - i.U
1365            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1366            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1367            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1368            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1369            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1370            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
                // uopIdx is one less than the csBundle index because slot 0 holds the move uop.
1371            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1372          }
1373        }
1374    }
1375
is(UopSplitType.VEC_M0X) {
  // One uop per possible LMUL register. Uop k reads src2 together with the temporary register
  // VECTOR_TMP_REG_LMUL + k - 1 and writes VECTOR_TMP_REG_LMUL + k. Uop 0 has no predecessor,
  // so its source 0 type is don't-care.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k)
    uop.srcType(0) := (if (k == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + k - 1).U
    uop.lsrc(1) := src2
    // lsrc(2) is deliberately not driven here (the original marks it as don't-care)
    uop.ldest := (VECTOR_TMP_REG_LMUL + k).U
    uop.uopIdx := k.U
    // intermediate uops only write the vector register file
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
  }

  // The last uop of the instruction writes `dest` through rfWen instead of vecWen;
  // rfWen is suppressed when dest is register 0. This must stay after the loop so it wins.
  val finalUop = csBundle(numOfUop - 1.U)
  finalUop.rfWen := dest =/= 0.U
  finalUop.fpWen := false.B
  finalUop.vecWen := false.B
  finalUop.ldest := dest
}
1397
is(UopSplitType.VEC_MVV) {
  // Two uops per possible LMUL register k:
  //   slot 2k     writes the architectural register dest + k (and reads it as old vd),
  //   slot 2k + 1 writes the temporary register VECTOR_TMP_REG_LMUL + k.
  // Both read src2 and the temporary VECTOR_TMP_REG_LMUL + k - 1; for k == 0 source 0 is don't-care.
  for (k <- 0 until MAX_VLMUL) {
    val firstSrcType = if (k == 0) SrcType.DC else SrcType.vp
    val destUop = csBundle(k * 2)
    val tmpUop = csBundle(k * 2 + 1)

    // sources shared by both uops of the pair
    Seq(destUop, tmpUop).foreach { uop =>
      uop.srcType(0) := firstSrcType
      uop.srcType(1) := SrcType.vp
      uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + k - 1).U
      uop.lsrc(1) := src2
    }

    destUop.lsrc(2) := dest + k.U
    destUop.ldest := dest + k.U
    destUop.uopIdx := (k * 2).U

    // lsrc(2) of the temporary-writing uop is not driven here (don't-care in the original)
    tmpUop.ldest := (VECTOR_TMP_REG_LMUL + k).U
    tmpUop.uopIdx := (k * 2 + 1).U
  }
}
is(UopSplitType.VEC_VWW) {
  // Uops 0 until lmul read the source registers src2 + i (both operands) and write temporary
  // register VECTOR_TMP_REG_LMUL + i. Uops from lmul onwards combine pairs of temporaries:
  // uop i reads temporaries 2 * (i - lmul) + 1 and 2 * (i - lmul) and writes temporary i.
  for (i <- 0 until MAX_VLMUL * 2) {
    // These fields are identical in both arms, so they are driven once, unconditionally.
    csBundle(i).srcType(2) := SrcType.DC
    // lsrc(2) is not driven here
    csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
    csBundle(i).uopIdx := i.U
    when(i.U < lmul) {
      csBundle(i).lsrc(0) := src2 + i.U
      csBundle(i).lsrc(1) := src2 + i.U
    }.otherwise {
      csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W)) + 1.U
      csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
    }
  }

  // Final uop: takes src1 as source 0, reads `dest` as old vd and writes `dest`.
  // These connections do not depend on the loop index, so they are emitted once after the loop.
  // Last-connect semantics make this equivalent to re-emitting them in every iteration, as the
  // original did, because they still follow every per-slot connection above.
  csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
  csBundle(numOfUop - 1.U).lsrc(0) := src1
  csBundle(numOfUop - 1.U).lsrc(2) := dest
  csBundle(numOfUop - 1.U).ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // Emits len * len uops: for each destination register vdIdx, one uop per src2 register tblIdx.
  // Within a destination the uops are chained through temporaries: the first reads dest + vdIdx
  // as old vd, the last writes dest + vdIdx, and the ones in between hand over through
  // VECTOR_TMP_REG_LMUL + tblIdx. The source types keep whatever was driven before this case.
  def emitGatherUops(len: Int): Unit =
    for (vdIdx <- 0 until len; tblIdx <- 0 until len) {
      val slot = vdIdx * len + tblIdx
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + vdIdx.U
      uop.lsrc(1) := src2 + tblIdx.U
      uop.lsrc(2) := (if (tblIdx == 0) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + tblIdx - 1).U)
      uop.ldest := (if (tblIdx == len - 1) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + tblIdx).U)
      uop.uopIdx := slot.U
    }

  // Any other vlmulReg encoding leaves csBundle as it was driven before this switch.
  switch(vlmulReg) {
    is("b001".U) {
      emitGatherUops(2)
    }
    is("b010".U) {
      emitGatherUops(4)
    }
    is("b011".U) {
      emitGatherUops(8)
    }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  // Fills slots 1 .. len * len (slot 0 is the move below). Every uop reads the temporary
  // VECTOR_TMP_REG_LMUL as source 0 and src2 + tblIdx as source 1. For each destination register
  // the uops are chained: the first reads dest + vdIdx as old vd, the last writes dest + vdIdx,
  // and the intermediate results go through temporaries VECTOR_TMP_REG_LMUL + 1 onwards.
  def emitGatherUops(len: Int): Unit =
    for (vdIdx <- 0 until len; tblIdx <- 0 until len) {
      val ordinal = vdIdx * len + tblIdx
      val uop = csBundle(ordinal + 1)
      uop.srcType(0) := SrcType.vp
      // srcType(1) and srcType(2) keep their previously driven values
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + tblIdx.U
      uop.lsrc(2) := (if (tblIdx == 0) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + tblIdx).U)
      uop.ldest := (if (tblIdx == len - 1) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + tblIdx + 1).U)
      // uopIdx does not count the move in slot 0
      uop.uopIdx := ordinal.U
    }

  // Slot 0: move the scalar operand (immediate when src1IsImm, integer register otherwise)
  // into the temporary vector register VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B

  // Baseline for a single register; the switch below overrides it for larger vlmulReg encodings.
  emitGatherUops(1)
  switch(vlmulReg) {
    is("b001".U) {
      emitGatherUops(2)
    }
    is("b010".U) {
      emitGatherUops(4)
    }
    is("b011".U) {
      emitGatherUops(8)
    }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  // vsewReg == 0 variant: two uops per (destination register, src2 register) pair. The pair for
  // destination vdIdx reads src1 + 2 * vdIdx and src1 + 2 * vdIdx + 1, and both uops read
  // src2 + tblIdx. Results are chained through temporaries VECTOR_TMP_REG_LMUL + 2 * tblIdx (+ 1);
  // the first uop of a destination reads dest + vdIdx as old vd and the last one writes it.
  def emitPairedUops(len: Int): Unit =
    for (vdIdx <- 0 until len; tblIdx <- 0 until len) {
      val pairBase = (vdIdx * len + tblIdx) * 2
      val firstUop = csBundle(pairBase)
      val secondUop = csBundle(pairBase + 1)
      // temporary handed from the first uop of the pair to the second
      val pairTmp = (VECTOR_TMP_REG_LMUL + tblIdx * 2).U

      firstUop.lsrc(0) := src1 + (vdIdx * 2).U
      firstUop.lsrc(1) := src2 + tblIdx.U
      firstUop.lsrc(2) := (if (tblIdx == 0) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + tblIdx * 2 - 1).U)
      firstUop.ldest := pairTmp
      firstUop.uopIdx := pairBase.U

      secondUop.lsrc(0) := src1 + (vdIdx * 2 + 1).U
      secondUop.lsrc(1) := src2 + tblIdx.U
      secondUop.lsrc(2) := pairTmp
      secondUop.ldest := (if (tblIdx == len - 1) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + tblIdx * 2 + 1).U)
      secondUop.uopIdx := (pairBase + 1).U
    }

  // One uop per (destination register, src2 register) pair. Destination vdIdx reads
  // src1 + vdIdx / src1Div (integer division), so src1Div consecutive destinations share one
  // src1 register. Chaining through temporaries is the same as in the plain gather split.
  def emitUops(len: Int, src1Div: Int): Unit =
    for (vdIdx <- 0 until len; tblIdx <- 0 until len) {
      val slot = vdIdx * len + tblIdx
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + (vdIdx / src1Div).U
      uop.lsrc(1) := src2 + tblIdx.U
      uop.lsrc(2) := (if (tblIdx == 0) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + tblIdx - 1).U)
      uop.ldest := (if (tblIdx == len - 1) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + tblIdx).U)
      uop.uopIdx := slot.U
    }

  // Picks the layout from vsewReg: e32 -> src1Div 2, e64 -> src1Div 4, anything else -> src1Div 1.
  // When pairedForSew0 is set, vsewReg == 0 takes priority and uses the paired layout instead.
  def emitForSew(len: Int, pairedForSew0: Boolean): Unit = {
    def emitBySrc1Div(): Unit = {
      when(vsewReg === VSew.e32) {
        emitUops(len, 2)
      }.elsewhen(vsewReg === VSew.e64) {
        emitUops(len, 4)
      }.otherwise {
        emitUops(len, 1)
      }
    }
    if (pairedForSew0) {
      when(!vsewReg.orR) {
        emitPairedUops(len)
      }.otherwise {
        emitBySrc1Div()
      }
    } else {
      emitBySrc1Div()
    }
  }

  // Baseline for a single register; the switch below overrides it for larger vlmulReg encodings.
  emitForSew(1, pairedForSew0 = true)
  switch(vlmulReg) {
    is("b001".U) {
      emitForSew(2, pairedForSew0 = true)
    }
    is("b010".U) {
      emitForSew(4, pairedForSew0 = true)
    }
    is("b011".U) {
      // The paired layout is never generated for len == 8; vsewReg == 0 falls through to src1Div 1.
      emitForSew(8, pairedForSew0 = false)
    }
  }
}
1609    is(UopSplitType.VEC_COMPRESS) {
          // Uops are laid out row by row: row i starts at slot i * (i + 3) / 2 and holds i + 2 uops,
          // except the last row (i == len - 1), which holds i + 1. Every uop of row i reads src2 + i.
1610      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1611        for (i <- 0 until len) {
1612          val jlen = if (i == len-1) i+1 else i+2
1613          for (j <- 0 until jlen) {
                // old vd: the architectural register dest + i on the diagonal (j == i), otherwise temporary j + 1
1614            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
                // vd: the last row writes the architectural registers dest + j; earlier rows write
                // temporaries, and the extra uop of a row (j == i + 1) writes VECTOR_TMP_REG_LMUL itself
1615            val vd = if(i==len-1) (dest + j.U) else {
1616              if (j == i+1) VECTOR_TMP_REG_LMUL.U  else (VECTOR_TMP_REG_LMUL + j + 1).U
1617            }
1618            csBundle(i*(i+3)/2 + j).vecWen := true.B
1619            csBundle(i*(i+3)/2 + j).v0Wen := false.B
                // the extra uop of a row (j == i + 1) leaves the types of source 0 and source 2 as DontCare
1620            val src13Type = if (j == i+1) DontCare else SrcType.vp
1621            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1622            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1623            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
                // row 0 reads src1 directly; later rows read VECTOR_TMP_REG_LMUL, which the extra uop
                // of the previous row writes
1624            if (i == 0) {
1625              csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1626            } else {
1627              csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1628            }
1629            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1630            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1631            csBundle(i*(i+3)/2 + j).ldest := vd
1632            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1633          }
1634        }
1635      }
          // Only these three vlmulReg encodings generate uops here; any other encoding keeps
          // whatever csBundle was driven to before this case.
1636      switch(vlmulReg) {
1637        is("b001".U ){
1638          genCsBundle_VEC_COMPRESS(2)
1639        }
1640        is("b010".U ){
1641          genCsBundle_VEC_COMPRESS(4)
1642        }
1643        is("b011".U ){
1644          genCsBundle_VEC_COMPRESS(8)
1645        }
1646      }
1647    }
is(UopSplitType.VEC_MVNR) {
  // One uop per possible register: uop k uses the k-th register of every operand group
  // (src1 + k, src2 + k, old vd = dest + k) and writes dest + k.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k)
    uop.lsrc(0) := src1 + k.U
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
is(UopSplitType.VEC_US_LDST) {
  // Slot 0: integer-to-vector move (FuType.i2v, i2Vec with e64) of an integer register into the
  // temporary vector register VECTOR_TMP_REG_LMUL. Its lsrc(0) is not driven here.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  // Slots 1 .. MAX_VLMUL: one memory uop per register. Each reads the temporary written by the
  // move as source 0, reads dest + k as old vd and writes dest + k.
  (0 until MAX_VLMUL).foreach { k =>
    val memUop = csBundle(k + 1)
    memUop.srcType(0) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(2) := dest + k.U // old vd
    memUop.ldest := dest + k.U
    // uopIdx does not count the move in slot 0
    memUop.uopIdx := k.U
    memUop.vlsInstr := true.B
  }

  // Segment accesses: the first uop waits for earlier instructions and the last one blocks later ones.
  csBundle.head.waitForward := isUsSegment
  csBundle(numOfUop - 1.U).blockBackward := isUsSegment
}
is(UopSplitType.VEC_S_LDST) {
  // Drives slot `slot` as an integer-to-vector move (FuType.i2v, i2Vec with e64) whose result
  // goes to the temporary vector register VECTOR_TMP_REG_LMUL + slot.
  def driveI2vMove(slot: Int): Unit = {
    val mv = csBundle(slot)
    mv.srcType(0) := SrcType.reg
    mv.srcType(1) := SrcType.imm
    mv.lsrc(1) := 0.U
    mv.ldest := (VECTOR_TMP_REG_LMUL + slot).U
    mv.fuType := FuType.i2v.U
    mv.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
    mv.rfWen := false.B
    mv.fpWen := false.B
    mv.vecWen := true.B
    mv.vlsInstr := true.B
  }

  // Slot 0 keeps its previously driven lsrc(0); slot 1 takes the second source register of the
  // latched instruction (presumably the stride register - TODO confirm).
  driveI2vMove(0)
  driveI2vMove(1)
  csBundle(1).lsrc(0) := latchedInst.lsrc(1)

  // Slots 2 .. MAX_VLMUL + 1: one memory uop per register. Each reads both temporaries written
  // by the moves above, reads dest + k as old vd and writes dest + k.
  (0 until MAX_VLMUL).foreach { k =>
    val memUop = csBundle(k + 2)
    memUop.srcType(0) := SrcType.vp
    memUop.srcType(1) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
    memUop.lsrc(2) := dest + k.U // old vd
    memUop.ldest := dest + k.U
    // uopIdx does not count the two moves
    memUop.uopIdx := k.U
    memUop.vlsInstr := true.B
  }

  // Segment accesses: the first uop waits for earlier instructions and the last one blocks later ones.
  csBundle.head.waitForward := isSdSegment
  csBundle(numOfUop - 1.U).blockBackward := isSdSegment
}
1724    is(UopSplitType.VEC_I_LDST) {
          // Indexed vector load/store. Slot 0 is an integer-to-vector move into VECTOR_TMP_REG_LMUL;
          // slots 1 .. MAX_VLMUL are the memory uops, which all read that temporary as source 0.
          //
          // Segment helper: drives source 0, source 2 (old vd) and the destination of every memory
          // uop. Only the first lmul * nf uops write a vector register and read old vd.
          // `lmul` and `nf` here are elaboration-time Ints (register count and field count).
1725      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
1726        for (i <- 0 until MAX_VLMUL) {
1727          val vecWen = if (i < lmul * nf) true.B else false.B
              // despite its name, src2Type is the type of source 2 (old vd)
1728          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
1729          csBundle(i + 1).srcType(0) := SrcType.vp
1730          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
              // placeholder: genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1 re-drives source 1 afterwards
1731          csBundle(i + 1).srcType(1) := SrcType.no
1732          csBundle(i + 1).lsrc(1) := src2 + i.U
1733          csBundle(i + 1).srcType(2) := src2Type
1734          csBundle(i + 1).lsrc(2) := dest + i.U
1735          csBundle(i + 1).ldest := dest + i.U
1736          csBundle(i + 1).rfWen := false.B
1737          csBundle(i + 1).fpWen := false.B
1738          csBundle(i + 1).vecWen := vecWen
1739          csBundle(i + 1).uopIdx := i.U
1740          csBundle(i + 1).vlsInstr := true.B
1741        }
1742      }
          // Segment helper for source 1 (the index registers src2 + i): only the first emul
          // memory uops read a vector register as source 1.
1743      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
1744        for (i <- 0 until MAX_VLMUL) {
1745          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1746          csBundle(i + 1).srcType(1) := src1Type
1747          csBundle(i + 1).lsrc(1) := src2 + i.U
1748        }
1749      }
1750
1751      val vlmul = vlmulReg
1752      val vsew = Cat(0.U(1.W), vsewReg)
1753      val veew = Cat(0.U(1.W), width)
          // vemul = veew - vsew + vlmul: (~vsew + 1) is the two's-complement negation of vsew
1754      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
          // simple_lmul / simple_emul: encodings b001, b010, b011 map to 1, 2, 3; everything else maps to 0
1755      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
1756        "b001".U -> 1.U,
1757        "b010".U -> 2.U,
1758        "b011".U -> 3.U
1759      ))
1760      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
1761        "b001".U -> 1.U,
1762        "b010".U -> 2.U,
1763        "b011".U -> 3.U
1764      ))
          // Slot 0: integer-to-vector move (i2Vec, e64) into VECTOR_TMP_REG_LMUL; its lsrc(0) is not driven here
1765      csBundle(0).srcType(0) := SrcType.reg
1766      csBundle(0).srcType(1) := SrcType.imm
1767      csBundle(0).lsrc(1) := 0.U
1768      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1769      csBundle(0).fuType := FuType.i2v.U
1770      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1771      csBundle(0).rfWen := false.B
1772      csBundle(0).fpWen := false.B
1773      csBundle(0).vecWen := true.B
1774      csBundle(0).vlsInstr := true.B
1775
1776      //LMUL
          // nf == 0: non-segment access. Per-uop register offsets come from the indexedLSRegOffset
          // lookup modules (presumably instances of indexedLSUopTable - TODO confirm), keyed by
          // {simple_emul, simple_lmul}.
1777      when(nf === 0.U) {
1778        for (i <- 0 until MAX_VLMUL) {
1779          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1780          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1781          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1782          csBundle(i + 1).srcType(0) := SrcType.vp
1783          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
              // one-hot select of src2 + offsetVs2
1784          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1785          csBundle(i + 1).srcType(2) := SrcType.vp
1786          // lsrc2 is old vd: one-hot select of dest + offsetVd, the same register as ldest
1787          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1788          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1789          csBundle(i + 1).uopIdx := i.U
1790          csBundle(i + 1).vlsInstr := true.B
1791        }
1792      }.otherwise{
1793        // nf != 0: segment indexed load/store (the helper below receives the field count nf + 1)
1794        // gen src0, vd
            // Only combinations with registers-per-field * field count <= 8 are listed; any other
            // combination leaves the memory uops as they were driven before this switch.
1795        switch(simple_lmul) {
1796          is(0.U) {
1797            switch(nf) {
1798              is(1.U) {
1799                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
1800              }
1801              is(2.U) {
1802                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
1803              }
1804              is(3.U) {
1805                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
1806              }
1807              is(4.U) {
1808                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
1809              }
1810              is(5.U) {
1811                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
1812              }
1813              is(6.U) {
1814                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
1815              }
1816              is(7.U) {
1817                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
1818              }
1819            }
1820          }
1821          is(1.U) {
1822            switch(nf) {
1823              is(1.U) {
1824                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
1825              }
1826              is(2.U) {
1827                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
1828              }
1829              is(3.U) {
1830                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
1831              }
1832            }
1833          }
1834          is(2.U) {
1835            switch(nf) {
1836              is(1.U) {
1837                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
1838              }
1839            }
1840          }
1841        }
1842
1843        // gen src1: must follow the switch above, because it overrides the source 1 placeholder driven there
1844        switch(simple_emul) {
1845          is(0.U) {
1846            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
1847          }
1848          is(1.U) {
1849            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
1850          }
1851          is(2.U) {
1852            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
1853          }
1854          is(3.U) {
1855            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
1856          }
1857        }
1858
1859        // for vector store instructions, clear vecWen on every memory uop (overrides the helper above)
1860        when(isVstore) {
1861          for (i <- 0 until MAX_VLMUL) {
1862            csBundle(i + 1).vecWen := false.B
1863          }
1864        }
1865      }
          // Segment accesses: the first uop waits for earlier instructions and the last one blocks later ones
1866      csBundle.head.waitForward := isIxSegment
1867      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1868    }
1869  }
1870
1871  // readyFromRename counter: PriorityMuxDefault over (!outReadys(k) -> k) with default RenameWidth,
      // i.e. presumably the index of the first output port that is not ready (RenameWidth when all are ready)
1872  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1873
1874  // The remaining uops of the complex instruction can all be sent out this cycle
1875  val thisAllOut = uopRes <= readyCounter
1876
      // Next-state logic. Cases not assigned here keep whatever stateNext / uopResNext were driven
      // to before this switch (their defaults are declared outside this excerpt).
1877  switch(state) {
1878    is(s_idle) {
          // accept a new complex instruction and load its uop count
1879      when (inValid) {
1880        stateNext := s_active
1881        uopResNext := inUopInfo.numOfUop
1882      }
1883    }
1884    is(s_active) {
1885      when (thisAllOut) {
            // the current instruction drains this cycle: take the next one back-to-back, or go idle
1886        when (inValid) {
1887          stateNext := s_active
1888          uopResNext := inUopInfo.numOfUop
1889        }.otherwise {
1890          stateNext := s_idle
1891          uopResNext := 0.U
1892        }
1893      }.otherwise {
            // not everything fits: readyCounter uops leave this cycle, the rest stay pending
1894        stateNext := s_active
1895        uopResNext := uopRes - readyCounter
1896      }
1897    }
1898  }
1899
      // a redirect overrides the next-state logic: back to idle with no uops pending
1900  state := Mux(io.redirect, s_idle, stateNext)
1901  uopRes := Mux(io.redirect, 0.U, uopResNext)
1902
      // number of uops issued this cycle: min(uopRes, readyCounter)
1903  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1904
1905  for(i <- 0 until RenameWidth) {
1906    outValids(i) := complexNum > i.U
        // output i carries uop (numOfUop - uopRes) + i, i.e. the i-th uop not yet sent;
        // an index at or beyond maxUopSize falls back to the last csBundle entry
1907    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1908  }
1909
1910  outComplexNum := Mux(state === s_active, complexNum, 0.U)
      // ready when idle, or when active and the current instruction drains this cycle (&& binds tighter than ||)
1911  inReady := state === s_idle || state === s_active && thisAllOut
1912
1913//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1914//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1915//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1916//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1917//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1918//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1919//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1920//
1921//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1922//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1923//    0.U)
1924//  validToRename.zipWithIndex.foreach{
1925//    case(dst, i) =>
1926//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1927//      dst := MuxCase(false.B, Seq(
1928//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1929//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1930//      ).toSeq)
1931//  }
1932//
1933//  readyToIBuf.zipWithIndex.foreach {
1934//    case (dst, i) =>
1935//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1936//      dst := MuxCase(true.B, Seq(
1937//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1938//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1939//      ).toSeq)
1940//  }
1941//
1942//  io.deq.decodedInsts := decodedInsts
1943//  io.deq.complexNum := complexNum
1944//  io.deq.validToRename := validToRename
1945//  io.deq.readyToIBuf := readyToIBuf
1946}
1947