xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 3bec463ea7e6217896b6ae73531cd2ed8e8b8d92)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Combinational lookup table for one uop of a non-segment indexed vector load/store.
  *
  * For the fixed `uopIdx` given at construction, the module maps every (emul, lmul) pair
  * in 0..3 x 0..3 to a pair of 3-bit register offsets. Both values are used as shift
  * amounts (`1 << emul`, `1 << lmul`) when counting uops, i.e. they are log2 encodings.
  * The whole table is computed at elaboration time and lowered to logic through a
  * QMC-minimised truth table.
  *
  * Ports:
  *  - `src`          : 4-bit key, `(emul << 2) | lmul`
  *  - `outOffsetVs2` : offset selected for vs2, bits (5, 3) of the decoded entry
  *  - `outOffsetVd`  : offset selected for vd,  bits (2, 0) of the decoded entry
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair of one uop.
    *
    * Only non-segment indexed load/store is considered.
    *
    * @param lmul   log2 of the data register group size
    * @param emul   log2 of the index register group size
    * @param uopIdx index of the uop inside the instruction
    * @return `(offsetVs2, offsetVd)`; `(0, 0)` when `uopIdx` lies outside the uop range
    *         of the given (lmul, emul) combination
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // A direct range test replaces the former linear scan with nonlocal `return`s.
    def isValidUop(uopNum: Int): Boolean = 0 <= uopIdx && uopIdx < uopNum

    if (lmul < emul) {
      // lmul < emul: the uop count follows emul, several uops share one vd register
      val ratio = 1 << (emul - lmul)
      if (isValidUop(1 << emul)) (uopIdx, uopIdx / ratio) else (0, 0)
    } else {
      // lmul >= emul: the uop count follows lmul, several uops share one vs2 register
      val ratio = 1 << (lmul - emul)
      if (isValidUop(1 << lmul)) (uopIdx / ratio, uopIdx) else (0, 0)
    }
  }

  // Indexed load/store offsets as (emul, lmul, offsetVs2, offsetVd), emul-major order.
  val combVemulNf: Seq[(Int, Int, Int, Int)] = for {
    emul <- 0 until 4
    lmul <- 0 until 4
  } yield {
    val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
    (emul, lmul, offsetVs2, offsetVd)
  }

  // All 16 keys are listed explicitly; BitPat.N(6) is the (all-zero) fallback entry.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat(((emul << 2) | lmul).U(4.W)), BitPat(((offsetVs2 << 3) | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Numeric constants shared by the vector uop-splitting logic.
  */
trait VectorConstants {
  /** Upper bound used when unrolling one uop per vector register of a register group. */
  val MAX_VLMUL: Int = 8

  /** Logical index of the first temporary vector register (33~47 -> 15 registers). */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Original note: "in v0 regfile". NOTE(review): exact meaning not visible here - confirm. */
  val VECTOR_COMPRESS: Int = 1

  /** Largest number of uops for an indexed load/store, going by the name - TODO confirm. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * IO bundle of the complex-instruction decoder (`DecodeUnitComp`).
  *
  * Field order is kept exactly as declared, since it fixes the bundle layout.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {

  // ---- control inputs ----
  val redirect = Input(Bool())
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // vtype value connected into the vset uops by the decoder
  val vtypeBypass = Input(new VType)

  // ---- instruction input ----
  // The first instruction of the decode vector is passed in here when it is complex.
  // Payload: the simple-decoder result together with its uop split information.
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))

  // ---- uop output ----
  // RenameWidth decoupled slots, each carrying one decoded uop.
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }

  // 3-bit count reported alongside `out`.
  // NOTE(review): presumably the number of valid complex uops this cycle - confirm.
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153
154  //Type of uop Div
155  val typeOfSplit = latchedInst.uopSplitType
156  val src1Type = latchedInst.srcType(0)
157  val src1IsImm = src1Type === SrcType.imm
158  val src1IsFp = src1Type === SrcType.fp
159
160  val isVstore = FuType.isVStore(latchedInst.fuType)
161
162  numOfUop := latchedUopInfo.numOfUop
163  numOfWB := latchedUopInfo.numOfWB
164
165  //uops dispatch
166  val s_idle :: s_active :: Nil = Enum(2)
167  val state = RegInit(s_idle)
168  val stateNext = WireDefault(state)
169  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
170  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
171  val uopResNext = WireInit(uopRes)
172  val e64 = 3.U(2.W)
173  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
174  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
175  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
176
177  //uop div up to maxUopSize
178  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
179  csBundle.foreach { case dst =>
180    dst := latchedInst
181    dst.numUops := latchedUopInfo.numOfUop
182    dst.numWB := latchedUopInfo.numOfWB
183    dst.firstUop := false.B
184    dst.lastUop := false.B
185    dst.vlsInstr := false.B
186  }
187
188  csBundle(0).firstUop := true.B
189  csBundle(numOfUop - 1.U).lastUop := true.B
190
191  switch(typeOfSplit) {
192    is(UopSplitType.VSET) {
193      // In simple decoder, rfWen and vecWen are not set
194      when(isVsetSimple) {
195        // Default
196        // uop0 set rd, never flushPipe
197        csBundle(0).fuType := FuType.vsetiwi.U
198        csBundle(0).flushPipe := false.B
199        csBundle(0).rfWen := true.B
200        // uop1 set vl, vsetvl will flushPipe
201        csBundle(1).ldest := Vl_IDX.U
202        csBundle(1).vecWen := false.B
203        csBundle(1).vlWen := true.B
204        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
205          // write nothing, uop0 is a nop instruction
206          csBundle(0).rfWen := false.B
207          csBundle(0).fpWen := false.B
208          csBundle(0).vecWen := false.B
209          csBundle(0).vlWen := false.B
210          csBundle(1).fuType := FuType.vsetfwf.U
211          csBundle(1).srcType(0) := SrcType.vp
212          csBundle(1).lsrc(0) := Vl_IDX.U
213        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
214          // uop0: mv vtype gpr to vector region
215          csBundle(0).srcType(0) := SrcType.xp
216          csBundle(0).srcType(1) := SrcType.no
217          csBundle(0).lsrc(0) := src2
218          csBundle(0).lsrc(1) := 0.U
219          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
220          csBundle(0).fuType := FuType.i2v.U
221          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
222          csBundle(0).rfWen := false.B
223          csBundle(0).fpWen := false.B
224          csBundle(0).vecWen := true.B
225          csBundle(0).vlWen := false.B
226          csBundle(0).flushPipe := false.B
227          // uop1: uvsetvcfg_vv
228          csBundle(1).fuType := FuType.vsetfwf.U
229          // vl
230          csBundle(1).srcType(0) := SrcType.vp
231          csBundle(1).lsrc(0) := Vl_IDX.U
232          // vtype
233          csBundle(1).srcType(1) := SrcType.vp
234          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
235          csBundle(1).vecWen := false.B
236          csBundle(1).vlWen := true.B
237          csBundle(1).ldest := Vl_IDX.U
238        }.elsewhen(dest === 0.U) {
239          // write nothing, uop0 is a nop instruction
240          csBundle(0).rfWen := false.B
241          csBundle(0).fpWen := false.B
242          csBundle(0).vecWen := false.B
243          csBundle(0).vlWen := false.B
244        }
245        // use bypass vtype from vtypeGen
246        csBundle(0).vpu.connectVType(io.vtypeBypass)
247        csBundle(1).vpu.connectVType(io.vtypeBypass)
248      }
249    }
250    is(UopSplitType.VEC_VVV) {
251      for (i <- 0 until MAX_VLMUL) {
252        csBundle(i).lsrc(0) := src1 + i.U
253        csBundle(i).lsrc(1) := src2 + i.U
254        csBundle(i).lsrc(2) := dest + i.U
255        csBundle(i).ldest := dest + i.U
256        csBundle(i).uopIdx := i.U
257      }
258    }
259    is(UopSplitType.VEC_VFV) {
260      /*
261      f to vector move
262       */
263      csBundle(0).srcType(0) := SrcType.fp
264      csBundle(0).srcType(1) := SrcType.imm
265      csBundle(0).srcType(2) := SrcType.imm
266      csBundle(0).lsrc(1) := 0.U
267      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
268      csBundle(0).fuType := FuType.f2v.U
269      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
270      csBundle(0).vecWen := true.B
271      csBundle(0).vpu.isReverse := false.B
272      /*
273      LMUL
274       */
275      for (i <- 0 until MAX_VLMUL) {
276        csBundle(i + 1).srcType(0) := SrcType.vp
277        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
278        csBundle(i + 1).lsrc(1) := src2 + i.U
279        csBundle(i + 1).lsrc(2) := dest + i.U
280        csBundle(i + 1).ldest := dest + i.U
281        csBundle(i + 1).uopIdx := i.U
282      }
283    }
284    is(UopSplitType.VEC_EXT2) {
285      for (i <- 0 until MAX_VLMUL / 2) {
286        csBundle(2 * i).lsrc(1) := src2 + i.U
287        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
288        csBundle(2 * i).ldest := dest + (2 * i).U
289        csBundle(2 * i).uopIdx := (2 * i).U
290        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
291        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
292        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
293        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
294      }
295    }
296    is(UopSplitType.VEC_EXT4) {
297      for (i <- 0 until MAX_VLMUL / 4) {
298        csBundle(4 * i).lsrc(1) := src2 + i.U
299        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
300        csBundle(4 * i).ldest := dest + (4 * i).U
301        csBundle(4 * i).uopIdx := (4 * i).U
302        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
303        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
304        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
305        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
306        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
307        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
308        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
309        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
310        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
311        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
312        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
313        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
314      }
315    }
316    is(UopSplitType.VEC_EXT8) {
317      for (i <- 0 until MAX_VLMUL) {
318        csBundle(i).lsrc(1) := src2
319        csBundle(i).lsrc(2) := dest + i.U
320        csBundle(i).ldest := dest + i.U
321        csBundle(i).uopIdx := i.U
322      }
323    }
324    is(UopSplitType.VEC_0XV) {
325      /*
326      i/f to vector move
327       */
328      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
329      csBundle(0).srcType(1) := SrcType.imm
330      csBundle(0).srcType(2) := SrcType.imm
331      csBundle(0).lsrc(1) := 0.U
332      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
333      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
334      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
335      csBundle(0).rfWen := false.B
336      csBundle(0).fpWen := false.B
337      csBundle(0).vecWen := true.B
338      /*
339      vmv.s.x
340       */
341      csBundle(1).srcType(0) := SrcType.vp
342      csBundle(1).srcType(1) := SrcType.imm
343      csBundle(1).srcType(2) := SrcType.vp
344      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
345      csBundle(1).lsrc(1) := 0.U
346      csBundle(1).lsrc(2) := dest
347      csBundle(1).ldest := dest
348      csBundle(1).rfWen := false.B
349      csBundle(1).fpWen := false.B
350      csBundle(1).vecWen := true.B
351      csBundle(1).uopIdx := 0.U
352    }
353    is(UopSplitType.VEC_VXV) {
354      /*
355      i to vector move
356       */
357      csBundle(0).srcType(0) := SrcType.reg
358      csBundle(0).srcType(1) := SrcType.imm
359      csBundle(0).srcType(2) := SrcType.imm
360      csBundle(0).lsrc(1) := 0.U
361      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
362      csBundle(0).fuType := FuType.i2v.U
363      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
364      csBundle(0).vecWen := true.B
365      csBundle(0).vpu.isReverse := false.B
366      /*
367      LMUL
368       */
369      for (i <- 0 until MAX_VLMUL) {
370        csBundle(i + 1).srcType(0) := SrcType.vp
371        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
372        csBundle(i + 1).lsrc(1) := src2 + i.U
373        csBundle(i + 1).lsrc(2) := dest + i.U
374        csBundle(i + 1).ldest := dest + i.U
375        csBundle(i + 1).uopIdx := i.U
376      }
377    }
378    is(UopSplitType.VEC_VVW) {
379      for (i <- 0 until MAX_VLMUL / 2) {
380        csBundle(2 * i).lsrc(0) := src1 + i.U
381        csBundle(2 * i).lsrc(1) := src2 + i.U
382        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
383        csBundle(2 * i).ldest := dest + (2 * i).U
384        csBundle(2 * i).uopIdx := (2 * i).U
385        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
386        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
387        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
388        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
389        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
390      }
391    }
392    is(UopSplitType.VEC_VFW) {
393      /*
394      f to vector move
395       */
396      csBundle(0).srcType(0) := SrcType.fp
397      csBundle(0).srcType(1) := SrcType.imm
398      csBundle(0).srcType(2) := SrcType.imm
399      csBundle(0).lsrc(1) := 0.U
400      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
401      csBundle(0).fuType := FuType.f2v.U
402      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
403      csBundle(0).rfWen := false.B
404      csBundle(0).fpWen := false.B
405      csBundle(0).vecWen := true.B
406
407      for (i <- 0 until MAX_VLMUL / 2) {
408        csBundle(2 * i + 1).srcType(0) := SrcType.vp
409        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
410        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
411        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
412        csBundle(2 * i + 1).ldest := dest + (2 * i).U
413        csBundle(2 * i + 1).uopIdx := (2 * i).U
414        csBundle(2 * i + 2).srcType(0) := SrcType.vp
415        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
416        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
417        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
418        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
419        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
420      }
421    }
422    is(UopSplitType.VEC_WVW) {
423      for (i <- 0 until MAX_VLMUL / 2) {
424        csBundle(2 * i).lsrc(0) := src1 + i.U
425        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
426        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
427        csBundle(2 * i).ldest := dest + (2 * i).U
428        csBundle(2 * i).uopIdx := (2 * i).U
429        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
430        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
431        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
432        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
433        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
434      }
435    }
436    is(UopSplitType.VEC_VXW) {
437      /*
438      i to vector move
439       */
440      csBundle(0).srcType(0) := SrcType.reg
441      csBundle(0).srcType(1) := SrcType.imm
442      csBundle(0).srcType(2) := SrcType.imm
443      csBundle(0).lsrc(1) := 0.U
444      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
445      csBundle(0).fuType := FuType.i2v.U
446      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
447      csBundle(0).vecWen := true.B
448
449      for (i <- 0 until MAX_VLMUL / 2) {
450        csBundle(2 * i + 1).srcType(0) := SrcType.vp
451        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
452        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
453        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
454        csBundle(2 * i + 1).ldest := dest + (2 * i).U
455        csBundle(2 * i + 1).uopIdx := (2 * i).U
456        csBundle(2 * i + 2).srcType(0) := SrcType.vp
457        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
458        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
459        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
460        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
461        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
462      }
463    }
464    is(UopSplitType.VEC_WXW) {
465      /*
466      i to vector move
467       */
468      csBundle(0).srcType(0) := SrcType.reg
469      csBundle(0).srcType(1) := SrcType.imm
470      csBundle(0).srcType(2) := SrcType.imm
471      csBundle(0).lsrc(1) := 0.U
472      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
473      csBundle(0).fuType := FuType.i2v.U
474      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
475      csBundle(0).vecWen := true.B
476
477      for (i <- 0 until MAX_VLMUL / 2) {
478        csBundle(2 * i + 1).srcType(0) := SrcType.vp
479        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
480        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
481        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
482        csBundle(2 * i + 1).ldest := dest + (2 * i).U
483        csBundle(2 * i + 1).uopIdx := (2 * i).U
484        csBundle(2 * i + 2).srcType(0) := SrcType.vp
485        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
486        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
487        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
488        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
489        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
490      }
491    }
492    is(UopSplitType.VEC_WVV) {
493      for (i <- 0 until MAX_VLMUL / 2) {
494
495        csBundle(2 * i).lsrc(0) := src1 + i.U
496        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
497        csBundle(2 * i).lsrc(2) := dest + i.U
498        csBundle(2 * i).ldest := dest + i.U
499        csBundle(2 * i).uopIdx := (2 * i).U
500        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
501        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
502        csBundle(2 * i + 1).lsrc(2) := dest + i.U
503        csBundle(2 * i + 1).ldest := dest + i.U
504        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
505      }
506    }
507    is(UopSplitType.VEC_WFW) {
508      /*
509      f to vector move
510       */
511      csBundle(0).srcType(0) := SrcType.fp
512      csBundle(0).srcType(1) := SrcType.imm
513      csBundle(0).srcType(2) := SrcType.imm
514      csBundle(0).lsrc(1) := 0.U
515      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
516      csBundle(0).fuType := FuType.f2v.U
517      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
518      csBundle(0).rfWen := false.B
519      csBundle(0).fpWen := false.B
520      csBundle(0).vecWen := true.B
521
522      for (i <- 0 until MAX_VLMUL / 2) {
523        csBundle(2 * i + 1).srcType(0) := SrcType.vp
524        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
525        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
526        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
527        csBundle(2 * i + 1).ldest := dest + (2 * i).U
528        csBundle(2 * i + 1).uopIdx := (2 * i).U
529        csBundle(2 * i + 2).srcType(0) := SrcType.vp
530        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
531        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
532        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
533        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
534        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
535      }
536    }
537    is(UopSplitType.VEC_WXV) {
538      /*
539      i to vector move
540       */
541      csBundle(0).srcType(0) := SrcType.reg
542      csBundle(0).srcType(1) := SrcType.imm
543      csBundle(0).srcType(2) := SrcType.imm
544      csBundle(0).lsrc(1) := 0.U
545      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
546      csBundle(0).fuType := FuType.i2v.U
547      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
548      csBundle(0).vecWen := true.B
549
550      for (i <- 0 until MAX_VLMUL / 2) {
551        csBundle(2 * i + 1).srcType(0) := SrcType.vp
552        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
553        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
554        csBundle(2 * i + 1).lsrc(2) := dest + i.U
555        csBundle(2 * i + 1).ldest := dest + i.U
556        csBundle(2 * i + 1).uopIdx := (2 * i).U
557        csBundle(2 * i + 2).srcType(0) := SrcType.vp
558        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
559        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
560        csBundle(2 * i + 2).lsrc(2) := dest + i.U
561        csBundle(2 * i + 2).ldest := dest + i.U
562        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
563      }
564    }
565    is(UopSplitType.VEC_VVM) {
566      csBundle(0).lsrc(2) := dest
567      csBundle(0).ldest := dest
568      csBundle(0).uopIdx := 0.U
569      for (i <- 1 until MAX_VLMUL) {
570        csBundle(i).lsrc(0) := src1 + i.U
571        csBundle(i).lsrc(1) := src2 + i.U
572        csBundle(i).lsrc(2) := dest
573        csBundle(i).ldest := dest
574        csBundle(i).uopIdx := i.U
575      }
576    }
577    is(UopSplitType.VEC_VFM) {
578      /*
579      f to vector move
580       */
581      csBundle(0).srcType(0) := SrcType.fp
582      csBundle(0).srcType(1) := SrcType.imm
583      csBundle(0).srcType(2) := SrcType.imm
584      csBundle(0).lsrc(1) := 0.U
585      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
586      csBundle(0).fuType := FuType.f2v.U
587      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
588      csBundle(0).rfWen := false.B
589      csBundle(0).fpWen := false.B
590      csBundle(0).vecWen := true.B
591      //LMUL
592      csBundle(1).srcType(0) := SrcType.vp
593      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
594      csBundle(1).lsrc(2) := dest
595      csBundle(1).ldest := dest
596      csBundle(1).uopIdx := 0.U
597      for (i <- 1 until MAX_VLMUL) {
598        csBundle(i + 1).srcType(0) := SrcType.vp
599        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
600        csBundle(i + 1).lsrc(1) := src2 + i.U
601        csBundle(i + 1).lsrc(2) := dest
602        csBundle(i + 1).ldest := dest
603        csBundle(i + 1).uopIdx := i.U
604      }
605      csBundle(numOfUop - 1.U).ldest := dest
606    }
607    is(UopSplitType.VEC_VXM) {
608      /*
609      i to vector move
610       */
611      csBundle(0).srcType(0) := SrcType.reg
612      csBundle(0).srcType(1) := SrcType.imm
613      csBundle(0).srcType(2) := SrcType.imm
614      csBundle(0).lsrc(1) := 0.U
615      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
616      csBundle(0).fuType := FuType.i2v.U
617      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
618      csBundle(0).vecWen := true.B
619      //LMUL
620      csBundle(1).srcType(0) := SrcType.vp
621      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
622      csBundle(1).lsrc(2) := dest
623      csBundle(1).ldest := dest
624      csBundle(1).uopIdx := 0.U
625      for (i <- 1 until MAX_VLMUL) {
626        csBundle(i + 1).srcType(0) := SrcType.vp
627        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
628        csBundle(i + 1).lsrc(1) := src2 + i.U
629        csBundle(i + 1).lsrc(2) := dest
630        csBundle(i + 1).ldest := dest
631        csBundle(i + 1).uopIdx := i.U
632      }
633      csBundle(numOfUop - 1.U).ldest := dest
634    }
635    is(UopSplitType.VEC_SLIDE1UP) {
636      /*
637      i to vector move
638       */
639      csBundle(0).srcType(0) := SrcType.reg
640      csBundle(0).srcType(1) := SrcType.imm
641      csBundle(0).srcType(2) := SrcType.imm
642      csBundle(0).lsrc(1) := 0.U
643      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
644      csBundle(0).fuType := FuType.i2v.U
645      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
646      csBundle(0).vecWen := true.B
647      //LMUL
648      csBundle(1).srcType(0) := SrcType.vp
649      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
650      csBundle(1).lsrc(2) := dest
651      csBundle(1).ldest := dest
652      csBundle(1).uopIdx := 0.U
653      for (i <- 1 until MAX_VLMUL) {
654        csBundle(i + 1).srcType(0) := SrcType.vp
655        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
656        csBundle(i + 1).lsrc(1) := src2 + i.U
657        csBundle(i + 1).lsrc(2) := dest + i.U
658        csBundle(i + 1).ldest := dest + i.U
659        csBundle(i + 1).uopIdx := i.U
660      }
661    }
662    is(UopSplitType.VEC_FSLIDE1UP) {
663      /*
664      f to vector move
665       */
666      csBundle(0).srcType(0) := SrcType.fp
667      csBundle(0).srcType(1) := SrcType.imm
668      csBundle(0).srcType(2) := SrcType.imm
669      csBundle(0).lsrc(1) := 0.U
670      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
671      csBundle(0).fuType := FuType.f2v.U
672      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
673      csBundle(0).rfWen := false.B
674      csBundle(0).fpWen := false.B
675      csBundle(0).vecWen := true.B
676      //LMUL
677      csBundle(1).srcType(0) := SrcType.vp
678      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
679      csBundle(1).lsrc(1) := src2
680      csBundle(1).lsrc(2) := dest
681      csBundle(1).ldest := dest
682      csBundle(1).uopIdx := 0.U
683      for (i <- 1 until MAX_VLMUL) {
684        csBundle(i + 1).srcType(0) := SrcType.vp
685        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
686        csBundle(i + 1).lsrc(1) := src2 + i.U
687        csBundle(i + 1).lsrc(2) := dest + i.U
688        csBundle(i + 1).ldest := dest + i.U
689        csBundle(i + 1).uopIdx := i.U
690      }
691    }
692    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
693      /*
694      i to vector move
695       */
696      csBundle(0).srcType(0) := SrcType.reg
697      csBundle(0).srcType(1) := SrcType.imm
698      csBundle(0).srcType(2) := SrcType.imm
699      csBundle(0).lsrc(1) := 0.U
700      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
701      csBundle(0).fuType := FuType.i2v.U
702      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
703      csBundle(0).vecWen := true.B
704      //LMUL
705      for (i <- 0 until MAX_VLMUL) {
706        csBundle(2 * i + 1).srcType(0) := SrcType.vp
707        csBundle(2 * i + 1).srcType(1) := SrcType.vp
708        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
709        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
710        csBundle(2 * i + 1).lsrc(2) := dest + i.U
711        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
712        csBundle(2 * i + 1).uopIdx := (2 * i).U
713        if (2 * i + 2 < MAX_VLMUL * 2) {
714          csBundle(2 * i + 2).srcType(0) := SrcType.vp
715          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
716          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
717          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
718          csBundle(2 * i + 2).ldest := dest + i.U
719          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
720        }
721      }
722      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
723      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
724      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
725    }
726    is(UopSplitType.VEC_FSLIDE1DOWN) {
727      /*
728      f to vector move
729       */
730      csBundle(0).srcType(0) := SrcType.fp
731      csBundle(0).srcType(1) := SrcType.imm
732      csBundle(0).srcType(2) := SrcType.imm
733      csBundle(0).lsrc(1) := 0.U
734      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
735      csBundle(0).fuType := FuType.f2v.U
736      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
737      csBundle(0).rfWen := false.B
738      csBundle(0).fpWen := false.B
739      csBundle(0).vecWen := true.B
740      //LMUL
741      for (i <- 0 until MAX_VLMUL) {
742        csBundle(2 * i + 1).srcType(0) := SrcType.vp
743        csBundle(2 * i + 1).srcType(1) := SrcType.vp
744        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
745        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
746        csBundle(2 * i + 1).lsrc(2) := dest + i.U
747        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
748        csBundle(2 * i + 1).uopIdx := (2 * i).U
749        if (2 * i + 2 < MAX_VLMUL * 2) {
750          csBundle(2 * i + 2).srcType(0) := SrcType.vp
751          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
752          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
753          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
754          csBundle(2 * i + 2).ldest := dest + i.U
755          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
756        }
757      }
758      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
759      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
760      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
761    }
is(UopSplitType.VEC_VRED) {
  val tmpBase = VECTOR_TMP_REG_LMUL

  // Connects uop `idx` as a two-source step: sources vs1/vs2, destination vd,
  // third source marked DontCare, uopIdx equal to its slot.
  def pairUop(idx: Int, vs1: UInt, vs2: UInt, vd: UInt): Unit = {
    val uop = csBundle(idx)
    uop.srcType(2) := SrcType.DC
    uop.lsrc(0) := vs1
    uop.lsrc(1) := vs2
    uop.ldest := vd
    uop.uopIdx := idx.U
  }

  // vlmul encoding b001: a single step over src2 and src2 + 1.
  when(vlmulReg === "b001".U) {
    pairUop(0, src2 + 1.U, src2, tmpBase.U)
  }

  // vlmul encoding b010: two steps over the source registers, one over their results.
  when(vlmulReg === "b010".U) {
    pairUop(0, src2 + 1.U, src2, tmpBase.U)
    pairUop(1, src2 + 3.U, src2 + 2.U, (tmpBase + 1).U)
    pairUop(2, (tmpBase + 1).U, tmpBase.U, (tmpBase + 2).U)
  }

  // vlmul encoding b011: three levels of pairwise steps.
  when(vlmulReg === "b011".U) {
    for (i <- 0 until MAX_VLMUL) {
      val uop = csBundle(i)
      if (i < MAX_VLMUL - MAX_VLMUL / 2) {
        // first level: adjacent source registers
        uop.lsrc(0) := src2 + (i * 2 + 1).U
        uop.lsrc(1) := src2 + (i * 2).U
        uop.ldest := (tmpBase + i).U
      } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
        // second level: adjacent temporaries written by the first level
        val pair = (i - MAX_VLMUL / 2) * 2
        uop.lsrc(0) := (tmpBase + pair + 1).U
        uop.lsrc(1) := (tmpBase + pair).U
        uop.ldest := (tmpBase + i).U
      } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
        // NOTE(review): slot 6 and temporaries 4/5/6 are hard-coded here; this only
        // coincides with `i` when MAX_VLMUL == 8 -- confirm against its definition.
        val rootUop = csBundle(6)
        rootUop.lsrc(0) := (tmpBase + 5).U
        rootUop.lsrc(1) := (tmpBase + 4).U
        rootUop.ldest := (tmpBase + 6).U
      }
      uop.srcType(2) := SrcType.DC
      uop.uopIdx := i.U
    }
  }

  when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
    /*
     * 2 <= vlmul <= 8
     * The last uop combines src1 with the most recently written temporary and
     * writes dest; it is connected last so it overrides the connects above.
     */
    val lastUop = csBundle(numOfUop - 1.U)
    lastUop.srcType(2) := SrcType.vp
    lastUop.lsrc(0) := src1
    lastUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
    lastUop.lsrc(2) := dest
    lastUop.ldest := dest
    lastUop.uopIdx := numOfUop - 1.U
  }
}
is(UopSplitType.VEC_VFRED) {
  // Uop sequence generated for every supported (vlmul, vsew) pair:
  //   1. a pairwise tree over the source registers src2 .. src2 + regs - 1
  //      (regs - 1 uops, absent when the group has a single register),
  //   2. a chain of "fold" uops that use the previous result as both operands and
  //      set one of the isFoldTo1_2 / isFoldTo1_4 / isFoldTo1_8 flags,
  //   3. a final uop that combines src1 with the last temporary and writes dest.
  // Every non-final uop in slot `idx` writes temporary register VECTOR_TMP_REG_LMUL + idx.
  val vlmul = vlmulReg
  val vsew = vsewReg
  val tmpBase = VECTOR_TMP_REG_LMUL

  // Connects uop `idx` with sources vs1/vs2 and destination tmpBase + idx.
  def stageUop(idx: Int, vs1: UInt, vs2: UInt): Unit = {
    val uop = csBundle(idx)
    uop.lsrc(0) := vs1
    uop.lsrc(1) := vs2
    uop.ldest := (tmpBase + idx).U
    uop.uopIdx := idx.U
  }

  // Emits the pairwise tree for a group of `groupRegs` source registers:
  // the first groupRegs / 2 uops read adjacent source registers, the remaining
  // ones read adjacent temporaries written by earlier tree uops.
  def treeUops(groupRegs: Int): Unit = {
    val leafUops = groupRegs / 2
    for (idx <- 0 until groupRegs - 1) {
      if (idx < leafUops) {
        stageUop(idx, src2 + (idx * 2 + 1).U, src2 + (idx * 2).U)
      } else {
        val pair = (idx - leafUops) * 2
        stageUop(idx, (tmpBase + pair + 1).U, (tmpBase + pair).U)
      }
    }
  }

  // Emits a fold uop in slot `idx`: both operands are the previous result
  // (src2 itself when this is the very first uop) and the isFoldTo1_<ratio> flag is set.
  def foldUop(idx: Int, ratio: Int): Unit = {
    val operand = if (idx == 0) src2 else (tmpBase + idx - 1).U
    stageUop(idx, operand, operand)
    val fpuCtrl = csBundle(idx).vpu.fpu
    ratio match {
      case 2 => fpuCtrl.isFoldTo1_2 := true.B
      case 4 => fpuCtrl.isFoldTo1_4 := true.B
      case 8 => fpuCtrl.isFoldTo1_8 := true.B
      case _ => require(false, s"unsupported fold ratio $ratio")
    }
  }

  // Emits the closing uop in slot `idx`: src1 combined with the previous temporary, into dest.
  def finalUop(idx: Int): Unit = {
    val uop = csBundle(idx)
    uop.lsrc(0) := src1
    uop.lsrc(1) := (tmpBase + idx - 1).U
    uop.ldest := dest
    uop.uopIdx := idx.U
  }

  // (vlmul encoding, registers in the group, number of leading fold steps skipped)
  val lmulGroups = Seq(
    (VLmul.m8, 8, 0),
    (VLmul.m4, 4, 0),
    (VLmul.m2, 2, 0),
    (VLmul.m1, 1, 0),
    (VLmul.mf2, 1, 1),
    (VLmul.mf4, 1, 2)
  )
  // (vsew encoding, fold ratios applied in order for a full register)
  val sewFolds = Seq(
    (VSew.e64, Seq(2)),
    (VSew.e32, Seq(2, 4)),
    (VSew.e16, Seq(2, 4, 8))
  )

  for ((lmulEnc, groupRegs, skippedFolds) <- lmulGroups) {
    when(vlmul === lmulEnc) {
      // The tree depends on vlmul only, so it is connected regardless of vsew.
      treeUops(groupRegs)
      for ((sewEnc, ratios) <- sewFolds) {
        val folds = ratios.drop(skippedFolds)
        // Combinations with no fold step left (e.g. mf2 with e64) generate nothing.
        if (folds.nonEmpty) {
          when(vsew === sewEnc) {
            val firstFold = groupRegs - 1
            folds.zipWithIndex.foreach { case (ratio, k) => foldUop(firstFold + k, ratio) }
            finalUop(firstFold + folds.size)
          }
        }
      }
    }
  }
}
1096
is(UopSplitType.VEC_VFREDOSUM) {
  import yunsuan.VfaluType
  // All (vlmul, vsew) combinations share one uop pattern, differing only in the number
  // of uops and in how many consecutive uops consume one source register.
  val vlmul = vlmulReg
  val vsew = vsewReg
  val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
  val tmpReg = VECTOR_TMP_REG_LMUL

  // Emits `uopNum` chained uops.
  //   uopsPerReg : every uopsPerReg-th uop (the "head") reads a fresh source register
  //                src2 + i / uopsPerReg; all other uops read the temporary register.
  //   widenAware : when set, the fold flag and the third source of the uop right after
  //                each head additionally depend on `isWiden`.
  // The first uop takes src1 as source 0, the last uop writes dest, every other uop
  // chains through VECTOR_TMP_REG_LMUL.
  def orderedSumUops(uopNum: Int, uopsPerReg: Int, widenAware: Boolean): Unit = {
    def foldFlag(i: Int, ratio: Int) = {
      val fpuCtrl = csBundle(i).vpu.fpu
      ratio match {
        case 2 => fpuCtrl.isFoldTo1_2
        case 4 => fpuCtrl.isFoldTo1_4
        case 8 => fpuCtrl.isFoldTo1_8
      }
    }
    for (i <- 0 until uopNum) {
      val uop = csBundle(i)
      val isHead = i % uopsPerReg == 0
      val isLast = i == uopNum - 1
      uop.lsrc(0) := (if (i == 0) src1 else tmpReg.U)
      uop.lsrc(1) := (if (isHead) src2 + (i / uopsPerReg).U else tmpReg.U)
      uop.lsrc(2) := (
        if (isHead) src2 + (i / uopsPerReg).U
        else if (isLast) dest
        else if (widenAware && i % uopsPerReg == 1) Mux(isWiden, src2 + (i / uopsPerReg).U, tmpReg.U)
        else tmpReg.U
      )
      uop.ldest := (if (isLast) dest else tmpReg.U)
      // Head uops never fold; every other uop sets exactly one fold flag.
      val folding = (!isHead).B
      if (widenAware) {
        // The widening variant uses the next-smaller fold ratio.
        foldFlag(i, uopsPerReg / 2) := isWiden && folding
        foldFlag(i, uopsPerReg) := !isWiden && folding
      } else {
        foldFlag(i, uopsPerReg) := folding
      }
      uop.uopIdx := i.U
    }
  }

  // (vlmul encoding, LMUL numerator, LMUL denominator, whether isWiden is consulted)
  val lmulGroups = Seq(
    (VLmul.m8, 8, 1, false),
    (VLmul.m4, 4, 1, true),
    (VLmul.m2, 2, 1, true),
    (VLmul.m1, 1, 1, true),
    (VLmul.mf2, 1, 2, true),
    (VLmul.mf4, 1, 4, true)
  )
  // (vsew encoding, uops per source register)
  val sewGroups = Seq((VSew.e64, 2), (VSew.e32, 4), (VSew.e16, 8))

  for ((lmulEnc, num, den, mayWiden) <- lmulGroups) {
    when(vlmul === lmulEnc) {
      for ((sewEnc, uopsPerReg) <- sewGroups) {
        val uopNum = uopsPerReg * num / den
        // Combinations that would need fewer than two uops (mf2/e64, mf4/e32, mf4/e64)
        // are not generated.
        if (uopNum >= 2) {
          when(vsew === sewEnc) {
            // e64 never consults isWiden, whatever the vlmul.
            orderedSumUops(uopNum, uopsPerReg, mayWiden && uopsPerReg != 2)
          }
        }
      }
    }
  }
}
1289
is(UopSplitType.VEC_SLIDEUP) {
  // uop 0: "i to vector move" -- issued to FuType.i2v, writes VECTOR_TMP_REG_LMUL.
  // The op type selects the immediate or the integer-register variant via src1IsImm.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // LMUL: destination register i is produced by a chain of i + 1 uops (j = 0 .. i),
  // packed triangularly after the move uop. The chain starts from dest + i as the
  // old value, passes through temporaries, and ends by writing dest + i.
  for (i <- 0 until MAX_VLMUL; j <- 0 to i) {
    val slot = i * (i + 1) / 2 + j + 1
    val oldVd = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
    val newVd = if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
    val uop = csBundle(slot)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + j.U
    uop.lsrc(2) := oldVd
    uop.ldest := newVd
    // uopIdx does not count the move uop in slot 0
    uop.uopIdx := (slot - 1).U
  }
}
1317
is(UopSplitType.VEC_SLIDEDOWN) {
  // uop 0: "i to vector move" -- issued to FuType.i2v, writes VECTOR_TMP_REG_LMUL.
  // The op type selects the immediate or the integer-register variant via src1IsImm.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // LMUL: chains are laid out backwards from the end of the uop list, so each slot is
  // addressed relative to numOfUop. Chain i targets dest + lmul - 1 - i and is only
  // connected when i < lmul.
  for (i <- 0 until MAX_VLMUL; j <- i to 0 by -1) {
    when(i.U < lmul) {
      // distance of this uop from the end of the uop list
      val back = i * (i + 1) / 2 + i - j + 1
      val oldVd = if (j == 0) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j).U
      val newVd = if (j == i) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j + 1).U
      val uop = csBundle(numOfUop - back.U)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + lmul - 1.U - j.U
      uop.lsrc(2) := oldVd
      uop.ldest := newVd
      // uopIdx does not count the move uop in slot 0
      uop.uopIdx := numOfUop - (back + 1).U
    }
  }
}
1347
is(UopSplitType.VEC_M0X) {
  // LMUL: uop i reads src2 plus the temporary written by uop i - 1 and writes
  // temporary VECTOR_TMP_REG_LMUL + i. Uop 0 has no predecessor, so its source 0
  // is marked DontCare.
  for (i <- 0 until MAX_VLMUL) {
    val uop = csBundle(i)
    uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    uop.lsrc(1) := src2
    // uop.lsrc(2) is left untouched (DontCare)
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
  }

  // The uop at index lmul - 1 is re-targeted after the loop (so these connects win):
  // it writes `dest` with rfWen set instead of a vector temporary.
  val resultUop = csBundle(lmul - 1.U)
  resultUop.rfWen := true.B
  resultUop.fpWen := false.B
  resultUop.vecWen := false.B
  resultUop.ldest := dest
}
1369
is(UopSplitType.VEC_MVV) {
  // LMUL: two uops per register index i, both reading src2 and the temporary of the
  // previous index. The even uop writes dest + i (with dest + i as third source),
  // the odd uop writes temporary VECTOR_TMP_REG_LMUL + i.
  for (i <- 0 until MAX_VLMUL) {
    // index 0 has no previous temporary, so source 0 is DontCare there
    val firstSrcType = if (i == 0) SrcType.DC else SrcType.vp

    val destUop = csBundle(i * 2 + 0)
    destUop.srcType(0) := firstSrcType
    destUop.srcType(1) := SrcType.vp
    destUop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    destUop.lsrc(1) := src2
    destUop.lsrc(2) := dest + i.U
    destUop.ldest := dest + i.U
    destUop.uopIdx := (i * 2 + 0).U

    val tmpUop = csBundle(i * 2 + 1)
    tmpUop.srcType(0) := firstSrcType
    tmpUop.srcType(1) := SrcType.vp
    tmpUop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    tmpUop.lsrc(1) := src2
    // tmpUop.lsrc(2) is left untouched (DontCare)
    tmpUop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    tmpUop.uopIdx := (i * 2 + 1).U
  }
}
1391
is(UopSplitType.VEC_M0X_VFIRST) {
  // Only the first uop is adjusted: it writes the integer register `dest`
  // and no floating-point or vector register.
  val onlyUop = csBundle(0)
  onlyUop.ldest := dest
  onlyUop.rfWen := true.B
  onlyUop.fpWen := false.B
  onlyUop.vecWen := false.B
}
is(UopSplitType.VEC_VWW) {
  // Uops with index < lmul read src2 + i on both lsrc(0) and lsrc(1) and write
  // temporary register i. Every later uop i combines the pair of temporaries
  // (2 * (i - lmul), 2 * (i - lmul) + 1) and writes temporary register i.
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    // Identical in both phases, so connected unconditionally.
    uop.srcType(2) := SrcType.DC
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    when(i.U < lmul) {
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    }.otherwise {
      val pairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
      uop.lsrc(0) := pairBase + 1.U
      uop.lsrc(1) := pairBase
    }
  }
  // The final uop takes src1 as lsrc(0), the old destination as lsrc(2) and
  // writes `dest`. These connections are loop-invariant; with last-connect
  // semantics a single copy placed after the loop yields the same hardware as
  // re-emitting them on every iteration.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.srcType(2) := SrcType.vp
  lastUop.lsrc(0) := src1
  lastUop.lsrc(2) := dest
  lastUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // Emits len * len uops. For destination register i the uops j = 0 .. len-1
  // read src1 + i and src2 + j and are chained through lsrc(2)/ldest: the
  // first starts from dest + i, intermediate results go through temporary
  // registers, and the last writes dest + i.
  def genCsBundle_VEC_RGATHER(len: Int): Unit = {
    for {
      i <- 0 until len
      j <- 0 until len
    } {
      val slot = i * len + j
      val uop = csBundle(slot)
      // srcType(0..2) are not overridden for this split type.
      uop.lsrc(0) := src1 + i.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }
  }
  // vlmulReg encodings handled here select a group length of 2, 4 or 8.
  switch(vlmulReg) {
    is("b001".U) { genCsBundle_VEC_RGATHER(2) }
    is("b010".U) { genCsBundle_VEC_RGATHER(4) }
    is("b011".U) { genCsBundle_VEC_RGATHER(8) }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  // Gather uops occupy csBundle(1 ..); csBundle(0) is the scalar-to-vector
  // move set up below. Every gather uop reads the moved value from the
  // temporary register as lsrc(0) and src2 + j as lsrc(1); the uops for one
  // destination register are chained through lsrc(2)/ldest, starting from and
  // ending in dest + i. uopIdx counts gather uops only (the move is excluded).
  def genCsBundle_RGATHER_VX(len: Int): Unit = {
    for {
      i <- 0 until len
      j <- 0 until len
    } {
      val gatherIdx = i * len + j
      val uop = csBundle(gatherIdx + 1)
      uop.srcType(0) := SrcType.vp
      // srcType(1) and srcType(2) are not overridden here.
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U)
      uop.ldest := (if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U)
      uop.uopIdx := gatherIdx.U
    }
  }

  // Uop 0: move the scalar (integer register or immediate, chosen by
  // src1IsImm) into the temporary vector register.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.srcType(2) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B

  // Default layout for a single register; larger groups override it below.
  genCsBundle_RGATHER_VX(1)
  switch(vlmulReg) {
    is("b001".U) { genCsBundle_RGATHER_VX(2) }
    is("b010".U) { genCsBundle_RGATHER_VX(4) }
    is("b011".U) { genCsBundle_RGATHER_VX(8) }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  // vsewReg == 0 layout: two uops per (i, j) pair, reading index registers
  // src1 + 2i and src1 + 2i + 1. The pair is chained through a temporary
  // register, and the chain over j starts from and ends in dest + i.
  def gatherSew8(len: Int): Unit = {
    for {
      i <- 0 until len
      j <- 0 until len
    } {
      val pairBase = (i * len + j) * 2
      val midTmp = (VECTOR_TMP_REG_LMUL + j * 2).U

      val first = csBundle(pairBase + 0)
      first.lsrc(0) := src1 + (i * 2 + 0).U
      first.lsrc(1) := src2 + j.U
      first.lsrc(2) := (if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
      first.ldest := midTmp
      first.uopIdx := (pairBase + 0).U

      val second = csBundle(pairBase + 1)
      second.lsrc(0) := src1 + (i * 2 + 1).U
      second.lsrc(1) := src2 + j.U
      second.lsrc(2) := midTmp
      second.ldest := (if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
      second.uopIdx := (pairBase + 1).U
    }
  }

  // One uop per (i, j) pair. `idxRegDiv` is how many destination registers
  // share one index register: lsrc(0) is src1 + i / idxRegDiv
  // (1 for the default layout, 2 for e32, 4 for e64).
  def gatherPlain(len: Int, idxRegDiv: Int): Unit = {
    for {
      i <- 0 until len
      j <- 0 until len
    } {
      val slot = i * len + j
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + (i / idxRegDiv).U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }
  }

  // Picks the layout for one group length from vsewReg. When `withSew8` is
  // false the vsewReg == 0 case is not special-cased and falls through to the
  // default layout, as in the length-8 case.
  def gatherBySew(len: Int, withSew8: Boolean): Unit = {
    def nonSew8(): Unit = {
      when(vsewReg === VSew.e32) {
        gatherPlain(len, 2)
      }.elsewhen(vsewReg === VSew.e64) {
        gatherPlain(len, 4)
      }.otherwise {
        gatherPlain(len, 1)
      }
    }
    if (withSew8) {
      when(!vsewReg.orR) {
        gatherSew8(len)
      }.otherwise {
        nonSew8()
      }
    } else {
      nonSew8()
    }
  }

  // Default layout for a single register; larger groups override it below.
  gatherBySew(1, withSew8 = true)
  switch(vlmulReg) {
    is("b001".U) { gatherBySew(2, withSew8 = true) }
    is("b010".U) { gatherBySew(4, withSew8 = true) }
    is("b011".U) { gatherBySew(8, withSew8 = false) }
  }
}
is(UopSplitType.VEC_COMPRESS) {
  // Uops are laid out row by row: row i starts at index i * (i + 3) / 2 and
  // holds i + 2 uops, except the last row which holds i + 1. All uops of row i
  // read src2 + i as lsrc(1). Row 0 reads src1 as lsrc(0); later rows read the
  // temporary register VECTOR_TMP_REG_LMUL, which is written by the extra
  // uop (j == i + 1) of the previous row.
  def genCsBundle_VEC_COMPRESS(len: Int): Unit = {
    for (i <- 0 until len) {
      val isLastRow = i == len - 1
      val rowBase = i * (i + 3) / 2
      val rowLen = if (isLastRow) i + 1 else i + 2
      for (j <- 0 until rowLen) {
        val isExtraUop = j == i + 1
        val slot = rowBase + j
        val uop = csBundle(slot)

        uop.vecWen := true.B
        uop.v0Wen := false.B

        // Sources 0 and 2 are don't-care for the extra uop of a row.
        val src13Type = if (isExtraUop) DontCare else SrcType.vp
        uop.srcType(0) := src13Type
        uop.srcType(1) := SrcType.vp
        uop.srcType(2) := src13Type

        uop.lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
        uop.lsrc(1) := src2 + i.U
        // Old destination: dest + i on the diagonal, otherwise temporary j + 1.
        uop.lsrc(2) := (if (i == j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U)
        // The last row writes the architectural destination registers;
        // earlier rows write temporaries.
        uop.ldest := (
          if (isLastRow) (dest + j.U)
          else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
          else (VECTOR_TMP_REG_LMUL + j + 1).U
        )
        uop.uopIdx := slot.U
      }
    }
  }
  switch(vlmulReg) {
    is("b001".U) { genCsBundle_VEC_COMPRESS(2) }
    is("b010".U) { genCsBundle_VEC_COMPRESS(4) }
    is("b011".U) { genCsBundle_VEC_COMPRESS(8) }
  }
}
is(UopSplitType.VEC_MVNR) {
  // One uop per register: sources and destination all advance together.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k)
    uop.uopIdx := k.U
    uop.lsrc(0) := src1 + k.U
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
  }
}
is(UopSplitType.VEC_US_LDST) {
  // Uop 0 (noted as FMV.D.X in the original): an i2v move of an
  // integer-register operand into the temporary vector register.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  // Uops 1 ..: one memory uop per register of the group. Each reads the moved
  // value from the temporary register and uses dest + k as both old vd and
  // destination. uopIdx counts memory uops only.
  (0 until MAX_VLMUL).foreach { k =>
    val memUop = csBundle(k + 1)
    memUop.srcType(0) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(2) := dest + k.U // old vd
    memUop.ldest := dest + k.U
    memUop.uopIdx := k.U
    memUop.vlsInstr := true.B
  }

  // Segment accesses mark the first uop waitForward and the last uop
  // blockBackward.
  csBundle.head.waitForward := isUsSegment
  csBundle(numOfUop - 1.U).blockBackward := isUsSegment
}
is(UopSplitType.VEC_S_LDST) {
  // Uops 0 and 1 (noted as FMV.D.X in the original): i2v moves of two
  // integer-register operands into temporary vector registers
  // VECTOR_TMP_REG_LMUL and VECTOR_TMP_REG_LMUL + 1.
  for (k <- 0 until 2) {
    val moveUop = csBundle(k)
    moveUop.srcType(0) := SrcType.reg
    moveUop.srcType(1) := SrcType.imm
    moveUop.lsrc(1) := 0.U
    moveUop.ldest := (VECTOR_TMP_REG_LMUL + k).U
    moveUop.fuType := FuType.i2v.U
    moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
    moveUop.rfWen := false.B
    moveUop.fpWen := false.B
    moveUop.vecWen := true.B
    moveUop.vlsInstr := true.B
  }
  // The second move takes its source from the instruction's lsrc(1) field.
  csBundle(1).lsrc(0) := latchedInst.lsrc(1)

  // Uops 2 ..: one memory uop per register of the group, reading both moved
  // values and using dest + k as old vd and destination. uopIdx counts memory
  // uops only.
  (0 until MAX_VLMUL).foreach { k =>
    val memUop = csBundle(k + 2)
    memUop.srcType(0) := SrcType.vp
    memUop.srcType(1) := SrcType.vp
    memUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    memUop.lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
    memUop.lsrc(2) := dest + k.U // old vd
    memUop.ldest := dest + k.U
    memUop.uopIdx := k.U
    memUop.vlsInstr := true.B
  }

  // Segment accesses mark the first uop waitForward and the last uop
  // blockBackward.
  csBundle.head.waitForward := isSdSegment
  csBundle(numOfUop - 1.U).blockBackward := isSdSegment
}
is(UopSplitType.VEC_I_LDST) {
  // Segment case, data side: sets up memory uops 1 .. MAX_VLMUL. Only the first
  // regsPerField * fields uops get a vector source 2 (old vd) and vecWen; the
  // rest have srcType(2) = no and vecWen off. srcType(1) is set to `no` here
  // and refined afterwards by genSegmentIndexSrc.
  def genSegmentData(regsPerField: Int, fields: Int): Unit = {
    val usedRegs = regsPerField * fields
    for (i <- 0 until MAX_VLMUL) {
      val inUse = i < usedRegs
      val uop = csBundle(i + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.srcType(1) := SrcType.no
      uop.lsrc(1) := src2 + i.U
      uop.srcType(2) := (if (inUse) SrcType.vp else SrcType.no)
      uop.lsrc(2) := dest + i.U
      uop.ldest := dest + i.U
      uop.rfWen := false.B
      uop.fpWen := false.B
      uop.vecWen := (if (inUse) true.B else false.B)
      uop.uopIdx := i.U
      uop.vlsInstr := true.B
    }
  }

  // Segment case, index side: the first `emul` memory uops read an index
  // register (src2 + i) as source 1; the others have no source 1.
  def genSegmentIndexSrc(emul: Int): Unit = {
    for (i <- 0 until MAX_VLMUL) {
      val uop = csBundle(i + 1)
      uop.srcType(1) := (if (i < emul) SrcType.vp else SrcType.no)
      uop.lsrc(1) := src2 + i.U
    }
  }

  // 3-bit arithmetic: vemul = veew - vsew + vlmul, with the subtraction
  // written as "+ 1 + ~vsew".
  val vsewExt = Cat(0.U(1.W), vsewReg)
  val veewExt = Cat(0.U(1.W), width)
  val vemul: UInt = veewExt.asUInt + 1.U + vlmulReg.asUInt + ~vsewExt.asUInt
  // Encodings b001/b010/b011 map to 1/2/3; every other encoding maps to 0.
  val simple_lmul = MuxLookup(vlmulReg, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))
  val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  // Uop 0: i2v move of an integer-register operand into the temporary
  // vector register.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.rfWen := false.B
  moveUop.fpWen := false.B
  moveUop.vecWen := true.B
  moveUop.vlsInstr := true.B

  when(nf === 0.U) {
    // Non-segment indexed access: the per-uop register offsets for the index
    // (vs2) and data (vd) groups come from the indexedLSRegOffset tables,
    // addressed by {simple_emul, simple_lmul}.
    for (i <- 0 until MAX_VLMUL) {
      val offsetTable = indexedLSRegOffset(i)
      offsetTable.src := Cat(simple_emul, simple_lmul)
      val vs2Select = UIntToOH(offsetTable.outOffsetVs2, MAX_VLMUL)
      val vdSelect = UIntToOH(offsetTable.outOffsetVd, MAX_VLMUL)
      val vs2Candidates = (0 until MAX_VLMUL).map(j => src2 + j.U)
      val vdCandidates = (0 until MAX_VLMUL).map(j => dest + j.U)

      val uop = csBundle(i + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := Mux1H(vs2Select, vs2Candidates)
      uop.srcType(2) := SrcType.vp
      // lsrc(2) is the old vd, so it selects the same register as ldest.
      uop.lsrc(2) := Mux1H(vdSelect, vdCandidates)
      uop.ldest := Mux1H(vdSelect, vdCandidates)
      uop.uopIdx := i.U
      uop.vlsInstr := true.B
    }
  }.otherwise {
    // nf =/= 0: segment indexed load/store. An nf value of k selects k + 1
    // fields. Only combinations with regsPerField * fields <= 8 are listed.
    // Data side (src0, vd):
    switch(simple_lmul) {
      is(0.U) {
        switch(nf) {
          is(1.U) { genSegmentData(1, 2) }
          is(2.U) { genSegmentData(1, 3) }
          is(3.U) { genSegmentData(1, 4) }
          is(4.U) { genSegmentData(1, 5) }
          is(5.U) { genSegmentData(1, 6) }
          is(6.U) { genSegmentData(1, 7) }
          is(7.U) { genSegmentData(1, 8) }
        }
      }
      is(1.U) {
        switch(nf) {
          is(1.U) { genSegmentData(2, 2) }
          is(2.U) { genSegmentData(2, 3) }
          is(3.U) { genSegmentData(2, 4) }
        }
      }
      is(2.U) {
        switch(nf) {
          is(1.U) { genSegmentData(4, 2) }
        }
      }
    }

    // Index side (src1); must follow the data side because it overrides
    // srcType(1).
    switch(simple_emul) {
      is(0.U) { genSegmentIndexSrc(1) }
      is(1.U) { genSegmentIndexSrc(2) }
      is(2.U) { genSegmentIndexSrc(4) }
      is(3.U) { genSegmentIndexSrc(8) }
    }

    // Vector stores never write a vector register.
    when(isVstore) {
      (0 until MAX_VLMUL).foreach { i =>
        csBundle(i + 1).vecWen := false.B
      }
    }
  }

  // Segment accesses mark the first uop waitForward and the last uop
  // blockBackward.
  csBundle.head.waitForward := isIxSegment
  csBundle(numOfUop - 1.U).blockBackward := isIxSegment
}
1849  }
1850
// Ready counter over the rename ports: selects the index of the first entry
// whose ready is low, defaulting to RenameWidth.
// NOTE(review): this assumes PriorityMuxDefault picks the first asserted
// condition and otherwise the default - confirm against its definition.
val readyCounter = {
  val notReady = outReadys.map(rdy => !rdy)
  val portIds = (0 until RenameWidth).map(_.U)
  PriorityMuxDefault(notReady.zip(portIds), RenameWidth.U)
}

// All remaining uops of the current complex instruction can be sent out this cycle.
val thisAllOut = uopRes <= readyCounter

// Next-state logic. uopRes holds the number of uops still to be sent.
switch(state) {
  is(s_idle) {
    when(inValid) {
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }
  }
  is(s_active) {
    when(!thisAllOut) {
      // Some uops are left over: stay active with the reduced remainder.
      stateNext := s_active
      uopResNext := uopRes - readyCounter
    }.elsewhen(inValid) {
      // Finished this instruction and a new one is already waiting.
      stateNext := s_active
      uopResNext := inUopInfo.numOfUop
    }.otherwise {
      stateNext := s_idle
      uopResNext := 0.U
    }
  }
}

// A redirect drops any in-flight complex instruction.
state := Mux(io.redirect, s_idle, stateNext)
uopRes := Mux(io.redirect, 0.U, uopResNext)

// Number of uops emitted this cycle: min(uopRes, readyCounter).
val complexNum = Mux(thisAllOut, uopRes, readyCounter)

for (port <- 0 until RenameWidth) {
  // Index of the uop presented on this port: uops already sent
  // (numOfUop - uopRes) plus the port number, clamped to the last csBundle entry.
  val uopPtr = port.U + numOfUop - uopRes
  outValids(port) := complexNum > port.U
  outDecodedInsts(port) := Mux(uopPtr < maxUopSize.U, csBundle(uopPtr), csBundle(maxUopSize - 1))
}

outComplexNum := Mux(state === s_active, complexNum, 0.U)
// A new instruction is accepted when idle, or when the current one finishes this cycle.
inReady := (state === s_idle) || (state === s_active && thisAllOut)
1892
1893//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1894//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1895//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1896//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1897//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1898//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1899//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1900//
1901//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1902//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1903//    0.U)
1904//  validToRename.zipWithIndex.foreach{
1905//    case(dst, i) =>
1906//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1907//      dst := MuxCase(false.B, Seq(
1908//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1909//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1910//      ).toSeq)
1911//  }
1912//
1913//  readyToIBuf.zipWithIndex.foreach {
1914//    case (dst, i) =>
1915//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1916//      dst := MuxCase(true.B, Seq(
1917//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1918//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1919//      ).toSeq)
1920//  }
1921//
1922//  io.deq.decodedInsts := decodedInsts
1923//  io.deq.complexNum := complexNum
1924//  io.deq.validToRename := validToRename
1925//  io.deq.readyToIBuf := readyToIBuf
1926}
1927