xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 97b279b93a7df926b188fd7a380588518418d28a)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Constant lookup table for one uop position of an indexed vector load/store.
  *
  * The table is computed at elaboration time for the fixed `uopIdx` and is
  * selected at run time by the 7-bit `src`, packed as
  * `{emul[1:0], lmul[1:0], nf[2:0]}`. `emul` and `lmul` are log2 values (a
  * register group spans `1 << x` registers) and `nf` holds NFIELDS - 1.
  *
  * Outputs (each the decoded field of the selected table entry):
  *  - `outOffsetVs2`      : register offset named for vs2
  *  - `outOffsetVd`       : register offset named for vd
  *  - `outIsFirstUopInVd` : set when this uop is the first one mapped to its
  *                          vd offset
  *
  * @param uopIdx position of the uop inside the split instruction
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes the register offsets of one uop of an indexed load/store.
    *
    * Each field is split into `1 << max(lmul, emul)` uops; fields are laid out
    * back to back, so `uopIdx` decomposes into a field number and a position
    * inside that field.
    *
    * @param lmul    log2 of the data register-group size
    * @param emul    log2 of the index register-group size
    * @param nfields number of fields (segments), 1-based
    * @param uopIdx  position of the uop inside the split instruction
    * @return `(offsetVs2, offsetVd, isFirstUopInVd)`; `(0, 0, 1)` when the
    *         shape is illegal or `uopIdx` lies outside the instruction
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val regsPerField = 1 << lmul
    val uopsPerField = 1 << math.max(lmul, emul)
    val field = uopIdx / uopsPerField
    val i = uopIdx % uopsPerField
    // The data registers of all fields must fit in 8 registers. The check has
    // to use the real group size (1 << lmul), not the log2 value: otherwise
    // offsetVd can exceed 7 and corrupt the neighbouring bits of the table row.
    val legalShape = regsPerField * nfields <= 8
    val inRange = uopIdx >= 0 && field < nfields
    if (!legalShape || !inRange) {
      (0, 0, 1)
    } else if (lmul < emul) {
      // lmul < emul: uop count follows emul * nf; several uops share one vd register
      val uopsPerVd = 1 << (emul - lmul)
      (i, i / uopsPerVd + field * regsPerField, if (i % uopsPerVd == 0) 1 else 0)
    } else {
      // lmul >= emul: uop count follows lmul * nf; several uops share one vs2 register
      val uopsPerVs2 = 1 << (lmul - emul)
      (i / uopsPerVs2, i + field * regsPerField, 1)
    }
  }

  // One row per (emul, lmul, nf) selector value of an indexed load/store,
  // stored as (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd).
  val combVemulNf: Seq[(Int, Int, Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf <- 0 until 8
    } yield {
      val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
      // Both offsets are packed into 3-bit fields below; make any overflow loud.
      require(offsetVs2 >= 0 && offsetVs2 < 8 && offsetVd >= 0 && offsetVd < 8,
        s"indexedLSUopTable($uopIdx): offsets ($offsetVs2, $offsetVd) do not fit in 3 bits " +
          s"for emul=$emul lmul=$lmul nf=$nf")
      (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
    }

  // Row encoding: input {emul, lmul, nf} -> output {isFirstUopInVd, offsetVs2[2:0], offsetVd[2:0]}.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat((isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd).U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}
86
/**
  * Constants shared by the vector uop-splitting logic in this file.
  */
trait VectorConstants {
  /** Loop bound used when expanding an instruction into per-register uops. */
  val MAX_VLMUL: Int = 8

  /** Logical index of the temporary FP register written by the int-to-fp move uop. */
  val FP_TMP_REG_MV: Int = 32

  /** Logical index of the first temporary vector register. */
  val VECTOR_TMP_REG_LMUL: Int = 33 // 33~47  ->  15

  /** Number of `indexedLSUopTable` instances created for indexed load/store splitting. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
93
/**
  * IO bundle of `DecodeUnitComp`.
  *
  * Inputs carry one already-decoded instruction plus handshake signals; the
  * `deq` outputs carry up to `RenameWidth` decoded uops and handshake signals.
  * Field order and names define the generated ports, so keep them stable.
  */
94class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Result of the simple decoder for the instruction to be split.
95  val simple = new Bundle {
    // Decoded instruction; used as the template copied into every generated uop.
96    val decodedInst = Input(new DecodedInst)
    // NOTE(review): presumably marks the instruction as needing splitting — confirm against the consumer.
97    val isComplex = Input(Bool())
    // Supplies `lmul` and `numOfUop` for the instruction.
98    val uopInfo = Input(new UopInfo)
99  }
  // Vector type state; its `illegal` flag forces `flushPipe` on vset instructions.
100  val vtype = Input(new VType)
  // NOTE(review): name suggests the PC of decode slot 0 — usage not visible here, confirm.
101  val in0pc = Input(UInt(VAddrBits.W))
  // One flag per decode slot.
102  val isComplex = Input(Vec(DecodeWidth, Bool()))
  // One valid flag per decode slot.
103  val validFromIBuf = Input(Vec(DecodeWidth, Bool()))
  // One ready flag per rename slot.
104  val readyFromRename = Input(Vec(RenameWidth, Bool()))
  // Outputs of the complex decoder.
105  val deq = new Bundle {
    // Decoded uops, one per rename slot.
106    val decodedInsts = Output(Vec(RenameWidth, new DecodedInst))
107    val isVset = Output(Bool())
    // One ready flag per decode slot.
108    val readyToIBuf = Output(Vec(DecodeWidth, Bool()))
    // One valid flag per rename slot.
109    val validToRename = Output(Vec(RenameWidth, Bool()))
    // 3-bit count; NOTE(review): presumably the number of uops emitted this cycle — confirm.
110    val complexNum = Output(UInt(3.W))
111  }
112  val csrCtrl = Input(new CustomCSRCtrlIO)
113}
114
115/**
116  * @author zly
117  */
118class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
119  val io = IO(new DecodeUnitCompIO)
120
121  val maxUopSize = MaxUopSize
122  //input bits
123  private val inst: XSInstBitFields = io.simple.decodedInst.instr.asTypeOf(new XSInstBitFields)
124
125  val src1 = Cat(0.U(1.W), inst.RS1)
126  val src2 = Cat(0.U(1.W), inst.RS2)
127  val dest = Cat(0.U(1.W), inst.RD)
128
129  val nf    = inst.NF
130  val width = inst.WIDTH(1, 0)
131
132  //output bits
133  val decodedInsts = Wire(Vec(RenameWidth, new DecodedInst))
134  val validToRename = Wire(Vec(RenameWidth, Bool()))
135  val readyToIBuf = Wire(Vec(DecodeWidth, Bool()))
136  val complexNum = Wire(UInt(3.W))
137
138  //output of DecodeUnit
139  val decodedInstsSimple = Wire(new DecodedInst)
140  val numOfUop = Wire(UInt(log2Up(maxUopSize+1).W))
141  val lmul = Wire(UInt(4.W))
142  val isVsetSimple = Wire(Bool())
143
144  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
145  indexedLSRegOffset.map(_.src := 0.U)
146
147  //pre decode
148  decodedInstsSimple := io.simple.decodedInst
149  lmul := io.simple.uopInfo.lmul
150  isVsetSimple := io.simple.decodedInst.isVset
151  val vlmulReg = io.simple.decodedInst.vpu.vlmul
152  val vsewReg = io.simple.decodedInst.vpu.vsew
153  when(isVsetSimple) {
154    when(dest === 0.U && src1 === 0.U) {
155      decodedInstsSimple.fuOpType := VSETOpType.keepVl(io.simple.decodedInst.fuOpType)
156    }.elsewhen(src1 === 0.U) {
157      decodedInstsSimple.fuOpType := VSETOpType.setVlmax(io.simple.decodedInst.fuOpType)
158    }
159    when(io.vtype.illegal){
160      decodedInstsSimple.flushPipe := true.B
161    }
162  }
163  //Type of uop Div
164  val typeOfSplit = decodedInstsSimple.uopSplitType
165  val src1Type = decodedInstsSimple.srcType(0)
166  val src1IsImm = src1Type === SrcType.imm
167
168  when(typeOfSplit === UopSplitType.DIR) {
169    numOfUop := Mux(dest =/= 0.U, 2.U,
170      Mux(src1 =/= 0.U, 1.U,
171        Mux(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType), 2.U, 1.U)))
172  } .otherwise {
173    numOfUop := io.simple.uopInfo.numOfUop
174  }
175
176
177  //uop div up to maxUopSize
178  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
179  csBundle.map { case dst =>
180    dst := decodedInstsSimple
181    dst.firstUop := false.B
182    dst.lastUop := false.B
183  }
184
185  csBundle(0).numUops := numOfUop
186  csBundle(0).firstUop := true.B
187  csBundle(numOfUop - 1.U).lastUop := true.B
188
189  switch(typeOfSplit) {
190    is(UopSplitType.DIR) {
191      when(isVsetSimple) {
192        when(dest =/= 0.U) {
193          csBundle(0).fuType := FuType.vsetiwi.U
194          csBundle(0).fuOpType := VSETOpType.switchDest(decodedInstsSimple.fuOpType)
195          csBundle(0).flushPipe := false.B
196          csBundle(0).rfWen := true.B
197          csBundle(0).vecWen := false.B
198          csBundle(1).ldest := VCONFIG_IDX.U
199          csBundle(1).rfWen := false.B
200          csBundle(1).vecWen := true.B
201        }.elsewhen(src1 =/= 0.U) {
202          csBundle(0).ldest := VCONFIG_IDX.U
203        }.elsewhen(VSETOpType.isVsetvli(decodedInstsSimple.fuOpType)) {
204          csBundle(0).fuType := FuType.vsetfwf.U
205          csBundle(0).srcType(0) := SrcType.vp
206          csBundle(0).lsrc(0) := VCONFIG_IDX.U
207        }.elsewhen(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType)) {
208          csBundle(0).srcType(0) := SrcType.reg
209          csBundle(0).srcType(1) := SrcType.imm
210          csBundle(0).lsrc(1) := 0.U
211          csBundle(0).ldest := FP_TMP_REG_MV.U
212          csBundle(0).fuType := FuType.i2f.U
213          csBundle(0).rfWen := false.B
214          csBundle(0).fpWen := true.B
215          csBundle(0).vecWen := false.B
216          csBundle(0).fpu.isAddSub := false.B
217          csBundle(0).fpu.typeTagIn := FPU.D
218          csBundle(0).fpu.typeTagOut := FPU.D
219          csBundle(0).fpu.fromInt := true.B
220          csBundle(0).fpu.wflags := false.B
221          csBundle(0).fpu.fpWen := true.B
222          csBundle(0).fpu.div := false.B
223          csBundle(0).fpu.sqrt := false.B
224          csBundle(0).fpu.fcvt := false.B
225          csBundle(0).flushPipe := false.B
226          csBundle(1).fuType := FuType.vsetfwf.U
227          csBundle(1).srcType(0) := SrcType.vp
228          csBundle(1).lsrc(0) := VCONFIG_IDX.U
229          csBundle(1).srcType(1) := SrcType.fp
230          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
231          csBundle(1).ldest := VCONFIG_IDX.U
232        }
233      }
234    }
235    is(UopSplitType.VEC_VVV) {
236      for (i <- 0 until MAX_VLMUL) {
237        csBundle(i).lsrc(0) := src1 + i.U
238        csBundle(i).lsrc(1) := src2 + i.U
239        csBundle(i).lsrc(2) := dest + i.U
240        csBundle(i).ldest := dest + i.U
241        csBundle(i).uopIdx := i.U
242      }
243    }
244    is(UopSplitType.VEC_VFV) {
245      for (i <- 0 until MAX_VLMUL) {
246        csBundle(i).lsrc(1) := src2 + i.U
247        csBundle(i).lsrc(2) := dest + i.U
248        csBundle(i).ldest := dest + i.U
249        csBundle(i).uopIdx := i.U
250      }
251    }
252    is(UopSplitType.VEC_EXT2) {
253      for (i <- 0 until MAX_VLMUL / 2) {
254        csBundle(2 * i).lsrc(1) := src2 + i.U
255        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
256        csBundle(2 * i).ldest := dest + (2 * i).U
257        csBundle(2 * i).uopIdx := (2 * i).U
258        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
259        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
260        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
261        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
262      }
263    }
264    is(UopSplitType.VEC_EXT4) {
265      for (i <- 0 until MAX_VLMUL / 4) {
266        csBundle(4 * i).lsrc(1) := src2 + i.U
267        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
268        csBundle(4 * i).ldest := dest + (4 * i).U
269        csBundle(4 * i).uopIdx := (4 * i).U
270        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
271        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
272        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
273        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
274        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
275        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
276        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
277        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
278        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
279        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
280        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
281        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
282      }
283    }
284    is(UopSplitType.VEC_EXT8) {
285      for (i <- 0 until MAX_VLMUL) {
286        csBundle(i).lsrc(1) := src2
287        csBundle(i).lsrc(2) := dest + i.U
288        csBundle(i).ldest := dest + i.U
289        csBundle(i).uopIdx := i.U
290      }
291    }
292    is(UopSplitType.VEC_0XV) {
293      /*
294      FMV.D.X
295       */
296      csBundle(0).srcType(0) := SrcType.reg
297      csBundle(0).srcType(1) := SrcType.imm
298      csBundle(0).lsrc(1) := 0.U
299      csBundle(0).ldest := FP_TMP_REG_MV.U
300      csBundle(0).fuType := FuType.i2f.U
301      csBundle(0).rfWen := false.B
302      csBundle(0).fpWen := true.B
303      csBundle(0).vecWen := false.B
304      csBundle(0).fpu.isAddSub := false.B
305      csBundle(0).fpu.typeTagIn := FPU.D
306      csBundle(0).fpu.typeTagOut := FPU.D
307      csBundle(0).fpu.fromInt := true.B
308      csBundle(0).fpu.wflags := false.B
309      csBundle(0).fpu.fpWen := true.B
310      csBundle(0).fpu.div := false.B
311      csBundle(0).fpu.sqrt := false.B
312      csBundle(0).fpu.fcvt := false.B
313      /*
314      vfmv.s.f
315       */
316      csBundle(1).srcType(0) := SrcType.fp
317      csBundle(1).srcType(1) := SrcType.vp
318      csBundle(1).srcType(2) := SrcType.vp
319      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
320      csBundle(1).lsrc(1) := 0.U
321      csBundle(1).lsrc(2) := dest
322      csBundle(1).ldest := dest
323      csBundle(1).fuType := FuType.vppu.U
324      csBundle(1).fuOpType := VpermType.dummy
325      csBundle(1).rfWen := false.B
326      csBundle(1).fpWen := false.B
327      csBundle(1).vecWen := true.B
328    }
329    is(UopSplitType.VEC_VXV) {
330      /*
331      i to vector move
332       */
333      csBundle(0).srcType(0) := SrcType.reg
334      csBundle(0).srcType(1) := SrcType.imm
335      csBundle(0).lsrc(1) := 0.U
336      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
337      csBundle(0).fuType := FuType.i2v.U
338      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
339      csBundle(0).vecWen := true.B
340      /*
341      LMUL
342       */
343      for (i <- 0 until MAX_VLMUL) {
344        csBundle(i + 1).srcType(0) := SrcType.vp
345        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
346        csBundle(i + 1).lsrc(1) := src2 + i.U
347        csBundle(i + 1).lsrc(2) := dest + i.U
348        csBundle(i + 1).ldest := dest + i.U
349        csBundle(i + 1).uopIdx := i.U
350      }
351    }
352    is(UopSplitType.VEC_VVW) {
353      for (i <- 0 until MAX_VLMUL / 2) {
354        csBundle(2 * i).lsrc(0) := src1 + i.U
355        csBundle(2 * i).lsrc(1) := src2 + i.U
356        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
357        csBundle(2 * i).ldest := dest + (2 * i).U
358        csBundle(2 * i).uopIdx := (2 * i).U
359        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
360        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
361        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
362        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
363        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
364      }
365    }
366    is(UopSplitType.VEC_VFW) {
367      for (i <- 0 until MAX_VLMUL / 2) {
368        csBundle(2 * i).lsrc(0) := src1
369        csBundle(2 * i).lsrc(1) := src2 + i.U
370        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
371        csBundle(2 * i).ldest := dest + (2 * i).U
372        csBundle(2 * i).uopIdx := (2 * i).U
373        csBundle(2 * i + 1).lsrc(0) := src1
374        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
375        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
376        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
377        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
378      }
379    }
380    is(UopSplitType.VEC_WVW) {
381      for (i <- 0 until MAX_VLMUL / 2) {
382        csBundle(2 * i).lsrc(0) := src1 + i.U
383        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
384        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
385        csBundle(2 * i).ldest := dest + (2 * i).U
386        csBundle(2 * i).uopIdx := (2 * i).U
387        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
388        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
389        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
390        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
391        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
392      }
393    }
394    is(UopSplitType.VEC_VXW) {
395      /*
396      i to vector move
397       */
398      csBundle(0).srcType(0) := SrcType.reg
399      csBundle(0).srcType(1) := SrcType.imm
400      csBundle(0).lsrc(1) := 0.U
401      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
402      csBundle(0).fuType := FuType.i2v.U
403      csBundle(0).fuOpType := vsewReg
404      csBundle(0).vecWen := true.B
405
406      for (i <- 0 until MAX_VLMUL / 2) {
407        csBundle(2 * i + 1).srcType(0) := SrcType.vp
408        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
409        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
410        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
411        csBundle(2 * i + 1).ldest := dest + (2 * i).U
412        csBundle(2 * i + 1).uopIdx := (2 * i).U
413        csBundle(2 * i + 2).srcType(0) := SrcType.vp
414        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
415        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
416        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
417        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
418        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
419      }
420    }
421    is(UopSplitType.VEC_WXW) {
422      /*
423      i to vector move
424       */
425      csBundle(0).srcType(0) := SrcType.reg
426      csBundle(0).srcType(1) := SrcType.imm
427      csBundle(0).lsrc(1) := 0.U
428      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
429      csBundle(0).fuType := FuType.i2v.U
430      csBundle(0).fuOpType := vsewReg
431      csBundle(0).vecWen := true.B
432
433      for (i <- 0 until MAX_VLMUL / 2) {
434        csBundle(2 * i + 1).srcType(0) := SrcType.vp
435        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
436        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
437        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
438        csBundle(2 * i + 1).ldest := dest + (2 * i).U
439        csBundle(2 * i + 1).uopIdx := (2 * i).U
440        csBundle(2 * i + 2).srcType(0) := SrcType.vp
441        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
442        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
443        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
444        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
445        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
446      }
447    }
448    is(UopSplitType.VEC_WVV) {
449      for (i <- 0 until MAX_VLMUL / 2) {
450
451        csBundle(2 * i).lsrc(0) := src1 + i.U
452        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
453        csBundle(2 * i).lsrc(2) := dest + i.U
454        csBundle(2 * i).ldest := dest + i.U
455        csBundle(2 * i).uopIdx := (2 * i).U
456        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
457        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
458        csBundle(2 * i + 1).lsrc(2) := dest + i.U
459        csBundle(2 * i + 1).ldest := dest + i.U
460        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
461      }
462    }
463    is(UopSplitType.VEC_WFW) {
464      for (i <- 0 until MAX_VLMUL / 2) {
465        csBundle(2 * i).lsrc(0) := src1
466        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
467        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
468        csBundle(2 * i).ldest := dest + (2 * i).U
469        csBundle(2 * i).uopIdx := (2 * i).U
470        csBundle(2 * i + 1).lsrc(0) := src1
471        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
472        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
473        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
474        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
475      }
476    }
477    is(UopSplitType.VEC_WXV) {
478      /*
479      i to vector move
480       */
481      csBundle(0).srcType(0) := SrcType.reg
482      csBundle(0).srcType(1) := SrcType.imm
483      csBundle(0).lsrc(1) := 0.U
484      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
485      csBundle(0).fuType := FuType.i2v.U
486      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
487      csBundle(0).vecWen := true.B
488
489      for (i <- 0 until MAX_VLMUL / 2) {
490        csBundle(2 * i + 1).srcType(0) := SrcType.vp
491        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
492        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
493        csBundle(2 * i + 1).lsrc(2) := dest + i.U
494        csBundle(2 * i + 1).ldest := dest + i.U
495        csBundle(2 * i + 1).uopIdx := (2 * i).U
496        csBundle(2 * i + 2).srcType(0) := SrcType.vp
497        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
498        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
499        csBundle(2 * i + 2).lsrc(2) := dest + i.U
500        csBundle(2 * i + 2).ldest := dest + i.U
501        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
502      }
503    }
504    is(UopSplitType.VEC_VVM) {
505      csBundle(0).lsrc(2) := dest
506      csBundle(0).ldest := dest
507      csBundle(0).uopIdx := 0.U
508      for (i <- 1 until MAX_VLMUL) {
509        csBundle(i).lsrc(0) := src1 + i.U
510        csBundle(i).lsrc(1) := src2 + i.U
511        csBundle(i).lsrc(2) := dest
512        csBundle(i).ldest := dest
513        csBundle(i).uopIdx := i.U
514      }
515    }
516    is(UopSplitType.VEC_VFM) {
517      csBundle(0).lsrc(2) := dest
518      csBundle(0).ldest := dest
519      csBundle(0).uopIdx := 0.U
520      for (i <- 1 until MAX_VLMUL) {
521        csBundle(i).lsrc(0) := src1
522        csBundle(i).lsrc(1) := src2 + i.U
523        csBundle(i).lsrc(2) := dest
524        csBundle(i).ldest := dest
525        csBundle(i).uopIdx := i.U
526      }
527      csBundle(numOfUop - 1.U).ldest := dest
528    }
529    is(UopSplitType.VEC_VXM) {
530      /*
531      i to vector move
532       */
533      csBundle(0).srcType(0) := SrcType.reg
534      csBundle(0).srcType(1) := SrcType.imm
535      csBundle(0).lsrc(1) := 0.U
536      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
537      csBundle(0).fuType := FuType.i2v.U
538      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
539      csBundle(0).vecWen := true.B
540      //LMUL
541      csBundle(1).srcType(0) := SrcType.vp
542      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
543      csBundle(1).lsrc(2) := dest
544      csBundle(1).ldest := dest
545      csBundle(1).uopIdx := 0.U
546      for (i <- 1 until MAX_VLMUL) {
547        csBundle(i + 1).srcType(0) := SrcType.vp
548        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
549        csBundle(i + 1).lsrc(1) := src2 + i.U
550        csBundle(i + 1).lsrc(2) := dest
551        csBundle(i + 1).ldest := dest
552        csBundle(i + 1).uopIdx := i.U
553      }
554      csBundle(numOfUop - 1.U).ldest := dest
555    }
556    is(UopSplitType.VEC_SLIDE1UP) {
557      /*
558      i to vector move
559       */
560      csBundle(0).srcType(0) := SrcType.reg
561      csBundle(0).srcType(1) := SrcType.imm
562      csBundle(0).lsrc(1) := 0.U
563      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
564      csBundle(0).fuType := FuType.i2v.U
565      csBundle(0).fuOpType := vsewReg
566      csBundle(0).vecWen := true.B
567      //LMUL
568      csBundle(1).srcType(0) := SrcType.vp
569      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
570      csBundle(1).lsrc(2) := dest
571      csBundle(1).ldest := dest
572      csBundle(1).uopIdx := 0.U
573      for (i <- 1 until MAX_VLMUL) {
574        csBundle(i + 1).srcType(0) := SrcType.vp
575        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
576        csBundle(i + 1).lsrc(1) := src2 + i.U
577        csBundle(i + 1).lsrc(2) := dest + i.U
578        csBundle(i + 1).ldest := dest + i.U
579        csBundle(i + 1).uopIdx := i.U
580      }
581    }
582    is(UopSplitType.VEC_FSLIDE1UP) {
583      //LMUL
584      csBundle(0).srcType(0) := SrcType.fp
585      csBundle(0).lsrc(0) := src1
586      csBundle(0).lsrc(1) := src2
587      csBundle(0).lsrc(2) := dest
588      csBundle(0).ldest := dest
589      csBundle(0).uopIdx := 0.U
590      for (i <- 1 until MAX_VLMUL) {
591        csBundle(i).srcType(0) := SrcType.vp
592        csBundle(i).lsrc(0) := src2 + (i - 1).U
593        csBundle(i).lsrc(1) := src2 + i.U
594        csBundle(i).lsrc(2) := dest + i.U
595        csBundle(i).ldest := dest + i.U
596        csBundle(i).uopIdx := i.U
597      }
598    }
599    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
600      /*
601      i to vector move
602       */
603      csBundle(0).srcType(0) := SrcType.reg
604      csBundle(0).srcType(1) := SrcType.imm
605      csBundle(0).lsrc(1) := 0.U
606      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
607      csBundle(0).fuType := FuType.i2v.U
608      csBundle(0).fuOpType := vsewReg
609      csBundle(0).vecWen := true.B
610      //LMUL
611      for (i <- 0 until MAX_VLMUL) {
612        csBundle(2 * i + 1).srcType(0) := SrcType.vp
613        csBundle(2 * i + 1).srcType(1) := SrcType.vp
614        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
615        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
616        csBundle(2 * i + 1).lsrc(2) := dest + i.U
617        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
618        csBundle(2 * i + 1).uopIdx := (2 * i).U
619        if (2 * i + 2 < MAX_VLMUL * 2) {
620          csBundle(2 * i + 2).srcType(0) := SrcType.vp
621          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
622          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
623          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
624          csBundle(2 * i + 2).ldest := dest + i.U
625          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
626        }
627      }
628      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
629      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
630      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
631    }
632    is(UopSplitType.VEC_FSLIDE1DOWN) {
633      //LMUL
634      for (i <- 0 until MAX_VLMUL) {
635        csBundle(2 * i).srcType(0) := SrcType.vp
636        csBundle(2 * i).srcType(1) := SrcType.vp
637        csBundle(2 * i).lsrc(0) := src2 + (i + 1).U
638        csBundle(2 * i).lsrc(1) := src2 + i.U
639        csBundle(2 * i).lsrc(2) := dest + i.U
640        csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U
641        csBundle(2 * i).uopIdx := (2 * i).U
642        csBundle(2 * i + 1).srcType(0) := SrcType.fp
643        csBundle(2 * i + 1).lsrc(0) := src1
644        csBundle(2 * i + 1).lsrc(2) := VECTOR_TMP_REG_LMUL.U
645        csBundle(2 * i + 1).ldest := dest + i.U
646        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
647      }
648      csBundle(numOfUop - 1.U).srcType(0) := SrcType.fp
649      csBundle(numOfUop - 1.U).lsrc(0) := src1
650      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
651    }
652    is(UopSplitType.VEC_VRED) {
653      when(vlmulReg === "b001".U) {
654        csBundle(0).srcType(2) := SrcType.DC
655        csBundle(0).lsrc(0) := src2 + 1.U
656        csBundle(0).lsrc(1) := src2
657        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
658        csBundle(0).uopIdx := 0.U
659      }
660      when(vlmulReg === "b010".U) {
661        csBundle(0).srcType(2) := SrcType.DC
662        csBundle(0).lsrc(0) := src2 + 1.U
663        csBundle(0).lsrc(1) := src2
664        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
665        csBundle(0).uopIdx := 0.U
666
667        csBundle(1).srcType(2) := SrcType.DC
668        csBundle(1).lsrc(0) := src2 + 3.U
669        csBundle(1).lsrc(1) := src2 + 2.U
670        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
671        csBundle(1).uopIdx := 1.U
672
673        csBundle(2).srcType(2) := SrcType.DC
674        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
675        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
676        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
677        csBundle(2).uopIdx := 2.U
678      }
679      when(vlmulReg === "b011".U) {
680        for (i <- 0 until MAX_VLMUL) {
681          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
682            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
683            csBundle(i).lsrc(1) := src2 + (i * 2).U
684            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
685          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
686            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
687            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
688            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
689          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
690            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
691            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
692            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
693          }
694          csBundle(i).srcType(2) := SrcType.DC
695          csBundle(i).uopIdx := i.U
696        }
697      }
698      when(vlmulReg.orR) {
699        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
700        csBundle(numOfUop - 1.U).lsrc(0) := src1
701        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
702        csBundle(numOfUop - 1.U).lsrc(2) := dest
703        csBundle(numOfUop - 1.U).ldest := dest
704        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
705      }
706    }
707    is(UopSplitType.VEC_VFRED) {
708      val vlmul = vlmulReg
709      val vsew = vsewReg
710      when(vlmul === VLmul.m8){
711        for (i <- 0 until 4) {
712          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
713          csBundle(i).lsrc(1) := src2 + (i * 2).U
714          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
715          csBundle(i).uopIdx := i.U
716        }
717        for (i <- 4 until 6) {
718          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
719          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
720          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
721          csBundle(i).uopIdx := i.U
722        }
723        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
724        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
725        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
726        csBundle(6).uopIdx := 6.U
727        when(vsew === VSew.e64) {
728          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
729          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
730          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
731          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
732          csBundle(7).uopIdx := 7.U
733          csBundle(8).lsrc(0) := src1
734          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
735          csBundle(8).ldest := dest
736          csBundle(8).uopIdx := 8.U
737        }
738        when(vsew === VSew.e32) {
739          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
740          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
741          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
742          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
743          csBundle(7).uopIdx := 7.U
744          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
745          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
746          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
747          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
748          csBundle(8).uopIdx := 8.U
749          csBundle(9).lsrc(0) := src1
750          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
751          csBundle(9).ldest := dest
752          csBundle(9).uopIdx := 9.U
753        }
754        when(vsew === VSew.e16) {
755          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
756          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
757          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
758          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
759          csBundle(7).uopIdx := 7.U
760          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
761          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
762          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
763          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
764          csBundle(8).uopIdx := 8.U
765          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
766          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
767          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
768          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
769          csBundle(9).uopIdx := 9.U
770          csBundle(10).lsrc(0) := src1
771          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
772          csBundle(10).ldest := dest
773          csBundle(10).uopIdx := 10.U
774        }
775      }
776      when(vlmul === VLmul.m4) {
777        for (i <- 0 until 2) {
778          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
779          csBundle(i).lsrc(1) := src2 + (i * 2).U
780          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
781          csBundle(i).uopIdx := i.U
782        }
783        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
784        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
785        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
786        csBundle(2).uopIdx := 2.U
787        when(vsew === VSew.e64) {
788          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
789          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
790          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
791          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
792          csBundle(3).uopIdx := 3.U
793          csBundle(4).lsrc(0) := src1
794          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
795          csBundle(4).ldest := dest
796          csBundle(4).uopIdx := 4.U
797        }
798        when(vsew === VSew.e32) {
799          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
800          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
801          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
802          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
803          csBundle(3).uopIdx := 3.U
804          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
805          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
806          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
807          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
808          csBundle(4).uopIdx := 4.U
809          csBundle(5).lsrc(0) := src1
810          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
811          csBundle(5).ldest := dest
812          csBundle(5).uopIdx := 5.U
813        }
814        when(vsew === VSew.e16) {
815          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
816          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
817          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
818          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
819          csBundle(3).uopIdx := 3.U
820          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
821          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
822          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
823          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
824          csBundle(4).uopIdx := 4.U
825          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
826          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
827          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
828          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
829          csBundle(5).uopIdx := 5.U
830          csBundle(6).lsrc(0) := src1
831          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
832          csBundle(6).ldest := dest
833          csBundle(6).uopIdx := 6.U
834        }
835      }
836      when(vlmul === VLmul.m2) {
837        csBundle(0).lsrc(0) := src2 + 1.U
838        csBundle(0).lsrc(1) := src2 + 0.U
839        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
840        csBundle(0).uopIdx := 0.U
841        when(vsew === VSew.e64) {
842          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
843          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
844          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
845          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
846          csBundle(1).uopIdx := 1.U
847          csBundle(2).lsrc(0) := src1
848          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
849          csBundle(2).ldest := dest
850          csBundle(2).uopIdx := 2.U
851        }
852        when(vsew === VSew.e32) {
853          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
854          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
855          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
856          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
857          csBundle(1).uopIdx := 1.U
858          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
859          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
860          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
861          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
862          csBundle(2).uopIdx := 2.U
863          csBundle(3).lsrc(0) := src1
864          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
865          csBundle(3).ldest := dest
866          csBundle(3).uopIdx := 3.U
867        }
868        when(vsew === VSew.e16) {
869          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
870          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
871          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
872          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
873          csBundle(1).uopIdx := 1.U
874          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
875          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
876          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
877          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
878          csBundle(2).uopIdx := 2.U
879          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
880          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
881          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
882          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
883          csBundle(3).uopIdx := 3.U
884          csBundle(4).lsrc(0) := src1
885          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
886          csBundle(4).ldest := dest
887          csBundle(4).uopIdx := 4.U
888        }
889      }
890      when(vlmul === VLmul.m1) {
891        when(vsew === VSew.e64) {
892          csBundle(0).lsrc(0) := src2
893          csBundle(0).lsrc(1) := src2
894          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
895          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
896          csBundle(0).uopIdx := 0.U
897          csBundle(1).lsrc(0) := src1
898          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
899          csBundle(1).ldest := dest
900          csBundle(1).uopIdx := 1.U
901        }
902        when(vsew === VSew.e32) {
903          csBundle(0).lsrc(0) := src2
904          csBundle(0).lsrc(1) := src2
905          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
906          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
907          csBundle(0).uopIdx := 0.U
908          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
909          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
910          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
911          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
912          csBundle(1).uopIdx := 1.U
913          csBundle(2).lsrc(0) := src1
914          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
915          csBundle(2).ldest := dest
916          csBundle(2).uopIdx := 2.U
917        }
918        when(vsew === VSew.e16) {
919          csBundle(0).lsrc(0) := src2
920          csBundle(0).lsrc(1) := src2
921          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
922          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
923          csBundle(0).uopIdx := 0.U
924          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
925          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
926          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
927          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
928          csBundle(1).uopIdx := 1.U
929          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
930          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
931          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
932          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
933          csBundle(2).uopIdx := 2.U
934          csBundle(3).lsrc(0) := src1
935          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
936          csBundle(3).ldest := dest
937          csBundle(3).uopIdx := 3.U
938        }
939      }
940      when(vlmul === VLmul.mf2) {
941        when(vsew === VSew.e32) {
942          csBundle(0).lsrc(0) := src2
943          csBundle(0).lsrc(1) := src2
944          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
945          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
946          csBundle(0).uopIdx := 0.U
947          csBundle(1).lsrc(0) := src1
948          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
949          csBundle(1).ldest := dest
950          csBundle(1).uopIdx := 1.U
951        }
952        when(vsew === VSew.e16) {
953          csBundle(0).lsrc(0) := src2
954          csBundle(0).lsrc(1) := src2
955          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
956          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
957          csBundle(0).uopIdx := 0.U
958          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
959          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
960          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
961          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
962          csBundle(1).uopIdx := 1.U
963          csBundle(2).lsrc(0) := src1
964          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
965          csBundle(2).ldest := dest
966          csBundle(2).uopIdx := 2.U
967        }
968      }
969      when(vlmul === VLmul.mf4) {
970        when(vsew === VSew.e16) {
971          csBundle(0).lsrc(0) := src2
972          csBundle(0).lsrc(1) := src2
973          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
974          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
975          csBundle(0).uopIdx := 0.U
976          csBundle(1).lsrc(0) := src1
977          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
978          csBundle(1).ldest := dest
979          csBundle(1).uopIdx := 1.U
980        }
981      }
982    }
983
is(UopSplitType.VEC_VFREDOSUM) {
  import yunsuan.VfaluType
  val lmulSel = vlmulReg
  val sewSel = vsewReg
  // High when the decoded operation is the widening form (vfwredosum).
  val widening = decodedInstsSimple.fuOpType === VfaluType.vfwredosum

  // Returns the fold flag of uop `idx` that corresponds to `ratio` (2, 4 or 8).
  def foldFlagOf(idx: Int, ratio: Int) = {
    val fpuCtrl = csBundle(idx).vpu.fpu
    ratio match {
      case 2 => fpuCtrl.isFoldTo1_2
      case 4 => fpuCtrl.isFoldTo1_4
      case 8 => fpuCtrl.isFoldTo1_8
    }
  }

  // Fills csBundle(0 until uopCount) with a chain of uops:
  //  - uop 0 reads src1 on lsrc(0); every later uop reads VECTOR_TMP_REG_LMUL there.
  //  - every uop but the last writes VECTOR_TMP_REG_LMUL; the last one writes dest.
  //  - a uop with i % ratio == 0 reads src2 + i / ratio on lsrc(1)/lsrc(2) and has its
  //    fold flag cleared; all other uops have a fold flag set.
  //  - when `widenable`, the uop right after a group start re-reads src2 + i / ratio on
  //    lsrc(2) if `widening` is high, and the fold flag used is the one for ratio / 2
  //    (widening) or ratio (non-widening).
  def emitChain(uopCount: Int, ratio: Int, widenable: Boolean): Unit =
    (0 until uopCount).foreach { i =>
      val uop = csBundle(i)
      val opensGroup = i % ratio == 0
      val closesChain = i == uopCount - 1
      val scratch = VECTOR_TMP_REG_LMUL.U
      val folding = (!opensGroup).B

      uop.uopIdx := i.U
      uop.lsrc(0) := (if (i == 0) src1 else scratch)
      uop.lsrc(1) := (if (opensGroup) src2 + (i / ratio).U else scratch)
      uop.lsrc(2) := (
        if (opensGroup) src2 + (i / ratio).U
        else if (closesChain) dest
        else if (widenable && i % ratio == 1) Mux(widening, src2 + (i / ratio).U, scratch)
        else scratch
      )
      uop.ldest := (if (closesChain) dest else scratch)
      if (widenable) {
        foldFlagOf(i, ratio / 2) := widening && folding
        foldFlagOf(i, ratio) := !widening && folding
      } else {
        foldFlagOf(i, ratio) := folding
      }
    }

  // One chain shape per (LMUL, SEW) combination handled by this case.
  when(lmulSel === VLmul.m8) {
    when(sewSel === VSew.e64) { emitChain(16, 2, widenable = false) }
    when(sewSel === VSew.e32) { emitChain(32, 4, widenable = false) }
    when(sewSel === VSew.e16) { emitChain(64, 8, widenable = false) }
  }
  when(lmulSel === VLmul.m4) {
    when(sewSel === VSew.e64) { emitChain(8, 2, widenable = false) }
    when(sewSel === VSew.e32) { emitChain(16, 4, widenable = true) }
    when(sewSel === VSew.e16) { emitChain(32, 8, widenable = true) }
  }
  when(lmulSel === VLmul.m2) {
    when(sewSel === VSew.e64) { emitChain(4, 2, widenable = false) }
    when(sewSel === VSew.e32) { emitChain(8, 4, widenable = true) }
    when(sewSel === VSew.e16) { emitChain(16, 8, widenable = true) }
  }
  when(lmulSel === VLmul.m1) {
    when(sewSel === VSew.e64) { emitChain(2, 2, widenable = false) }
    when(sewSel === VSew.e32) { emitChain(4, 4, widenable = true) }
    when(sewSel === VSew.e16) { emitChain(8, 8, widenable = true) }
  }
  when(lmulSel === VLmul.mf2) {
    when(sewSel === VSew.e32) { emitChain(2, 4, widenable = true) }
    when(sewSel === VSew.e16) { emitChain(4, 8, widenable = true) }
  }
  when(lmulSel === VLmul.mf4) {
    when(sewSel === VSew.e16) { emitChain(2, 8, widenable = true) }
  }
}
1176
is(UopSplitType.VEC_SLIDEUP) {
  // uop 0: i2v move whose destination is VECTOR_TMP_REG_LMUL; the op type is chosen
  // by src1IsImm and concatenated with vsewReg.
  val moveUop = csBundle(0)
  val moveKind = Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0))
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(moveKind, vsewReg)
  moveUop.vecWen := true.B

  // Triangular expansion: row i contributes i + 1 uops (j = 0..i), stored after uop 0.
  // Within a row the partial result travels through VECTOR_TMP_REG_LMUL + j (+ 1);
  // the first uop of the row reads dest + i as old value and the last one writes dest + i.
  for {
    i <- 0 until MAX_VLMUL
    j <- 0 to i
  } {
    val slot = i * (i + 1) / 2 + j + 1
    val uop = csBundle(slot)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + j.U
    uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
    uop.ldest := (if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
    // uopIdx does not count the leading move uop, hence slot - 1.
    uop.uopIdx := (slot - 1).U
  }
}
1203
is(UopSplitType.VEC_SLIDEDOWN) {
  // uop 0: i2v move whose destination is VECTOR_TMP_REG_LMUL; the op type is chosen
  // by src1IsImm and concatenated with vsewReg.
  val moveUop = csBundle(0)
  val moveKind = Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0))
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(moveKind, vsewReg)
  moveUop.vecWen := true.B

  // Triangular expansion laid out backwards from the end of the uop list: each (i, j)
  // pair is placed `fromEnd` entries before numOfUop. Rows with i >= lmul are masked off.
  for {
    i <- 0 until MAX_VLMUL
    j <- i to 0 by -1
  } {
    when(i.U < lmul) {
      val fromEnd = i * (i + 1) / 2 + i - j + 1
      val uop = csBundle(numOfUop - fromEnd.U)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + lmul - 1.U - j.U
      // The j == 0 uop of a row reads dest + lmul - 1 - i as old value and the j == i uop
      // writes it; in between the value goes through VECTOR_TMP_REG_LMUL + j (+ 1).
      uop.lsrc(2) := (if (j == 0) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.ldest := (if (j == i) dest + lmul - 1.U - i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
      // uopIdx is one less than the slot index.
      uop.uopIdx := numOfUop - (fromEnd + 1).U
    }
  }
}
1232
is(UopSplitType.VEC_M0X) {
  // One uop per possible LMUL step. Each reads src2 on lsrc(1) and the previous step's
  // result (VECTOR_TMP_REG_LMUL + i - 1) on lsrc(0); step 0 marks source 0 as DC.
  (0 until MAX_VLMUL).foreach { i =>
    val uop = csBundle(i)
    uop.uopIdx := i.U
    uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    uop.lsrc(1) := src2
    // lsrc(2) is not driven in this case (treated as don't-care).
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.rfWen := false.B
    uop.vecWen := true.B
  }

  // The uop at index lmul - 1 is redirected to FP_TMP_REG_MV with fpWen instead of
  // vecWen. These connections must stay after the loop so that they take precedence.
  val lastVecUop = csBundle(lmul - 1.U)
  lastVecUop.vecWen := false.B
  lastVecUop.fpWen := true.B
  lastVecUop.ldest := FP_TMP_REG_MV.U

  // FMV_X_D: the uop at index lmul moves FP_TMP_REG_MV into dest via the fmisc unit.
  val moveUop = csBundle(lmul)
  moveUop.srcType(0) := SrcType.fp
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(0) := FP_TMP_REG_MV.U
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := dest
  moveUop.fuType := FuType.fmisc.U
  moveUop.rfWen := true.B
  moveUop.fpWen := false.B
  moveUop.vecWen := false.B
  val moveFpu = moveUop.fpu
  moveFpu.isAddSub := false.B
  moveFpu.typeTagIn := FPU.D
  moveFpu.typeTagOut := FPU.D
  moveFpu.fromInt := false.B
  moveFpu.wflags := false.B
  moveFpu.fpWen := false.B
  moveFpu.div := false.B
  moveFpu.sqrt := false.B
  moveFpu.fcvt := false.B
}
1271
is(UopSplitType.VEC_MVV) {
  // Two uops per LMUL step i, stored at indices 2 * i and 2 * i + 1.
  (0 until MAX_VLMUL).foreach { i =>
    val firstSrcType = if (i == 0) SrcType.DC else SrcType.vp
    val evenIdx = 2 * i
    val oddIdx = evenIdx + 1

    // Fields common to both uops of the pair.
    Seq(evenIdx, oddIdx).foreach { k =>
      val uop = csBundle(k)
      uop.srcType(0) := firstSrcType
      uop.srcType(1) := SrcType.vp
      uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
      uop.lsrc(1) := src2
      uop.uopIdx := k.U
    }

    // Even uop: reads and writes dest + i.
    csBundle(evenIdx).lsrc(2) := dest + i.U
    csBundle(evenIdx).ldest := dest + i.U
    // Odd uop: writes VECTOR_TMP_REG_LMUL + i; its lsrc(2) is not driven here.
    csBundle(oddIdx).ldest := (VECTOR_TMP_REG_LMUL + i).U
  }
}
1293
is(UopSplitType.VEC_M0X_VFIRST) {
  // uop 0 writes FP_TMP_REG_MV with fpWen set and rfWen cleared.
  val firstUop = csBundle(0)
  firstUop.rfWen := false.B
  firstUop.fpWen := true.B
  firstUop.ldest := FP_TMP_REG_MV.U

  // FMV_X_D: uop 1 moves FP_TMP_REG_MV into dest via the fmisc unit.
  val moveUop = csBundle(1)
  moveUop.srcType(0) := SrcType.fp
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(0) := FP_TMP_REG_MV.U
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := dest
  moveUop.fuType := FuType.fmisc.U
  moveUop.rfWen := true.B
  moveUop.fpWen := false.B
  moveUop.vecWen := false.B
  val moveFpu = moveUop.fpu
  moveFpu.isAddSub := false.B
  moveFpu.typeTagIn := FPU.D
  moveFpu.typeTagOut := FPU.D
  moveFpu.fromInt := false.B
  moveFpu.wflags := false.B
  moveFpu.fpWen := false.B
  moveFpu.div := false.B
  moveFpu.sqrt := false.B
  moveFpu.fcvt := false.B
}
is(UopSplitType.VEC_VWW) {
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    // These fields are the same in both phases, so they are driven unconditionally.
    // lsrc(2) is not driven per uop; only the final uop gets it below.
    uop.srcType(2) := SrcType.DC
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    when(i.U < lmul) {
      // First phase: both sources are src2 + i.
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    } otherwise {
      // Later phase: consume the pair VECTOR_TMP_REG_LMUL + 2 * (i - lmul) (+ 1).
      val pairBase = VECTOR_TMP_REG_LMUL.U + Cat(i.U - lmul, 0.U(1.W))
      uop.lsrc(0) := pairBase + 1.U
      uop.lsrc(1) := pairBase
    }
  }
  // Final uop (index numOfUop - 1): reads src1 and dest, writes dest.
  // This override used to be emitted inside the loop on every iteration. Only the
  // last occurrence takes effect under last-connect semantics, so emitting it once
  // after the loop yields the same connections without the redundant copies.
  val finalUop = csBundle(numOfUop - 1.U)
  finalUop.srcType(2) := SrcType.vp
  finalUop.lsrc(0) := src1
  finalUop.lsrc(2) := dest
  finalUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // Emits a regs x regs grid of uops. Row `row` uses src1 + row and produces dest + row;
  // column `col` uses src2 + col. Within a row the old-destination value is chained
  // through VECTOR_TMP_REG_LMUL + col: the first column reads dest + row and the last
  // column writes dest + row. srcType is not touched by this case.
  def fillGatherGrid(regs: Int): Unit =
    for (row <- 0 until regs; col <- 0 until regs) {
      val slot = row * regs + col
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + row.U
      uop.lsrc(1) := src2 + col.U
      uop.lsrc(2) := (if (col == 0) dest + row.U else (VECTOR_TMP_REG_LMUL + col - 1).U)
      uop.ldest := (if (col == regs - 1) dest + row.U else (VECTOR_TMP_REG_LMUL + col).U)
      uop.uopIdx := slot.U
    }

  // vlmulReg encoding -> grid size; the encodings are distinct, so at most one fires.
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (lmulEnc, regs) =>
    when(vlmulReg === lmulEnc.U) {
      fillGatherGrid(regs)
    }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  // uop 0: i2v move whose destination is VECTOR_TMP_REG_LMUL; the op type is chosen
  // by src1IsImm and concatenated with vsewReg.
  val moveUop = csBundle(0)
  val moveKind = Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0))
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(moveKind, vsewReg)
  moveUop.vecWen := true.B

  // Emits a regs x regs grid of uops after uop 0. Every uop reads VECTOR_TMP_REG_LMUL on
  // lsrc(0) and src2 + col on lsrc(1). Within a row the old-destination value is chained
  // through VECTOR_TMP_REG_LMUL + col (+ 1): the first column reads dest + row and the
  // last column writes dest + row.
  def fillGatherGrid(regs: Int): Unit =
    for (row <- 0 until regs; col <- 0 until regs) {
      val slot = row * regs + col + 1
      val uop = csBundle(slot)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + col.U
      uop.lsrc(2) := (if (col == 0) dest + row.U else (VECTOR_TMP_REG_LMUL + col).U)
      uop.ldest := (if (col == regs - 1) dest + row.U else (VECTOR_TMP_REG_LMUL + col + 1).U)
      // uopIdx does not count the leading move uop, hence slot - 1.
      uop.uopIdx := (slot - 1).U
    }

  // vlmulReg encoding -> grid size; the encodings are distinct, so at most one fires.
  Seq("b000" -> 1, "b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (lmulEnc, regs) =>
    when(vlmulReg === lmulEnc.U) {
      fillGatherGrid(regs)
    }
  }
}
1409    is(UopSplitType.VEC_RGATHEREI16) {
1410      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1411        for (i <- 0 until len)
1412          for (j <- 0 until len) {
1413            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1414            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1415            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1416            // csBundle(i * len + j).srcType(1) := SrcType.vp
1417            // csBundle(i * len + j).srcType(2) := SrcType.vp
1418            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1419            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1420            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1421            csBundle((i * len + j)*2+0).ldest := vd0
1422            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1423            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1424            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1425            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1426            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1427            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1428            csBundle((i * len + j)*2+1).ldest := vd1
1429            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1430          }
1431      }
1432      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1433        for (i <- 0 until len)
1434          for (j <- 0 until len) {
1435            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1436            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1437            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1438            // csBundle(i * len + j).srcType(1) := SrcType.vp
1439            // csBundle(i * len + j).srcType(2) := SrcType.vp
1440            csBundle(i * len + j).lsrc(0) := src1 + i.U
1441            csBundle(i * len + j).lsrc(1) := src2 + j.U
1442            csBundle(i * len + j).lsrc(2) := vd_old
1443            csBundle(i * len + j).ldest := vd
1444            csBundle(i * len + j).uopIdx := (i * len + j).U
1445          }
1446      }
1447      switch(vlmulReg) {
1448        is("b000".U ){
1449          when(!vsewReg.orR){
1450            genCsBundle_VEC_RGATHEREI16_SEW8(1)
1451          } .otherwise{
1452            genCsBundle_VEC_RGATHEREI16(1)
1453          }
1454        }
1455        is("b001".U) {
1456          when(!vsewReg.orR) {
1457            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1458          }.otherwise {
1459            genCsBundle_VEC_RGATHEREI16(2)
1460          }
1461        }
1462        is("b010".U) {
1463          when(!vsewReg.orR) {
1464            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1465          }.otherwise {
1466            genCsBundle_VEC_RGATHEREI16(4)
1467          }
1468        }
1469        is("b011".U) {
1470          genCsBundle_VEC_RGATHEREI16(8)
1471        }
1472      }
1473    }
1474    is(UopSplitType.VEC_COMPRESS) {
1475      def genCsBundle_VEC_COMPRESS(len:Int): Unit ={
1476        for (i <- 0 until len){
1477          val jlen = if (i == len-1) i+1 else i+2
1478          for (j <- 0 until jlen) {
1479            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1480            val vd = if(i==len-1) (dest + j.U) else{
1481              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1482            }
1483            val src23Type = if (j == i+1) DontCare else SrcType.vp
1484            csBundle(i*(i+3)/2 + j).srcType(0) := SrcType.vp
1485            csBundle(i*(i+3)/2 + j).srcType(1) := src23Type
1486            csBundle(i*(i+3)/2 + j).srcType(2) := src23Type
1487            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1488            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1489            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1490            // csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1491            csBundle(i*(i+3)/2 + j).ldest := vd
1492            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1493          }
1494        }
1495      }
1496      switch(vlmulReg) {
1497        is("b001".U ){
1498          genCsBundle_VEC_COMPRESS(2)
1499        }
1500        is("b010".U ){
1501          genCsBundle_VEC_COMPRESS(4)
1502        }
1503        is("b011".U ){
1504          genCsBundle_VEC_COMPRESS(8)
1505        }
1506      }
1507    }
1508    is(UopSplitType.VEC_MVNR) {
1509      for (i <- 0 until MAX_VLMUL) {
1510        csBundle(i).lsrc(0) := src1 + i.U
1511        csBundle(i).lsrc(1) := src2 + i.U
1512        csBundle(i).lsrc(2) := dest + i.U
1513        csBundle(i).ldest := dest + i.U
1514        csBundle(i).uopIdx := i.U
1515      }
1516    }
1517    is(UopSplitType.VEC_US_LDST) {
1518      /*
1519      FMV.D.X
1520       */
1521      csBundle(0).srcType(0) := SrcType.reg
1522      csBundle(0).srcType(1) := SrcType.imm
1523      csBundle(0).lsrc(1) := 0.U
1524      csBundle(0).ldest := FP_TMP_REG_MV.U
1525      csBundle(0).fuType := FuType.i2f.U
1526      csBundle(0).rfWen := false.B
1527      csBundle(0).fpWen := true.B
1528      csBundle(0).vecWen := false.B
1529      csBundle(0).fpu.isAddSub := false.B
1530      csBundle(0).fpu.typeTagIn := FPU.D
1531      csBundle(0).fpu.typeTagOut := FPU.D
1532      csBundle(0).fpu.fromInt := true.B
1533      csBundle(0).fpu.wflags := false.B
1534      csBundle(0).fpu.fpWen := true.B
1535      csBundle(0).fpu.div := false.B
1536      csBundle(0).fpu.sqrt := false.B
1537      csBundle(0).fpu.fcvt := false.B
1538      //LMUL
1539      for (i <- 0 until MAX_VLMUL) {
1540        csBundle(i + 1).srcType(0) := SrcType.fp
1541        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1542        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1543        csBundle(i + 1).ldest := dest + i.U
1544        csBundle(i + 1).uopIdx := i.U
1545      }
1546    }
1547    is(UopSplitType.VEC_S_LDST) {
1548      /*
1549      FMV.D.X
1550       */
1551      csBundle(0).srcType(0) := SrcType.reg
1552      csBundle(0).srcType(1) := SrcType.imm
1553      csBundle(0).lsrc(1) := 0.U
1554      csBundle(0).ldest := FP_TMP_REG_MV.U
1555      csBundle(0).fuType := FuType.i2f.U
1556      csBundle(0).rfWen := false.B
1557      csBundle(0).fpWen := true.B
1558      csBundle(0).vecWen := false.B
1559      csBundle(0).fpu.isAddSub := false.B
1560      csBundle(0).fpu.typeTagIn := FPU.D
1561      csBundle(0).fpu.typeTagOut := FPU.D
1562      csBundle(0).fpu.fromInt := true.B
1563      csBundle(0).fpu.wflags := false.B
1564      csBundle(0).fpu.fpWen := true.B
1565      csBundle(0).fpu.div := false.B
1566      csBundle(0).fpu.sqrt := false.B
1567      csBundle(0).fpu.fcvt := false.B
1568
1569      csBundle(1).srcType(0) := SrcType.reg
1570      csBundle(1).srcType(1) := SrcType.imm
1571      csBundle(1).lsrc(0) := decodedInstsSimple.lsrc(1)
1572      csBundle(1).lsrc(1) := 0.U
1573      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U
1574      csBundle(1).fuType := FuType.i2f.U
1575      csBundle(1).rfWen := false.B
1576      csBundle(1).fpWen := true.B
1577      csBundle(1).vecWen := false.B
1578      csBundle(1).fpu.isAddSub := false.B
1579      csBundle(1).fpu.typeTagIn := FPU.D
1580      csBundle(1).fpu.typeTagOut := FPU.D
1581      csBundle(1).fpu.fromInt := true.B
1582      csBundle(1).fpu.wflags := false.B
1583      csBundle(1).fpu.fpWen := true.B
1584      csBundle(1).fpu.div := false.B
1585      csBundle(1).fpu.sqrt := false.B
1586      csBundle(1).fpu.fcvt := false.B
1587
1588      //LMUL
1589      for (i <- 0 until MAX_VLMUL) {
1590        csBundle(i + 2).srcType(0) := SrcType.fp
1591        csBundle(i + 2).srcType(1) := SrcType.fp
1592        csBundle(i + 2).lsrc(0) := FP_TMP_REG_MV.U
1593        csBundle(i + 2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
1594        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1595        csBundle(i + 2).ldest := dest + i.U
1596        csBundle(i + 2).uopIdx := i.U
1597      }
1598    }
1599    is(UopSplitType.VEC_I_LDST) {
1600    /*
1601      FMV.D.X
1602       */
1603      val vlmul = vlmulReg
1604      val vsew = Cat(0.U(1.W), vsewReg)
1605      val veew = Cat(0.U(1.W), width)
1606      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
1607      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
1608        "b001".U -> 1.U,
1609        "b010".U -> 2.U,
1610        "b011".U -> 3.U
1611      ))
1612      val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
1613        "b001".U -> 1.U,
1614        "b010".U -> 2.U,
1615        "b011".U -> 3.U
1616      ))
1617      csBundle(0).srcType(0) := SrcType.reg
1618      csBundle(0).srcType(1) := SrcType.imm
1619      csBundle(0).lsrc(1) := 0.U
1620      csBundle(0).ldest := FP_TMP_REG_MV.U
1621      csBundle(0).fuType := FuType.i2f.U
1622      csBundle(0).rfWen := false.B
1623      csBundle(0).fpWen := true.B
1624      csBundle(0).vecWen := false.B
1625      csBundle(0).fpu.isAddSub := false.B
1626      csBundle(0).fpu.typeTagIn := FPU.D
1627      csBundle(0).fpu.typeTagOut := FPU.D
1628      csBundle(0).fpu.fromInt := true.B
1629      csBundle(0).fpu.wflags := false.B
1630      csBundle(0).fpu.fpWen := true.B
1631      csBundle(0).fpu.div := false.B
1632      csBundle(0).fpu.sqrt := false.B
1633      csBundle(0).fpu.fcvt := false.B
1634
1635      //LMUL
1636      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
1637        indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul, nf)
1638        val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1639        val offsetVd = indexedLSRegOffset(i).outOffsetVd
1640        val isFirstUopInVd = indexedLSRegOffset(i).outIsFirstUopInVd
1641        csBundle(i + 1).srcType(0) := SrcType.fp
1642        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1643        csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1644        /**
1645          * For indexed instructions, VLSU will concatenate all the uops that write the same logic vd register and
1646          * writeback only once for all these uops. However, these uops share the same lsrc(2)/old vd and the same
1647          * ldest/vd that is equal to old vd, which leads to data dependence between the uops. Therefore there will be
1648          * deadlock for indexed instructions with emul > lmul.
1649          *
1650          * Assume N = emul/lmul. To break the deadlock, only the first uop will read old vd as lsrc(2), and the rest
1651          * N-1 uops will read temporary vector register.
1652          */
1653        // csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1654        csBundle(i + 1).lsrc(2) := Mux(
1655          isFirstUopInVd,
1656          Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)),
1657          VECTOR_TMP_REG_LMUL.U
1658        )
1659        csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1660        csBundle(i + 1).uopIdx := i.U
1661      }
1662    }
1663  }
1664
1665  //uops dispatch
1666  val s_normal :: s_ext :: Nil = Enum(2)
1667  val state = RegInit(s_normal)
1668  val state_next = WireDefault(state)
1669  val uopRes = RegInit(0.U)
1670
1671  //readyFromRename Counter
1672  val readyCounter = PriorityMuxDefault(io.readyFromRename.map(x => !x).zip((0 to (RenameWidth - 1)).map(_.U)), RenameWidth.U)
1673
1674  switch(state) {
1675    is(s_normal) {
1676      state_next := Mux(io.validFromIBuf(0) && (numOfUop > readyCounter) && (readyCounter =/= 0.U), s_ext, s_normal)
1677    }
1678    is(s_ext) {
1679      state_next := Mux(io.validFromIBuf(0) && (uopRes > readyCounter), s_ext, s_normal)
1680    }
1681  }
1682
1683  state := state_next
1684
1685  val uopRes0 = Mux(state === s_normal, numOfUop, uopRes)
1686  val uopResJudge = Mux(state === s_normal,
1687    io.validFromIBuf(0) && (readyCounter =/= 0.U) && (uopRes0 > readyCounter),
1688    io.validFromIBuf(0) && (uopRes0 > readyCounter))
1689  uopRes := Mux(uopResJudge, uopRes0 - readyCounter, 0.U)
1690
1691  for(i <- 0 until RenameWidth) {
1692    decodedInsts(i) := MuxCase(csBundle(i), Seq(
1693      (state === s_normal) -> csBundle(i),
1694      (state === s_ext) -> Mux((i.U + numOfUop -uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1695    ).toSeq)
1696  }
1697
1698  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1699  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1700  val notInf = Wire(Vec(DecodeWidth, Bool()))
1701  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1702  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1703  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1704  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1705
1706  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1707    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1708    0.U)
1709  validToRename.zipWithIndex.foreach{
1710    case(dst, i) =>
1711      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1712      dst := MuxCase(false.B, Seq(
1713        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1714        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1715      ).toSeq)
1716  }
1717
1718  readyToIBuf.zipWithIndex.foreach {
1719    case (dst, i) =>
1720      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1721      dst := MuxCase(true.B, Seq(
1722        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1723        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1724      ).toSeq)
1725  }
1726
1727  io.deq.decodedInsts := decodedInsts
1728  io.deq.isVset := isVsetSimple
1729  io.deq.complexNum := complexNum
1730  io.deq.validToRename := validToRename
1731  io.deq.readyToIBuf := readyToIBuf
1732
1733}
1734