xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 3235a9d8281092fdea8b64634006a2bc888e7031)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for a single uop slot of a vector indexed load/store.
  *
  * One instance is built per uop slot (`uopIdx`). The 7-bit `src` input is matched against the
  * key `{emul[1:0], lmul[1:0], nf[2:0]}` and the 7-bit table entry is split into the three outputs.
  * `emul` and `lmul` are log2-encoded (they are used as shift amounts below); `nf` is the number of
  * fields minus one.
  *
  * NOTE(review): `outOffsetVs2` / `outOffsetVd` are presumably register-number offsets that the
  * decoder adds to vs2 / vd for this uop -- confirm against the consumer of this module.
  *
  * @param uopIdx index of the uop slot this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(7.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))
  val outIsFirstUopInVd = IO(Output(Bool()))

  /**
    * Computes the table entry for one (lmul, emul, nfields, uopIdx) combination.
    *
    * Every field owns `1 << max(lmul, emul)` consecutive uop slots. Inside a field, slot `i` maps to
    *  - `lmul < emul`: vs2 offset `i`; `1 << (emul - lmul)` consecutive slots share one vd offset,
    *    and only the first slot of each such run is flagged as first-uop-in-vd;
    *  - otherwise: vd offset `i`; `1 << (lmul - emul)` consecutive slots share one vs2 offset and
    *    every slot is flagged as first-uop-in-vd.
    *
    * The default entry `(0, 0, 1)` is returned when `uopIdx` lies outside the slots used by this
    * configuration, or when the destination group needs more than 8 registers
    * (`(1 << lmul) * nfields > 8`). The guard deliberately uses the register count `1 << lmul`
    * rather than the log2 value: comparing the log2 value let groups such as LMUL=2 with NF=8
    * through, whose vd offsets (up to 15) do not fit the 3-bit field and spilled into the
    * vs2-offset bits of the table entry.
    *
    * @param lmul    log2 of the data register-group size
    * @param emul    log2 of the index register-group size
    * @param nfields number of fields (nf + 1)
    * @param uopIdx  uop slot being queried
    * @return `(offsetVs2, offsetVd, isFirstUopInVd)` with `isFirstUopInVd` encoded as 0 or 1
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, nfields:Int, uopIdx:Int): (Int, Int, Int) = {
    val default = (0, 0, 1)
    val vdRegsPerField = 1 << lmul
    val uopsPerField = 1 << (if (lmul < emul) emul else lmul)
    val groupFits = vdRegsPerField * nfields <= 8
    val slotInUse = uopIdx >= 0 && uopIdx < nfields * uopsPerField

    if (!groupFits || !slotInUse) {
      default
    } else {
      val field = uopIdx / uopsPerField   // which field this slot belongs to
      val i = uopIdx % uopsPerField       // position of the slot inside that field
      if (lmul < emul) {    // lmul < emul, uop num depends on emul * nf
        val slotsPerVd = 1 << (emul - lmul)
        (i, i / slotsPerVd + field * vdRegsPerField, if (i % slotsPerVd == 0) 1 else 0)
      } else {              // lmul >= emul, uop num depends on lmul * nf
        val slotsPerVs2 = 1 << (lmul - emul)
        (i / slotsPerVs2, i + field * vdRegsPerField, 1)
      }
    }
  }

  // indexed load/store: one row per (emul, lmul, nf) combination, in key order
  val combVemulNf : Seq[(Int, Int, Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf   <- 0 until 8
    } yield {
      val (offsetVs2, offsetVd, isFirstUopInVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, nf + 1, uopIdx)
      (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd)
    }

  // key   = {emul[1:0], lmul[1:0], nf[2:0]}
  // entry = {isFirstUopInVd, offsetVs2[2:0], offsetVd[2:0]}
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, isFirstUopInVd, offsetVs2, offsetVd) =>
      (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat((isFirstUopInVd << 6 | offsetVs2 << 3 | offsetVd).U(7.W)))
  }, BitPat.N(7)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
  outIsFirstUopInVd := out(6).asBool
}
86
/**
  * Sizing limits and scratch logical-register numbers shared by the vector uop-splitting logic.
  */
trait VectorConstants {
  /** Upper bound used when unrolling per-register uops of a vector instruction. */
  val MAX_VLMUL: Int = 8

  /** Logical register number used as the destination of the int-to-fp move uop. */
  val FP_TMP_REG_MV: Int = 32

  /** First temporary vector logical register; 33~47 -> 15 temporaries in total. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** Number of `indexedLSUopTable` instances, i.e. uop slots for indexed load/store. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
93
/**
  * IO bundle of [[DecodeUnitComp]].
  *
  * Field order is part of the hardware layout of the bundle; do not reorder.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Result of the preceding simple decode for one instruction.
  val simple = new Bundle {
    // Decoded control signals; DecodeUnitComp copies these into every uop it emits.
    val decodedInst = Input(new DecodedInst)
    val isComplex = Input(Bool())
    // Uop-split information; DecodeUnitComp reads lmul, numOfUop and numOfWB from it.
    val uopInfo = Input(new UopInfo)
  }
  // Vector type input; DecodeUnitComp checks its `illegal` flag for vset instructions.
  val vtype = Input(new VType)
  val in0pc = Input(UInt(VAddrBits.W))
  // One flag per decode lane.
  val isComplex = Input(Vec(DecodeWidth, Bool()))
  // NOTE(review): presumably the per-lane valid bits coming from the instruction buffer -- confirm.
  val validFromIBuf = Input(Vec(DecodeWidth, Bool()))
  // NOTE(review): presumably the per-port ready bits coming from rename -- confirm.
  val readyFromRename = Input(Vec(RenameWidth, Bool()))
  // Outputs of the complex decoder.
  val deq = new Bundle {
    // Up to RenameWidth uops per cycle.
    val decodedInsts = Output(Vec(RenameWidth, new DecodedInst))
    val isVset = Output(Bool())
    val readyToIBuf = Output(Vec(DecodeWidth, Bool()))
    val validToRename = Output(Vec(RenameWidth, Bool()))
    // 3-bit count.
    // NOTE(review): presumably the number of uops emitted this cycle -- confirm.
    val complexNum = Output(UInt(3.W))
  }
  val csrCtrl = Input(new CustomCSRCtrlIO)
}
114
115/**
116  * @author zly
117  */
118class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
119  val io = IO(new DecodeUnitCompIO)
120
121  val maxUopSize = MaxUopSize
122  //input bits
123  private val inst: XSInstBitFields = io.simple.decodedInst.instr.asTypeOf(new XSInstBitFields)
124
125  val src1 = Cat(0.U(1.W), inst.RS1)
126  val src2 = Cat(0.U(1.W), inst.RS2)
127  val dest = Cat(0.U(1.W), inst.RD)
128
129  val nf    = inst.NF
130  val width = inst.WIDTH(1, 0)
131
132  //output bits
133  val decodedInsts = Wire(Vec(RenameWidth, new DecodedInst))
134  val validToRename = Wire(Vec(RenameWidth, Bool()))
135  val readyToIBuf = Wire(Vec(DecodeWidth, Bool()))
136  val complexNum = Wire(UInt(3.W))
137
138  //output of DecodeUnit
139  val decodedInstsSimple = Wire(new DecodedInst)
140  val numOfUop = Wire(UInt(log2Up(maxUopSize+1).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize+1).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_INDEXED_LS_UOPNUM)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  decodedInstsSimple := io.simple.decodedInst
150  lmul := io.simple.uopInfo.lmul
151  isVsetSimple := io.simple.decodedInst.isVset
152  val vlmulReg = io.simple.decodedInst.vpu.vlmul
153  val vsewReg = io.simple.decodedInst.vpu.vsew
154  when(isVsetSimple) {
155    when(dest === 0.U && src1 === 0.U) {
156      decodedInstsSimple.fuOpType := VSETOpType.keepVl(io.simple.decodedInst.fuOpType)
157    }.elsewhen(src1 === 0.U) {
158      decodedInstsSimple.fuOpType := VSETOpType.setVlmax(io.simple.decodedInst.fuOpType)
159    }
160    when(io.vtype.illegal){
161      decodedInstsSimple.flushPipe := true.B
162    }
163  }
164  //Type of uop Div
165  val typeOfSplit = decodedInstsSimple.uopSplitType
166  val src1Type = decodedInstsSimple.srcType(0)
167  val src1IsImm = src1Type === SrcType.imm
168
169  when(typeOfSplit === UopSplitType.DIR) {
170    numOfUop := Mux(dest =/= 0.U, 2.U,
171      Mux(src1 =/= 0.U, 1.U,
172        Mux(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType), 2.U, 1.U)))
173    numOfWB := Mux(dest =/= 0.U, 2.U,
174      Mux(src1 =/= 0.U, 1.U,
175        Mux(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType), 2.U, 1.U)))
176  } .otherwise {
177    numOfUop := io.simple.uopInfo.numOfUop
178    numOfWB := io.simple.uopInfo.numOfWB
179  }
180
181  //uop div up to maxUopSize
182  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
183  csBundle.map { case dst =>
184    dst := decodedInstsSimple
185    dst.firstUop := false.B
186    dst.lastUop := false.B
187  }
188
189  csBundle(0).numUops := numOfUop
190  csBundle(0).numWB := numOfWB
191  csBundle(0).firstUop := true.B
192  csBundle(numOfUop - 1.U).lastUop := true.B
193
194  switch(typeOfSplit) {
195    is(UopSplitType.DIR) {
196      when(isVsetSimple) {
197        when(dest =/= 0.U) {
198          csBundle(0).fuType := FuType.vsetiwi.U
199          csBundle(0).fuOpType := VSETOpType.switchDest(decodedInstsSimple.fuOpType)
200          csBundle(0).flushPipe := false.B
201          csBundle(0).rfWen := true.B
202          csBundle(0).vecWen := false.B
203          csBundle(1).ldest := VCONFIG_IDX.U
204          csBundle(1).rfWen := false.B
205          csBundle(1).vecWen := true.B
206        }.elsewhen(src1 =/= 0.U) {
207          csBundle(0).ldest := VCONFIG_IDX.U
208        }.elsewhen(VSETOpType.isVsetvli(decodedInstsSimple.fuOpType)) {
209          csBundle(0).fuType := FuType.vsetfwf.U
210          csBundle(0).srcType(0) := SrcType.vp
211          csBundle(0).lsrc(0) := VCONFIG_IDX.U
212        }.elsewhen(VSETOpType.isVsetvl(decodedInstsSimple.fuOpType)) {
213          csBundle(0).srcType(0) := SrcType.reg
214          csBundle(0).srcType(1) := SrcType.imm
215          csBundle(0).lsrc(1) := 0.U
216          csBundle(0).ldest := FP_TMP_REG_MV.U
217          csBundle(0).fuType := FuType.i2f.U
218          csBundle(0).rfWen := false.B
219          csBundle(0).fpWen := true.B
220          csBundle(0).vecWen := false.B
221          csBundle(0).fpu.isAddSub := false.B
222          csBundle(0).fpu.typeTagIn := FPU.D
223          csBundle(0).fpu.typeTagOut := FPU.D
224          csBundle(0).fpu.fromInt := true.B
225          csBundle(0).fpu.wflags := false.B
226          csBundle(0).fpu.fpWen := true.B
227          csBundle(0).fpu.div := false.B
228          csBundle(0).fpu.sqrt := false.B
229          csBundle(0).fpu.fcvt := false.B
230          csBundle(0).flushPipe := false.B
231          csBundle(1).fuType := FuType.vsetfwf.U
232          csBundle(1).srcType(0) := SrcType.vp
233          csBundle(1).lsrc(0) := VCONFIG_IDX.U
234          csBundle(1).srcType(1) := SrcType.fp
235          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
236          csBundle(1).ldest := VCONFIG_IDX.U
237        }
238      }
239    }
240    is(UopSplitType.VEC_VVV) {
241      for (i <- 0 until MAX_VLMUL) {
242        csBundle(i).lsrc(0) := src1 + i.U
243        csBundle(i).lsrc(1) := src2 + i.U
244        csBundle(i).lsrc(2) := dest + i.U
245        csBundle(i).ldest := dest + i.U
246        csBundle(i).uopIdx := i.U
247      }
248    }
249    is(UopSplitType.VEC_VFV) {
250      for (i <- 0 until MAX_VLMUL) {
251        csBundle(i).lsrc(1) := src2 + i.U
252        csBundle(i).lsrc(2) := dest + i.U
253        csBundle(i).ldest := dest + i.U
254        csBundle(i).uopIdx := i.U
255      }
256    }
257    is(UopSplitType.VEC_EXT2) {
258      for (i <- 0 until MAX_VLMUL / 2) {
259        csBundle(2 * i).lsrc(1) := src2 + i.U
260        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
261        csBundle(2 * i).ldest := dest + (2 * i).U
262        csBundle(2 * i).uopIdx := (2 * i).U
263        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
264        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
265        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
266        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
267      }
268    }
269    is(UopSplitType.VEC_EXT4) {
270      for (i <- 0 until MAX_VLMUL / 4) {
271        csBundle(4 * i).lsrc(1) := src2 + i.U
272        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
273        csBundle(4 * i).ldest := dest + (4 * i).U
274        csBundle(4 * i).uopIdx := (4 * i).U
275        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
276        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
277        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
278        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
279        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
280        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
281        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
282        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
283        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
284        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
285        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
286        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
287      }
288    }
289    is(UopSplitType.VEC_EXT8) {
290      for (i <- 0 until MAX_VLMUL) {
291        csBundle(i).lsrc(1) := src2
292        csBundle(i).lsrc(2) := dest + i.U
293        csBundle(i).ldest := dest + i.U
294        csBundle(i).uopIdx := i.U
295      }
296    }
297    is(UopSplitType.VEC_0XV) {
298      /*
299      FMV.D.X
300       */
301      csBundle(0).srcType(0) := SrcType.reg
302      csBundle(0).srcType(1) := SrcType.imm
303      csBundle(0).lsrc(1) := 0.U
304      csBundle(0).ldest := FP_TMP_REG_MV.U
305      csBundle(0).fuType := FuType.i2f.U
306      csBundle(0).rfWen := false.B
307      csBundle(0).fpWen := true.B
308      csBundle(0).vecWen := false.B
309      csBundle(0).fpu.isAddSub := false.B
310      csBundle(0).fpu.typeTagIn := FPU.D
311      csBundle(0).fpu.typeTagOut := FPU.D
312      csBundle(0).fpu.fromInt := true.B
313      csBundle(0).fpu.wflags := false.B
314      csBundle(0).fpu.fpWen := true.B
315      csBundle(0).fpu.div := false.B
316      csBundle(0).fpu.sqrt := false.B
317      csBundle(0).fpu.fcvt := false.B
318      /*
319      vfmv.s.f
320       */
321      csBundle(1).srcType(0) := SrcType.fp
322      csBundle(1).srcType(1) := SrcType.vp
323      csBundle(1).srcType(2) := SrcType.vp
324      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
325      csBundle(1).lsrc(1) := 0.U
326      csBundle(1).lsrc(2) := dest
327      csBundle(1).ldest := dest
328      csBundle(1).fuType := FuType.vppu.U
329      csBundle(1).fuOpType := VpermType.dummy
330      csBundle(1).rfWen := false.B
331      csBundle(1).fpWen := false.B
332      csBundle(1).vecWen := true.B
333    }
334    is(UopSplitType.VEC_VXV) {
335      /*
336      i to vector move
337       */
338      csBundle(0).srcType(0) := SrcType.reg
339      csBundle(0).srcType(1) := SrcType.imm
340      csBundle(0).lsrc(1) := 0.U
341      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
342      csBundle(0).fuType := FuType.i2v.U
343      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
344      csBundle(0).vecWen := true.B
345      /*
346      LMUL
347       */
348      for (i <- 0 until MAX_VLMUL) {
349        csBundle(i + 1).srcType(0) := SrcType.vp
350        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
351        csBundle(i + 1).lsrc(1) := src2 + i.U
352        csBundle(i + 1).lsrc(2) := dest + i.U
353        csBundle(i + 1).ldest := dest + i.U
354        csBundle(i + 1).uopIdx := i.U
355      }
356    }
357    is(UopSplitType.VEC_VVW) {
358      for (i <- 0 until MAX_VLMUL / 2) {
359        csBundle(2 * i).lsrc(0) := src1 + i.U
360        csBundle(2 * i).lsrc(1) := src2 + i.U
361        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
362        csBundle(2 * i).ldest := dest + (2 * i).U
363        csBundle(2 * i).uopIdx := (2 * i).U
364        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
365        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
366        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
367        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
368        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
369      }
370    }
371    is(UopSplitType.VEC_VFW) {
372      for (i <- 0 until MAX_VLMUL / 2) {
373        csBundle(2 * i).lsrc(0) := src1
374        csBundle(2 * i).lsrc(1) := src2 + i.U
375        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
376        csBundle(2 * i).ldest := dest + (2 * i).U
377        csBundle(2 * i).uopIdx := (2 * i).U
378        csBundle(2 * i + 1).lsrc(0) := src1
379        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
380        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
381        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
382        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
383      }
384    }
385    is(UopSplitType.VEC_WVW) {
386      for (i <- 0 until MAX_VLMUL / 2) {
387        csBundle(2 * i).lsrc(0) := src1 + i.U
388        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
389        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
390        csBundle(2 * i).ldest := dest + (2 * i).U
391        csBundle(2 * i).uopIdx := (2 * i).U
392        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
393        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
394        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
395        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
396        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
397      }
398    }
399    is(UopSplitType.VEC_VXW) {
400      /*
401      i to vector move
402       */
403      csBundle(0).srcType(0) := SrcType.reg
404      csBundle(0).srcType(1) := SrcType.imm
405      csBundle(0).lsrc(1) := 0.U
406      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
407      csBundle(0).fuType := FuType.i2v.U
408      csBundle(0).fuOpType := vsewReg
409      csBundle(0).vecWen := true.B
410
411      for (i <- 0 until MAX_VLMUL / 2) {
412        csBundle(2 * i + 1).srcType(0) := SrcType.vp
413        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
414        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
415        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
416        csBundle(2 * i + 1).ldest := dest + (2 * i).U
417        csBundle(2 * i + 1).uopIdx := (2 * i).U
418        csBundle(2 * i + 2).srcType(0) := SrcType.vp
419        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
420        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
421        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
422        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
423        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
424      }
425    }
426    is(UopSplitType.VEC_WXW) {
427      /*
428      i to vector move
429       */
430      csBundle(0).srcType(0) := SrcType.reg
431      csBundle(0).srcType(1) := SrcType.imm
432      csBundle(0).lsrc(1) := 0.U
433      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
434      csBundle(0).fuType := FuType.i2v.U
435      csBundle(0).fuOpType := vsewReg
436      csBundle(0).vecWen := true.B
437
438      for (i <- 0 until MAX_VLMUL / 2) {
439        csBundle(2 * i + 1).srcType(0) := SrcType.vp
440        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
441        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
442        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
443        csBundle(2 * i + 1).ldest := dest + (2 * i).U
444        csBundle(2 * i + 1).uopIdx := (2 * i).U
445        csBundle(2 * i + 2).srcType(0) := SrcType.vp
446        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
447        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
448        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
449        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
450        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
451      }
452    }
453    is(UopSplitType.VEC_WVV) {
454      for (i <- 0 until MAX_VLMUL / 2) {
455
456        csBundle(2 * i).lsrc(0) := src1 + i.U
457        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
458        csBundle(2 * i).lsrc(2) := dest + i.U
459        csBundle(2 * i).ldest := dest + i.U
460        csBundle(2 * i).uopIdx := (2 * i).U
461        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
462        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
463        csBundle(2 * i + 1).lsrc(2) := dest + i.U
464        csBundle(2 * i + 1).ldest := dest + i.U
465        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
466      }
467    }
468    is(UopSplitType.VEC_WFW) {
469      for (i <- 0 until MAX_VLMUL / 2) {
470        csBundle(2 * i).lsrc(0) := src1
471        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
472        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
473        csBundle(2 * i).ldest := dest + (2 * i).U
474        csBundle(2 * i).uopIdx := (2 * i).U
475        csBundle(2 * i + 1).lsrc(0) := src1
476        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
477        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
478        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
479        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
480      }
481    }
482    is(UopSplitType.VEC_WXV) {
483      /*
484      i to vector move
485       */
486      csBundle(0).srcType(0) := SrcType.reg
487      csBundle(0).srcType(1) := SrcType.imm
488      csBundle(0).lsrc(1) := 0.U
489      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
490      csBundle(0).fuType := FuType.i2v.U
491      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
492      csBundle(0).vecWen := true.B
493
494      for (i <- 0 until MAX_VLMUL / 2) {
495        csBundle(2 * i + 1).srcType(0) := SrcType.vp
496        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
497        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
498        csBundle(2 * i + 1).lsrc(2) := dest + i.U
499        csBundle(2 * i + 1).ldest := dest + i.U
500        csBundle(2 * i + 1).uopIdx := (2 * i).U
501        csBundle(2 * i + 2).srcType(0) := SrcType.vp
502        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
503        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
504        csBundle(2 * i + 2).lsrc(2) := dest + i.U
505        csBundle(2 * i + 2).ldest := dest + i.U
506        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
507      }
508    }
509    is(UopSplitType.VEC_VVM) {
510      csBundle(0).lsrc(2) := dest
511      csBundle(0).ldest := dest
512      csBundle(0).uopIdx := 0.U
513      for (i <- 1 until MAX_VLMUL) {
514        csBundle(i).lsrc(0) := src1 + i.U
515        csBundle(i).lsrc(1) := src2 + i.U
516        csBundle(i).lsrc(2) := dest
517        csBundle(i).ldest := dest
518        csBundle(i).uopIdx := i.U
519      }
520    }
521    is(UopSplitType.VEC_VFM) {
522      csBundle(0).lsrc(2) := dest
523      csBundle(0).ldest := dest
524      csBundle(0).uopIdx := 0.U
525      for (i <- 1 until MAX_VLMUL) {
526        csBundle(i).lsrc(0) := src1
527        csBundle(i).lsrc(1) := src2 + i.U
528        csBundle(i).lsrc(2) := dest
529        csBundle(i).ldest := dest
530        csBundle(i).uopIdx := i.U
531      }
532      csBundle(numOfUop - 1.U).ldest := dest
533    }
534    is(UopSplitType.VEC_VXM) {
535      /*
536      i to vector move
537       */
538      csBundle(0).srcType(0) := SrcType.reg
539      csBundle(0).srcType(1) := SrcType.imm
540      csBundle(0).lsrc(1) := 0.U
541      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
542      csBundle(0).fuType := FuType.i2v.U
543      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
544      csBundle(0).vecWen := true.B
545      //LMUL
546      csBundle(1).srcType(0) := SrcType.vp
547      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
548      csBundle(1).lsrc(2) := dest
549      csBundle(1).ldest := dest
550      csBundle(1).uopIdx := 0.U
551      for (i <- 1 until MAX_VLMUL) {
552        csBundle(i + 1).srcType(0) := SrcType.vp
553        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
554        csBundle(i + 1).lsrc(1) := src2 + i.U
555        csBundle(i + 1).lsrc(2) := dest
556        csBundle(i + 1).ldest := dest
557        csBundle(i + 1).uopIdx := i.U
558      }
559      csBundle(numOfUop - 1.U).ldest := dest
560    }
561    is(UopSplitType.VEC_SLIDE1UP) {
562      /*
563      i to vector move
564       */
565      csBundle(0).srcType(0) := SrcType.reg
566      csBundle(0).srcType(1) := SrcType.imm
567      csBundle(0).lsrc(1) := 0.U
568      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
569      csBundle(0).fuType := FuType.i2v.U
570      csBundle(0).fuOpType := vsewReg
571      csBundle(0).vecWen := true.B
572      //LMUL
573      csBundle(1).srcType(0) := SrcType.vp
574      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
575      csBundle(1).lsrc(2) := dest
576      csBundle(1).ldest := dest
577      csBundle(1).uopIdx := 0.U
578      for (i <- 1 until MAX_VLMUL) {
579        csBundle(i + 1).srcType(0) := SrcType.vp
580        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
581        csBundle(i + 1).lsrc(1) := src2 + i.U
582        csBundle(i + 1).lsrc(2) := dest + i.U
583        csBundle(i + 1).ldest := dest + i.U
584        csBundle(i + 1).uopIdx := i.U
585      }
586    }
587    is(UopSplitType.VEC_FSLIDE1UP) {
588      //LMUL
589      csBundle(0).srcType(0) := SrcType.fp
590      csBundle(0).lsrc(0) := src1
591      csBundle(0).lsrc(1) := src2
592      csBundle(0).lsrc(2) := dest
593      csBundle(0).ldest := dest
594      csBundle(0).uopIdx := 0.U
595      for (i <- 1 until MAX_VLMUL) {
596        csBundle(i).srcType(0) := SrcType.vp
597        csBundle(i).lsrc(0) := src2 + (i - 1).U
598        csBundle(i).lsrc(1) := src2 + i.U
599        csBundle(i).lsrc(2) := dest + i.U
600        csBundle(i).ldest := dest + i.U
601        csBundle(i).uopIdx := i.U
602      }
603    }
604    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
605      /*
606      i to vector move
607       */
608      csBundle(0).srcType(0) := SrcType.reg
609      csBundle(0).srcType(1) := SrcType.imm
610      csBundle(0).lsrc(1) := 0.U
611      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
612      csBundle(0).fuType := FuType.i2v.U
613      csBundle(0).fuOpType := vsewReg
614      csBundle(0).vecWen := true.B
615      //LMUL
616      for (i <- 0 until MAX_VLMUL) {
617        csBundle(2 * i + 1).srcType(0) := SrcType.vp
618        csBundle(2 * i + 1).srcType(1) := SrcType.vp
619        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
620        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
621        csBundle(2 * i + 1).lsrc(2) := dest + i.U
622        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
623        csBundle(2 * i + 1).uopIdx := (2 * i).U
624        if (2 * i + 2 < MAX_VLMUL * 2) {
625          csBundle(2 * i + 2).srcType(0) := SrcType.vp
626          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
627          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
628          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
629          csBundle(2 * i + 2).ldest := dest + i.U
630          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
631        }
632      }
633      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
634      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
635      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
636    }
637    is(UopSplitType.VEC_FSLIDE1DOWN) {
638      //LMUL
639      for (i <- 0 until MAX_VLMUL) {
640        csBundle(2 * i).srcType(0) := SrcType.vp
641        csBundle(2 * i).srcType(1) := SrcType.vp
642        csBundle(2 * i).lsrc(0) := src2 + (i + 1).U
643        csBundle(2 * i).lsrc(1) := src2 + i.U
644        csBundle(2 * i).lsrc(2) := dest + i.U
645        csBundle(2 * i).ldest := VECTOR_TMP_REG_LMUL.U
646        csBundle(2 * i).uopIdx := (2 * i).U
647        csBundle(2 * i + 1).srcType(0) := SrcType.fp
648        csBundle(2 * i + 1).lsrc(0) := src1
649        csBundle(2 * i + 1).lsrc(2) := VECTOR_TMP_REG_LMUL.U
650        csBundle(2 * i + 1).ldest := dest + i.U
651        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
652      }
653      csBundle(numOfUop - 1.U).srcType(0) := SrcType.fp
654      csBundle(numOfUop - 1.U).lsrc(0) := src1
655      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
656    }
657    is(UopSplitType.VEC_VRED) {
658      when(vlmulReg === "b001".U) {
659        csBundle(0).srcType(2) := SrcType.DC
660        csBundle(0).lsrc(0) := src2 + 1.U
661        csBundle(0).lsrc(1) := src2
662        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
663        csBundle(0).uopIdx := 0.U
664      }
665      when(vlmulReg === "b010".U) {
666        csBundle(0).srcType(2) := SrcType.DC
667        csBundle(0).lsrc(0) := src2 + 1.U
668        csBundle(0).lsrc(1) := src2
669        csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
670        csBundle(0).uopIdx := 0.U
671
672        csBundle(1).srcType(2) := SrcType.DC
673        csBundle(1).lsrc(0) := src2 + 3.U
674        csBundle(1).lsrc(1) := src2 + 2.U
675        csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
676        csBundle(1).uopIdx := 1.U
677
678        csBundle(2).srcType(2) := SrcType.DC
679        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
680        csBundle(2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
681        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
682        csBundle(2).uopIdx := 2.U
683      }
684      when(vlmulReg === "b011".U) {
685        for (i <- 0 until MAX_VLMUL) {
686          if (i < MAX_VLMUL - MAX_VLMUL / 2) {
687            csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
688            csBundle(i).lsrc(1) := src2 + (i * 2).U
689            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
690          } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
691            csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2 + 1).U
692            csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - MAX_VLMUL / 2) * 2).U
693            csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
694          } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
695            csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
696            csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
697            csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
698          }
699          csBundle(i).srcType(2) := SrcType.DC
700          csBundle(i).uopIdx := i.U
701        }
702      }
703      when(vlmulReg.orR) {
704        csBundle(numOfUop - 1.U).srcType(2) := SrcType.vp
705        csBundle(numOfUop - 1.U).lsrc(0) := src1
706        csBundle(numOfUop - 1.U).lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
707        csBundle(numOfUop - 1.U).lsrc(2) := dest
708        csBundle(numOfUop - 1.U).ldest := dest
709        csBundle(numOfUop - 1.U).uopIdx := numOfUop - 1.U
710      }
711    }
712    is(UopSplitType.VEC_VFRED) {
713      val vlmul = vlmulReg
714      val vsew = vsewReg
715      when(vlmul === VLmul.m8){
716        for (i <- 0 until 4) {
717          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
718          csBundle(i).lsrc(1) := src2 + (i * 2).U
719          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
720          csBundle(i).uopIdx := i.U
721        }
722        for (i <- 4 until 6) {
723          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
724          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
725          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
726          csBundle(i).uopIdx := i.U
727        }
728        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
729        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
730        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
731        csBundle(6).uopIdx := 6.U
732        when(vsew === VSew.e64) {
733          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
734          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
735          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
736          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
737          csBundle(7).uopIdx := 7.U
738          csBundle(8).lsrc(0) := src1
739          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
740          csBundle(8).ldest := dest
741          csBundle(8).uopIdx := 8.U
742        }
743        when(vsew === VSew.e32) {
744          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
745          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
746          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
747          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
748          csBundle(7).uopIdx := 7.U
749          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
750          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
751          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
752          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
753          csBundle(8).uopIdx := 8.U
754          csBundle(9).lsrc(0) := src1
755          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
756          csBundle(9).ldest := dest
757          csBundle(9).uopIdx := 9.U
758        }
759        when(vsew === VSew.e16) {
760          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
761          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
762          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
763          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
764          csBundle(7).uopIdx := 7.U
765          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
766          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
767          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
768          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
769          csBundle(8).uopIdx := 8.U
770          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
771          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
772          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
773          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
774          csBundle(9).uopIdx := 9.U
775          csBundle(10).lsrc(0) := src1
776          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
777          csBundle(10).ldest := dest
778          csBundle(10).uopIdx := 10.U
779        }
780      }
781      when(vlmul === VLmul.m4) {
782        for (i <- 0 until 2) {
783          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
784          csBundle(i).lsrc(1) := src2 + (i * 2).U
785          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
786          csBundle(i).uopIdx := i.U
787        }
788        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
789        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
790        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
791        csBundle(2).uopIdx := 2.U
792        when(vsew === VSew.e64) {
793          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
794          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
795          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
796          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
797          csBundle(3).uopIdx := 3.U
798          csBundle(4).lsrc(0) := src1
799          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
800          csBundle(4).ldest := dest
801          csBundle(4).uopIdx := 4.U
802        }
803        when(vsew === VSew.e32) {
804          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
805          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
806          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
807          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
808          csBundle(3).uopIdx := 3.U
809          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
810          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
811          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
812          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
813          csBundle(4).uopIdx := 4.U
814          csBundle(5).lsrc(0) := src1
815          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
816          csBundle(5).ldest := dest
817          csBundle(5).uopIdx := 5.U
818        }
819        when(vsew === VSew.e16) {
820          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
821          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
822          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
823          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
824          csBundle(3).uopIdx := 3.U
825          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
826          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
827          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
828          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
829          csBundle(4).uopIdx := 4.U
830          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
831          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
832          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
833          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
834          csBundle(5).uopIdx := 5.U
835          csBundle(6).lsrc(0) := src1
836          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
837          csBundle(6).ldest := dest
838          csBundle(6).uopIdx := 6.U
839        }
840      }
841      when(vlmul === VLmul.m2) {
842        csBundle(0).lsrc(0) := src2 + 1.U
843        csBundle(0).lsrc(1) := src2 + 0.U
844        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
845        csBundle(0).uopIdx := 0.U
846        when(vsew === VSew.e64) {
847          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
848          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
849          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
850          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
851          csBundle(1).uopIdx := 1.U
852          csBundle(2).lsrc(0) := src1
853          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
854          csBundle(2).ldest := dest
855          csBundle(2).uopIdx := 2.U
856        }
857        when(vsew === VSew.e32) {
858          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
859          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
860          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
861          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
862          csBundle(1).uopIdx := 1.U
863          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
864          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
865          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
866          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
867          csBundle(2).uopIdx := 2.U
868          csBundle(3).lsrc(0) := src1
869          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
870          csBundle(3).ldest := dest
871          csBundle(3).uopIdx := 3.U
872        }
873        when(vsew === VSew.e16) {
874          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
875          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
876          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
877          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
878          csBundle(1).uopIdx := 1.U
879          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
880          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
881          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
882          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
883          csBundle(2).uopIdx := 2.U
884          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
885          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
886          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
887          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
888          csBundle(3).uopIdx := 3.U
889          csBundle(4).lsrc(0) := src1
890          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
891          csBundle(4).ldest := dest
892          csBundle(4).uopIdx := 4.U
893        }
894      }
895      when(vlmul === VLmul.m1) {
896        when(vsew === VSew.e64) {
897          csBundle(0).lsrc(0) := src2
898          csBundle(0).lsrc(1) := src2
899          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
900          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
901          csBundle(0).uopIdx := 0.U
902          csBundle(1).lsrc(0) := src1
903          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
904          csBundle(1).ldest := dest
905          csBundle(1).uopIdx := 1.U
906        }
907        when(vsew === VSew.e32) {
908          csBundle(0).lsrc(0) := src2
909          csBundle(0).lsrc(1) := src2
910          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
911          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
912          csBundle(0).uopIdx := 0.U
913          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
914          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
915          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
916          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
917          csBundle(1).uopIdx := 1.U
918          csBundle(2).lsrc(0) := src1
919          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
920          csBundle(2).ldest := dest
921          csBundle(2).uopIdx := 2.U
922        }
923        when(vsew === VSew.e16) {
924          csBundle(0).lsrc(0) := src2
925          csBundle(0).lsrc(1) := src2
926          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
927          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
928          csBundle(0).uopIdx := 0.U
929          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
930          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
931          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
932          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
933          csBundle(1).uopIdx := 1.U
934          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
935          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
936          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
937          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
938          csBundle(2).uopIdx := 2.U
939          csBundle(3).lsrc(0) := src1
940          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
941          csBundle(3).ldest := dest
942          csBundle(3).uopIdx := 3.U
943        }
944      }
945      when(vlmul === VLmul.mf2) {
946        when(vsew === VSew.e32) {
947          csBundle(0).lsrc(0) := src2
948          csBundle(0).lsrc(1) := src2
949          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
950          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
951          csBundle(0).uopIdx := 0.U
952          csBundle(1).lsrc(0) := src1
953          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
954          csBundle(1).ldest := dest
955          csBundle(1).uopIdx := 1.U
956        }
957        when(vsew === VSew.e16) {
958          csBundle(0).lsrc(0) := src2
959          csBundle(0).lsrc(1) := src2
960          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
961          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
962          csBundle(0).uopIdx := 0.U
963          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
964          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
965          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
966          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
967          csBundle(1).uopIdx := 1.U
968          csBundle(2).lsrc(0) := src1
969          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
970          csBundle(2).ldest := dest
971          csBundle(2).uopIdx := 2.U
972        }
973      }
974      when(vlmul === VLmul.mf4) {
975        when(vsew === VSew.e16) {
976          csBundle(0).lsrc(0) := src2
977          csBundle(0).lsrc(1) := src2
978          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
979          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
980          csBundle(0).uopIdx := 0.U
981          csBundle(1).lsrc(0) := src1
982          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
983          csBundle(1).ldest := dest
984          csBundle(1).uopIdx := 1.U
985        }
986      }
987    }
988
is(UopSplitType.VEC_VFREDOSUM) {
  import yunsuan.VfaluType
  // Uop split for VEC_VFREDOSUM. The uops form one serial chain through the
  // temporary register VECTOR_TMP_REG_LMUL: uop 0 reads src1, every
  // `ratio`-th uop reads the next src2 register, the uops in between raise a
  // fold flag, and only the last uop writes dest.
  val vlmul = vlmulReg
  val vsew = vsewReg
  val isWiden = decodedInstsSimple.fuOpType === VfaluType.vfwredosum
  def tmpReg = VECTOR_TMP_REG_LMUL.U

  // Drives the isFoldTo1_<ratio> control bit of uop `idx` with `value`.
  def setFold(idx: Int, ratio: Int, value: Bool): Unit = ratio match {
    case 2 => csBundle(idx).vpu.fpu.isFoldTo1_2 := value
    case 4 => csBundle(idx).vpu.fpu.isFoldTo1_4 := value
    case 8 => csBundle(idx).vpu.fpu.isFoldTo1_8 := value
  }

  // Emits `uopCount` chained uops.
  //   ratio      - number of uops spent on each src2 register (2 / 4 / 8).
  //   widenAware - when true, the vfwredosum opcode (isWiden) selects the
  //                half-ratio fold flag and re-reads the src2 register on the
  //                second uop of every group.
  def genOrderedSum(uopCount: Int, ratio: Int, widenAware: Boolean): Unit =
    for (i <- 0 until uopCount) {
      val startsGroup = i % ratio == 0
      val isLast = i == uopCount - 1
      def groupSrc = src2 + (i / ratio).U
      val uop = csBundle(i)
      uop.lsrc(0) := (if (i == 0) src1 else tmpReg)
      uop.lsrc(1) := (if (startsGroup) groupSrc else tmpReg)
      uop.lsrc(2) := (
        if (startsGroup) groupSrc
        else if (isLast) dest
        else if (widenAware && i % ratio == 1) Mux(isWiden, groupSrc, tmpReg)
        else tmpReg
      )
      uop.ldest := (if (isLast) dest else tmpReg)
      if (widenAware) {
        setFold(i, ratio / 2, isWiden && (!startsGroup).B)
        setFold(i, ratio, !isWiden && (!startsGroup).B)
      } else {
        setFold(i, ratio, (!startsGroup).B)
      }
      uop.uopIdx := i.U
    }

  // (vlmul, vsew, number of uops, fold ratio, widen-aware)
  Seq(
    (VLmul.m8,  VSew.e64, 16, 2, false),
    (VLmul.m8,  VSew.e32, 32, 4, false),
    (VLmul.m8,  VSew.e16, 64, 8, false),
    (VLmul.m4,  VSew.e64,  8, 2, false),
    (VLmul.m4,  VSew.e32, 16, 4, true),
    (VLmul.m4,  VSew.e16, 32, 8, true),
    (VLmul.m2,  VSew.e64,  4, 2, false),
    (VLmul.m2,  VSew.e32,  8, 4, true),
    (VLmul.m2,  VSew.e16, 16, 8, true),
    (VLmul.m1,  VSew.e64,  2, 2, false),
    (VLmul.m1,  VSew.e32,  4, 4, true),
    (VLmul.m1,  VSew.e16,  8, 8, true),
    (VLmul.mf2, VSew.e32,  2, 4, true),
    (VLmul.mf2, VSew.e16,  4, 8, true),
    (VLmul.mf4, VSew.e16,  2, 8, true)
  ).foreach { case (lmulEnc, sewEnc, uopCount, ratio, widenAware) =>
    when((vlmul === lmulEnc) && (vsew === sewEnc)) {
      genOrderedSum(uopCount, ratio, widenAware)
    }
  }
}
1181
is(UopSplitType.VEC_SLIDEUP) {
  // Uop 0: i to vector move, writing VECTOR_TMP_REG_LMUL through the i2v unit.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
  moveUop.vecWen := true.B
  // LMUL: destination register `group` is produced by a chain of group + 1
  // uops. The chain starts from the old value of dest + group, passes through
  // temporaries, and its last uop writes dest + group.
  for (group <- 0 until MAX_VLMUL; step <- 0 to group) {
    // Slot 0 is the move uop, so the slide uops start at slot 1.
    val slot = group * (group + 1) / 2 + step + 1
    val slideUop = csBundle(slot)
    val prevVd = if (step == 0) dest + group.U else (VECTOR_TMP_REG_LMUL + step).U
    val nextVd = if (step == group) dest + group.U else (VECTOR_TMP_REG_LMUL + step + 1).U
    slideUop.srcType(0) := SrcType.vp
    slideUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    slideUop.lsrc(1) := src2 + step.U
    slideUop.lsrc(2) := prevVd
    slideUop.ldest := nextVd
    // uopIdx does not count the leading move uop.
    slideUop.uopIdx := (slot - 1).U
  }
}
1208
is(UopSplitType.VEC_SLIDEDOWN) {
  // Uop 0: i to vector move, writing VECTOR_TMP_REG_LMUL through the i2v unit.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
  moveUop.vecWen := true.B
  // LMUL: slots are counted backwards from numOfUop, and only the groups
  // below the runtime lmul are generated.
  for (group <- 0 until MAX_VLMUL; step <- (0 to group).reverse) {
    when(group.U < lmul) {
      // Distance of this uop from the end of the uop sequence.
      val backOffset = group * (group + 1) / 2 + group - step + 1
      val slideUop = csBundle(numOfUop - backOffset.U)
      val prevVd =
        if (step == 0) dest + lmul - 1.U - group.U
        else (VECTOR_TMP_REG_LMUL + step).U
      val nextVd =
        if (step == group) dest + lmul - 1.U - group.U
        else (VECTOR_TMP_REG_LMUL + step + 1).U
      slideUop.srcType(0) := SrcType.vp
      slideUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      slideUop.lsrc(1) := src2 + lmul - 1.U - step.U
      slideUop.lsrc(2) := prevVd
      slideUop.ldest := nextVd
      // uopIdx is one less than the slot: the leading move uop is not counted.
      slideUop.uopIdx := numOfUop - (backOffset + 1).U
    }
  }
}
1237
is(UopSplitType.VEC_M0X) {
  // LMUL: uop i reads temporary i - 1 (don't-care for uop 0) together with
  // src2 and writes temporary i.
  (0 until MAX_VLMUL).foreach { i =>
    val uop = csBundle(i)
    uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.rfWen := false.B
    uop.vecWen := true.B
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    uop.lsrc(1) := src2
    // lsrc(2) is deliberately left unassigned here (don't care).
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
  }
  // The uop at index lmul - 1 is redirected to the FP temporary register
  // instead of a vector temporary.
  val lastVecUop = csBundle(lmul - 1.U)
  lastVecUop.vecWen := false.B
  lastVecUop.fpWen := true.B
  lastVecUop.ldest := FP_TMP_REG_MV.U
  // FMV_X_D: the uop at index lmul moves the FP temporary into the integer
  // destination register.
  val moveUop = csBundle(lmul)
  moveUop.srcType(0) := SrcType.fp
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(0) := FP_TMP_REG_MV.U
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := dest
  moveUop.fuType := FuType.fmisc.U
  moveUop.rfWen := true.B
  moveUop.fpWen := false.B
  moveUop.vecWen := false.B
  moveUop.fpu.typeTagIn := FPU.D
  moveUop.fpu.typeTagOut := FPU.D
  Seq(
    moveUop.fpu.isAddSub,
    moveUop.fpu.fromInt,
    moveUop.fpu.wflags,
    moveUop.fpu.fpWen,
    moveUop.fpu.div,
    moveUop.fpu.sqrt,
    moveUop.fpu.fcvt
  ).foreach(_ := false.B)
}
1276
is(UopSplitType.VEC_MVV) {
  // LMUL: two uops per register index i. Both read temporary i - 1
  // (don't-care for i == 0) and src2; the even uop writes dest + i, the odd
  // uop writes temporary i.
  for (i <- 0 until MAX_VLMUL) {
    val chainSrcType = if (i == 0) SrcType.DC else SrcType.vp
    val evenUop = csBundle(2 * i)
    val oddUop = csBundle(2 * i + 1)

    Seq(evenUop, oddUop).foreach { uop =>
      uop.srcType(0) := chainSrcType
      uop.srcType(1) := SrcType.vp
      uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
      uop.lsrc(1) := src2
    }

    evenUop.lsrc(2) := dest + i.U
    evenUop.ldest := dest + i.U
    evenUop.uopIdx := (2 * i).U

    // lsrc(2) of the odd uop is deliberately left unassigned (don't care).
    oddUop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    oddUop.uopIdx := (2 * i + 1).U
  }
}
1298
is(UopSplitType.VEC_M0X_VFIRST) {
  // LMUL: uop 0 is redirected to write the FP temporary register instead of
  // the integer register file.
  val firstUop = csBundle(0)
  firstUop.rfWen := false.B
  firstUop.fpWen := true.B
  firstUop.ldest := FP_TMP_REG_MV.U
  // FMV_X_D: uop 1 moves the FP temporary into the integer destination.
  val moveUop = csBundle(1)
  moveUop.srcType(0) := SrcType.fp
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(0) := FP_TMP_REG_MV.U
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := dest
  moveUop.fuType := FuType.fmisc.U
  moveUop.rfWen := true.B
  moveUop.fpWen := false.B
  moveUop.vecWen := false.B
  moveUop.fpu.typeTagIn := FPU.D
  moveUop.fpu.typeTagOut := FPU.D
  Seq(
    moveUop.fpu.isAddSub,
    moveUop.fpu.fromInt,
    moveUop.fpu.wflags,
    moveUop.fpu.fpWen,
    moveUop.fpu.div,
    moveUop.fpu.sqrt,
    moveUop.fpu.fcvt
  ).foreach(_ := false.B)
}
is(UopSplitType.VEC_VWW) {
  // Uops below lmul read src2 + i on both source operands. Uops from lmul
  // upwards read a pair of adjacent temporaries, 2 * (i - lmul) and
  // 2 * (i - lmul) + 1. Every uop writes temporary i.
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    // Common to both ranges.
    uop.srcType(2) := SrcType.DC
    // lsrc(2) is deliberately left unassigned for these uops (don't care).
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    when(i.U < lmul) {
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    } otherwise {
      val pairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
      uop.lsrc(0) := pairBase + 1.U
      uop.lsrc(1) := pairBase
    }
  }
  // The final uop takes src1 and the old dest as inputs and writes dest.
  // This override used to sit inside the loop above and was re-emitted on
  // every iteration; only the last connection takes effect, so issuing it
  // once after the loop gives the same result without the redundant
  // dynamic-index writes.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.srcType(2) := SrcType.vp
  lastUop.lsrc(0) := src1
  lastUop.lsrc(2) := dest
  lastUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // Emits len * len uops. For destination register i, a chain of `len` uops
  // reads src1 + i with each src2 + j in turn; the chain starts from the old
  // value of dest + i, passes through temporaries, and ends by writing
  // dest + i. srcType fields are not touched by this case.
  def genCsBundle_VEC_RGATHER(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      val uop = csBundle(slot)
      uop.lsrc(0) := src1 + i.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.uopIdx := slot.U
    }
  }
  // vlmulReg encoding -> number of registers in the group.
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (encoding, len) =>
    when(vlmulReg === encoding.U) {
      genCsBundle_VEC_RGATHER(len)
    }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  // Emits len * len gather uops in slots 1 .. len * len (slot 0 is the move
  // uop below). Each uop reads VECTOR_TMP_REG_LMUL as source 0 and
  // src2 + j as source 1. The chain for destination register i starts from
  // the old value of dest + i and ends by writing dest + i.
  def genCsBundle_RGATHER_VX(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val seqNo = i * len + j
      val uop = csBundle(seqNo + 1)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + j.U
      uop.lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      uop.ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U)
      // uopIdx does not count the leading move uop.
      uop.uopIdx := seqNo.U
    }
  }
  // Uop 0: i to vector move, writing VECTOR_TMP_REG_LMUL through the i2v unit.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.permImm2vector(2, 0), IF2VectorType.i2vector(2, 0)), vsewReg)
  moveUop.vecWen := true.B
  // vlmulReg encoding -> number of registers in the group.
  Seq("b000" -> 1, "b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (encoding, len) =>
    when(vlmulReg === encoding.U) {
      genCsBundle_RGATHER_VX(len)
    }
  }
}
1414    is(UopSplitType.VEC_RGATHEREI16) {
1415      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1416        for (i <- 0 until len)
1417          for (j <- 0 until len) {
1418            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1419            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1420            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1421            // csBundle(i * len + j).srcType(1) := SrcType.vp
1422            // csBundle(i * len + j).srcType(2) := SrcType.vp
1423            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1424            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1425            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1426            csBundle((i * len + j)*2+0).ldest := vd0
1427            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1428            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1429            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1430            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1431            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1432            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1433            csBundle((i * len + j)*2+1).ldest := vd1
1434            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1435          }
1436      }
1437      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1438        for (i <- 0 until len)
1439          for (j <- 0 until len) {
1440            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1441            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1442            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1443            // csBundle(i * len + j).srcType(1) := SrcType.vp
1444            // csBundle(i * len + j).srcType(2) := SrcType.vp
1445            csBundle(i * len + j).lsrc(0) := src1 + i.U
1446            csBundle(i * len + j).lsrc(1) := src2 + j.U
1447            csBundle(i * len + j).lsrc(2) := vd_old
1448            csBundle(i * len + j).ldest := vd
1449            csBundle(i * len + j).uopIdx := (i * len + j).U
1450          }
1451      }
1452      switch(vlmulReg) {
1453        is("b000".U ){
1454          when(!vsewReg.orR){
1455            genCsBundle_VEC_RGATHEREI16_SEW8(1)
1456          } .otherwise{
1457            genCsBundle_VEC_RGATHEREI16(1)
1458          }
1459        }
1460        is("b001".U) {
1461          when(!vsewReg.orR) {
1462            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1463          }.otherwise {
1464            genCsBundle_VEC_RGATHEREI16(2)
1465          }
1466        }
1467        is("b010".U) {
1468          when(!vsewReg.orR) {
1469            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1470          }.otherwise {
1471            genCsBundle_VEC_RGATHEREI16(4)
1472          }
1473        }
1474        is("b011".U) {
1475          genCsBundle_VEC_RGATHEREI16(8)
1476        }
1477      }
1478    }
1479    is(UopSplitType.VEC_COMPRESS) {
1480      def genCsBundle_VEC_COMPRESS(len:Int): Unit ={
1481        for (i <- 0 until len){
1482          val jlen = if (i == len-1) i+1 else i+2
1483          for (j <- 0 until jlen) {
1484            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1485            val vd = if(i==len-1) (dest + j.U) else{
1486              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1487            }
1488            val src23Type = if (j == i+1) DontCare else SrcType.vp
1489            csBundle(i*(i+3)/2 + j).srcType(0) := SrcType.vp
1490            csBundle(i*(i+3)/2 + j).srcType(1) := src23Type
1491            csBundle(i*(i+3)/2 + j).srcType(2) := src23Type
1492            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1493            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1494            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1495            // csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1496            csBundle(i*(i+3)/2 + j).ldest := vd
1497            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1498          }
1499        }
1500      }
1501      switch(vlmulReg) {
1502        is("b001".U ){
1503          genCsBundle_VEC_COMPRESS(2)
1504        }
1505        is("b010".U ){
1506          genCsBundle_VEC_COMPRESS(4)
1507        }
1508        is("b011".U ){
1509          genCsBundle_VEC_COMPRESS(8)
1510        }
1511      }
1512    }
1513    is(UopSplitType.VEC_MVNR) {
1514      for (i <- 0 until MAX_VLMUL) {
1515        csBundle(i).lsrc(0) := src1 + i.U
1516        csBundle(i).lsrc(1) := src2 + i.U
1517        csBundle(i).lsrc(2) := dest + i.U
1518        csBundle(i).ldest := dest + i.U
1519        csBundle(i).uopIdx := i.U
1520      }
1521    }
1522    is(UopSplitType.VEC_US_LDST) {
1523      /*
1524      FMV.D.X
1525       */
1526      csBundle(0).srcType(0) := SrcType.reg
1527      csBundle(0).srcType(1) := SrcType.imm
1528      csBundle(0).lsrc(1) := 0.U
1529      csBundle(0).ldest := FP_TMP_REG_MV.U
1530      csBundle(0).fuType := FuType.i2f.U
1531      csBundle(0).rfWen := false.B
1532      csBundle(0).fpWen := true.B
1533      csBundle(0).vecWen := false.B
1534      csBundle(0).fpu.isAddSub := false.B
1535      csBundle(0).fpu.typeTagIn := FPU.D
1536      csBundle(0).fpu.typeTagOut := FPU.D
1537      csBundle(0).fpu.fromInt := true.B
1538      csBundle(0).fpu.wflags := false.B
1539      csBundle(0).fpu.fpWen := true.B
1540      csBundle(0).fpu.div := false.B
1541      csBundle(0).fpu.sqrt := false.B
1542      csBundle(0).fpu.fcvt := false.B
1543      //LMUL
1544      for (i <- 0 until MAX_VLMUL) {
1545        csBundle(i + 1).srcType(0) := SrcType.fp
1546        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1547        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1548        csBundle(i + 1).ldest := dest + i.U
1549        csBundle(i + 1).uopIdx := i.U
1550      }
1551    }
1552    is(UopSplitType.VEC_S_LDST) {
1553      /*
1554      FMV.D.X
1555       */
1556      csBundle(0).srcType(0) := SrcType.reg
1557      csBundle(0).srcType(1) := SrcType.imm
1558      csBundle(0).lsrc(1) := 0.U
1559      csBundle(0).ldest := FP_TMP_REG_MV.U
1560      csBundle(0).fuType := FuType.i2f.U
1561      csBundle(0).rfWen := false.B
1562      csBundle(0).fpWen := true.B
1563      csBundle(0).vecWen := false.B
1564      csBundle(0).fpu.isAddSub := false.B
1565      csBundle(0).fpu.typeTagIn := FPU.D
1566      csBundle(0).fpu.typeTagOut := FPU.D
1567      csBundle(0).fpu.fromInt := true.B
1568      csBundle(0).fpu.wflags := false.B
1569      csBundle(0).fpu.fpWen := true.B
1570      csBundle(0).fpu.div := false.B
1571      csBundle(0).fpu.sqrt := false.B
1572      csBundle(0).fpu.fcvt := false.B
1573
1574      csBundle(1).srcType(0) := SrcType.reg
1575      csBundle(1).srcType(1) := SrcType.imm
1576      csBundle(1).lsrc(0) := decodedInstsSimple.lsrc(1)
1577      csBundle(1).lsrc(1) := 0.U
1578      csBundle(1).ldest := VECTOR_TMP_REG_LMUL.U
1579      csBundle(1).fuType := FuType.i2f.U
1580      csBundle(1).rfWen := false.B
1581      csBundle(1).fpWen := true.B
1582      csBundle(1).vecWen := false.B
1583      csBundle(1).fpu.isAddSub := false.B
1584      csBundle(1).fpu.typeTagIn := FPU.D
1585      csBundle(1).fpu.typeTagOut := FPU.D
1586      csBundle(1).fpu.fromInt := true.B
1587      csBundle(1).fpu.wflags := false.B
1588      csBundle(1).fpu.fpWen := true.B
1589      csBundle(1).fpu.div := false.B
1590      csBundle(1).fpu.sqrt := false.B
1591      csBundle(1).fpu.fcvt := false.B
1592
1593      //LMUL
1594      for (i <- 0 until MAX_VLMUL) {
1595        csBundle(i + 2).srcType(0) := SrcType.fp
1596        csBundle(i + 2).srcType(1) := SrcType.fp
1597        csBundle(i + 2).lsrc(0) := FP_TMP_REG_MV.U
1598        csBundle(i + 2).lsrc(1) := VECTOR_TMP_REG_LMUL.U
1599        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1600        csBundle(i + 2).ldest := dest + i.U
1601        csBundle(i + 2).uopIdx := i.U
1602      }
1603    }
1604    is(UopSplitType.VEC_I_LDST) {
1605    /*
1606      FMV.D.X
1607       */
1608      val vlmul = vlmulReg
1609      val vsew = Cat(0.U(1.W), vsewReg)
1610      val veew = Cat(0.U(1.W), width)
1611      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
1612      val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
1613        "b001".U -> 1.U,
1614        "b010".U -> 2.U,
1615        "b011".U -> 3.U
1616      ))
1617      val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
1618        "b001".U -> 1.U,
1619        "b010".U -> 2.U,
1620        "b011".U -> 3.U
1621      ))
1622      csBundle(0).srcType(0) := SrcType.reg
1623      csBundle(0).srcType(1) := SrcType.imm
1624      csBundle(0).lsrc(1) := 0.U
1625      csBundle(0).ldest := FP_TMP_REG_MV.U
1626      csBundle(0).fuType := FuType.i2f.U
1627      csBundle(0).rfWen := false.B
1628      csBundle(0).fpWen := true.B
1629      csBundle(0).vecWen := false.B
1630      csBundle(0).fpu.isAddSub := false.B
1631      csBundle(0).fpu.typeTagIn := FPU.D
1632      csBundle(0).fpu.typeTagOut := FPU.D
1633      csBundle(0).fpu.fromInt := true.B
1634      csBundle(0).fpu.wflags := false.B
1635      csBundle(0).fpu.fpWen := true.B
1636      csBundle(0).fpu.div := false.B
1637      csBundle(0).fpu.sqrt := false.B
1638      csBundle(0).fpu.fcvt := false.B
1639
1640      //LMUL
1641      for (i <- 0 until MAX_INDEXED_LS_UOPNUM) {
1642        indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul, nf)
1643        val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1644        val offsetVd = indexedLSRegOffset(i).outOffsetVd
1645        val isFirstUopInVd = indexedLSRegOffset(i).outIsFirstUopInVd
1646        csBundle(i + 1).srcType(0) := SrcType.fp
1647        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1648        csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1649        /**
1650          * For indexed instructions, VLSU will concatenate all the uops that write the same logic vd register and
1651          * writeback only once for all these uops. However, these uops share the same lsrc(2)/old vd and the same
1652          * ldest/vd that is equal to old vd, which leads to data dependence between the uops. Therefore there will be
1653          * deadlock for indexed instructions with emul > lmul.
1654          *
1655          * Assume N = emul/lmul. To break the deadlock, only the first uop will read old vd as lsrc(2), and the rest
1656          * N-1 uops will read temporary vector register.
1657          */
1658        // csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1659        csBundle(i + 1).lsrc(2) := Mux(
1660          isFirstUopInVd,
1661          Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U)),
1662          VECTOR_TMP_REG_LMUL.U
1663        )
1664        csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1665        csBundle(i + 1).uopIdx := i.U
1666      }
1667    }
1668  }
1669
1670  //uops dispatch
1671  val s_normal :: s_ext :: Nil = Enum(2)
1672  val state = RegInit(s_normal)
1673  val state_next = WireDefault(state)
1674  val uopRes = RegInit(0.U)
1675
1676  //readyFromRename Counter
1677  val readyCounter = PriorityMuxDefault(io.readyFromRename.map(x => !x).zip((0 to (RenameWidth - 1)).map(_.U)), RenameWidth.U)
1678
1679  switch(state) {
1680    is(s_normal) {
1681      state_next := Mux(io.validFromIBuf(0) && (numOfUop > readyCounter) && (readyCounter =/= 0.U), s_ext, s_normal)
1682    }
1683    is(s_ext) {
1684      state_next := Mux(io.validFromIBuf(0) && (uopRes > readyCounter), s_ext, s_normal)
1685    }
1686  }
1687
1688  state := state_next
1689
1690  val uopRes0 = Mux(state === s_normal, numOfUop, uopRes)
1691  val uopResJudge = Mux(state === s_normal,
1692    io.validFromIBuf(0) && (readyCounter =/= 0.U) && (uopRes0 > readyCounter),
1693    io.validFromIBuf(0) && (uopRes0 > readyCounter))
1694  uopRes := Mux(uopResJudge, uopRes0 - readyCounter, 0.U)
1695
1696  for(i <- 0 until RenameWidth) {
1697    decodedInsts(i) := MuxCase(csBundle(i), Seq(
1698      (state === s_normal) -> csBundle(i),
1699      (state === s_ext) -> Mux((i.U + numOfUop -uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1700    ).toSeq)
1701  }
1702
1703  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1704  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1705  val notInf = Wire(Vec(DecodeWidth, Bool()))
1706  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1707  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1708  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1709  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1710
1711  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1712    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1713    0.U)
1714  validToRename.zipWithIndex.foreach{
1715    case(dst, i) =>
1716      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1717      dst := MuxCase(false.B, Seq(
1718        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1719        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1720      ).toSeq)
1721  }
1722
1723  readyToIBuf.zipWithIndex.foreach {
1724    case (dst, i) =>
1725      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1726      dst := MuxCase(true.B, Seq(
1727        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1728        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1729      ).toSeq)
1730  }
1731
1732  io.deq.decodedInsts := decodedInsts
1733  io.deq.isVset := isVsetSimple
1734  io.deq.complexNum := complexNum
1735  io.deq.validToRename := validToRename
1736  io.deq.readyToIBuf := readyToIBuf
1737
1738}
1739