// xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision e88008978ecb7073b9e8126282e0a0b913d9e6f7)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35
36import scala.collection.Seq
37
/**
  * Numeric constants used while an instruction is expanded into uops.
  */
trait VectorConstants {
  /** Unrolling bound for the per-register uop generation loops. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as the floating-point temporary between two uops of one instruction. */
  val FP_TMP_REG_MV: Int = 32

  /** First logical index of the vector temporaries: 33~47, i.e. 15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33
}
43
/**
  * Ports of the complex-instruction decoder.
  *
  * Field order is kept exactly as declared, since it defines the bundle layout.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // Inputs describing the single instruction that is being expanded.
  val simple = new Bundle {
    val decodedInst = Input(new DecodedInst)
    val isComplex   = Input(Bool())
    val uopInfo     = Input(new UopInfo)
  }

  val vtype = Input(new VType)
  val in0pc = Input(UInt(VAddrBits.W))

  // One flag per decode slot.
  val isComplex     = Input(Vec(DecodeWidth, Bool()))
  val validFromIBuf = Input(Vec(DecodeWidth, Bool()))

  // One ready flag per rename slot.
  val readyFromRename = Input(Vec(RenameWidth, Bool()))

  // Outputs: RenameWidth decoded uops with their valid bits, DecodeWidth ready bits, a vset flag
  // and a 3-bit count.
  val deq = new Bundle {
    val decodedInsts  = Output(Vec(RenameWidth, new DecodedInst))
    val isVset        = Output(Bool())
    val readyToIBuf   = Output(Vec(DecodeWidth, Bool()))
    val validToRename = Output(Vec(RenameWidth, Bool()))
    val complexNum    = Output(UInt(3.W))
  }

  val csrCtrl = Input(new CustomCSRCtrlIO)
}
64
/**
  * @author zly
  */
68class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
69  val io = IO(new DecodeUnitCompIO)
70
// Size of the per-instruction uop vector built below.
val maxUopSize = MaxUopSize

// Bit-field view of the raw instruction word delivered by the simple decoder.
private val inst: XSInstBitFields = io.simple.decodedInst.instr.asTypeOf(new XSInstBitFields)

// Register specifier fields, each zero-extended by one bit.
val src1 = Cat(0.U(1.W), inst.RS1)
val src2 = Cat(0.U(1.W), inst.RS2)
val dest = Cat(0.U(1.W), inst.RD)


// Wires that carry this module's outputs.
val decodedInsts = Wire(Vec(RenameWidth, new DecodedInst))
val validToRename = Wire(Vec(RenameWidth, Bool()))
val readyToIBuf = Wire(Vec(DecodeWidth, Bool()))
val complexNum = Wire(UInt(3.W))

// Locally adjusted copies of what the simple decoder reported.
val decodedInstsSimple = Wire(new DecodedInst)
val numOfUop = Wire(UInt(log2Up(maxUopSize+1).W))
val lmul = Wire(UInt(4.W))
val isVsetSimple = Wire(Bool())

// Start from the simple decoder's result ...
decodedInstsSimple := io.simple.decodedInst
lmul := io.simple.uopInfo.lmul
isVsetSimple := io.simple.decodedInst.isVset
val vlmulReg = io.simple.decodedInst.vpu.vlmul
val vsewReg = io.simple.decodedInst.vpu.vsew

// ... and patch it for vset*: with rs1 == 0 the op type is rewritten (keepVl when rd is also 0,
// setVlmax otherwise); an illegal incoming vtype requests a pipeline flush.
when(isVsetSimple) {
  when(src1 === 0.U) {
    when(dest === 0.U) {
      decodedInstsSimple.fuOpType := VSETOpType.keepVl(io.simple.decodedInst.fuOpType)
    }.otherwise {
      decodedInstsSimple.fuOpType := VSETOpType.setVlmax(io.simple.decodedInst.fuOpType)
    }
  }
  when(io.vtype.illegal) {
    decodedInstsSimple.flushPipe := true.B
  }
}

// Split type selects the expansion rule used by the switch below.
val typeOfSplit = decodedInstsSimple.uopSplitType

// DIR instructions derive their uop count here: two uops when rd != 0, or when both rd and rs1
// are 0 and the op is vsetvl; one uop otherwise. Every other split type uses the count supplied
// by the simple decoder.
private val dirNeedsTwoUops =
  dest =/= 0.U || (src1 === 0.U && VSETOpType.isVsetvl(decodedInstsSimple.fuOpType))
numOfUop := Mux(
  typeOfSplit === UopSplitType.DIR,
  Mux(dirNeedsTwoUops, 2.U, 1.U),
  io.simple.uopInfo.numOfUop
)
118
119
// Uop vector: up to maxUopSize uops for the instruction being expanded.
// Every entry starts as a copy of the (patched) simple decode result with both boundary flags
// cleared; the switch on typeOfSplit overrides individual fields afterwards.
val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
// foreach, not map: the body only performs connections and produces no value.
csBundle.foreach { uop =>
  uop := decodedInstsSimple
  uop.firstUop := false.B
  uop.lastUop := false.B
}

// Entry 0 carries the uop count and the first-uop flag; the entry selected at run time by
// numOfUop - 1 carries the last-uop flag.
csBundle(0).numUops := numOfUop
csBundle(0).firstUop := true.B
csBundle(numOfUop - 1.U).lastUop := true.B
131
132  switch(typeOfSplit) {
// DIR: only vset* instructions are reshaped here; everything else keeps the defaults.
is(UopSplitType.DIR) {
  val uop0 = csBundle(0)
  val uop1 = csBundle(1)
  val vsetOp = decodedInstsSimple.fuOpType

  when(isVsetSimple) {
    when(dest =/= 0.U) {
      // rd field non-zero: uop 0 goes to vsetiwi and writes the integer register file,
      // uop 1 writes VCONFIG_IDX as a vector destination.
      uop0.fuType := FuType.vsetiwi.U
      uop0.fuOpType := VSETOpType.switchDest(vsetOp)
      uop0.flushPipe := false.B
      uop0.rfWen := true.B
      uop0.vecWen := false.B

      uop1.ldest := VCONFIG_IDX.U
      uop1.rfWen := false.B
      uop1.vecWen := true.B
    }.elsewhen(src1 =/= 0.U) {
      // rd == 0, rs1 != 0: single uop targeting VCONFIG_IDX.
      uop0.ldest := VCONFIG_IDX.U
    }.elsewhen(VSETOpType.isVsetvli(vsetOp)) {
      // rd == 0, rs1 == 0, vsetvli: single vsetfwf uop reading VCONFIG_IDX.
      uop0.fuType := FuType.vsetfwf.U
      uop0.srcType(0) := SrcType.vp
      uop0.lsrc(0) := VCONFIG_IDX.U
    }.elsewhen(VSETOpType.isVsetvl(vsetOp)) {
      // rd == 0, rs1 == 0, vsetvl: uop 0 is an i2f move into FP_TMP_REG_MV ...
      uop0.srcType(0) := SrcType.reg
      uop0.srcType(1) := SrcType.imm
      uop0.lsrc(1) := 0.U
      uop0.ldest := FP_TMP_REG_MV.U
      uop0.fuType := FuType.i2f.U
      uop0.rfWen := false.B
      uop0.fpWen := true.B
      uop0.vecWen := false.B
      val moveFpu = uop0.fpu
      moveFpu.isAddSub := false.B
      moveFpu.typeTagIn := FPU.D
      moveFpu.typeTagOut := FPU.D
      moveFpu.fromInt := true.B
      moveFpu.wflags := false.B
      moveFpu.fpWen := true.B
      moveFpu.div := false.B
      moveFpu.sqrt := false.B
      moveFpu.fcvt := false.B
      uop0.flushPipe := false.B

      // ... and uop 1 is a vsetfwf reading VCONFIG_IDX and that FP temporary.
      uop1.fuType := FuType.vsetfwf.U
      uop1.srcType(0) := SrcType.vp
      uop1.lsrc(0) := VCONFIG_IDX.U
      uop1.srcType(1) := SrcType.fp
      uop1.lsrc(1) := FP_TMP_REG_MV.U
      uop1.ldest := VCONFIG_IDX.U
    }
  }
}
// VEC_VVV: uop k uses register k of each of the vs1, vs2 and vd groups.
is(UopSplitType.VEC_VVV) {
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k)
    val offset = k.U
    uop.lsrc(0) := src1 + offset
    uop.lsrc(1) := src2 + offset
    uop.lsrc(2) := dest + offset
    uop.ldest := dest + offset
    uop.uopIdx := offset
  }
}
// VEC_VFV: like VEC_VVV, but lsrc(0) keeps its default value for every uop.
is(UopSplitType.VEC_VFV) {
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k)
    val offset = k.U
    uop.lsrc(1) := src2 + offset
    uop.lsrc(2) := dest + offset
    uop.ldest := dest + offset
    uop.uopIdx := offset
  }
}
// VEC_EXT2: each vs2 register feeds two consecutive destination registers.
is(UopSplitType.VEC_EXT2) {
  for (srcReg <- 0 until MAX_VLMUL / 2; part <- 0 until 2) {
    val k = 2 * srcReg + part
    val uop = csBundle(k)
    uop.lsrc(1) := src2 + srcReg.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_EXT4: each vs2 register feeds four consecutive destination registers.
is(UopSplitType.VEC_EXT4) {
  for (srcReg <- 0 until MAX_VLMUL / 4; part <- 0 until 4) {
    val k = 4 * srcReg + part
    val uop = csBundle(k)
    uop.lsrc(1) := src2 + srcReg.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_EXT8: the single vs2 register feeds all destination registers.
is(UopSplitType.VEC_EXT8) {
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k)
    uop.lsrc(1) := src2
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_0XV: two uops, an integer-to-FP move followed by a vector permutation-unit uop.
is(UopSplitType.VEC_0XV) {
  // uop 0 (labelled FMV.D.X in the original): i2f move of the integer source into FP_TMP_REG_MV.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := FP_TMP_REG_MV.U
  moveUop.fuType := FuType.i2f.U
  moveUop.rfWen := false.B
  moveUop.fpWen := true.B
  moveUop.vecWen := false.B
  val moveFpu = moveUop.fpu
  moveFpu.isAddSub := false.B
  moveFpu.typeTagIn := FPU.D
  moveFpu.typeTagOut := FPU.D
  moveFpu.fromInt := true.B
  moveFpu.wflags := false.B
  moveFpu.fpWen := true.B
  moveFpu.div := false.B
  moveFpu.sqrt := false.B
  moveFpu.fcvt := false.B

  // uop 1 (labelled vfmv.s.f in the original): vppu uop reading the FP temporary and the old
  // destination, writing the vector destination.
  val vecUop = csBundle(1)
  vecUop.srcType(0) := SrcType.fp
  vecUop.srcType(1) := SrcType.vp
  vecUop.srcType(2) := SrcType.vp
  vecUop.lsrc(0) := FP_TMP_REG_MV.U
  vecUop.lsrc(1) := 0.U
  vecUop.lsrc(2) := dest
  vecUop.ldest := dest
  vecUop.fuType := FuType.vppu.U
  vecUop.fuOpType := VpermType.dummy
  vecUop.rfWen := false.B
  vecUop.fpWen := false.B
  vecUop.vecWen := true.B
}
// VEC_VXV: uop 0 moves the integer operand into a vector temporary; uops 1.. then use that
// temporary as source 0, one uop per register of the group.
is(UopSplitType.VEC_VXV) {
  // uop 0: i2v move (op type = vsew) writing VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := vsewReg
  moveUop.vecWen := true.B

  // uops 1..MAX_VLMUL: register k of vs2/vd combined with the temporary.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k + 1)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_VIV: same shape as VEC_VXV, except that the move uop takes immediates on both sources.
is(UopSplitType.VEC_VIV) {
  // uop 0: i2v move (op type = vsew) writing VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.imm
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(0) := 0.U
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := vsewReg
  moveUop.vecWen := true.B

  // uops 1..MAX_VLMUL: register k of vs2/vd combined with the temporary.
  (0 until MAX_VLMUL).foreach { k =>
    val uop = csBundle(k + 1)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_VVW: each vs1/vs2 register pair produces two consecutive destination registers.
is(UopSplitType.VEC_VVW) {
  for (srcReg <- 0 until MAX_VLMUL / 2; part <- 0 until 2) {
    val k = 2 * srcReg + part
    val uop = csBundle(k)
    uop.lsrc(0) := src1 + srcReg.U
    uop.lsrc(1) := src2 + srcReg.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_VFW: source 0 is the same register for every uop; each vs2 register produces two
// consecutive destination registers.
is(UopSplitType.VEC_VFW) {
  for (srcReg <- 0 until MAX_VLMUL / 2; part <- 0 until 2) {
    val k = 2 * srcReg + part
    val uop = csBundle(k)
    uop.lsrc(0) := src1
    uop.lsrc(1) := src2 + srcReg.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_WVW: vs2 and vd advance by one register per uop, vs1 by one register every two uops.
is(UopSplitType.VEC_WVW) {
  for (narrowReg <- 0 until MAX_VLMUL / 2; part <- 0 until 2) {
    val k = 2 * narrowReg + part
    val uop = csBundle(k)
    uop.lsrc(0) := src1 + narrowReg.U
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_VXW: move uop, then two uops per vs2 register, each writing its own destination register.
is(UopSplitType.VEC_VXW) {
  // uop 0: i2v move (op type = vsew) writing VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := vsewReg
  moveUop.vecWen := true.B

  // uop k + 1 carries uopIdx k.
  for (srcReg <- 0 until MAX_VLMUL / 2; part <- 0 until 2) {
    val k = 2 * srcReg + part
    val uop = csBundle(k + 1)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + srcReg.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_WXW: move uop, then one uop per register, with vs2 and vd advancing together.
is(UopSplitType.VEC_WXW) {
  // uop 0: i2v move (op type = vsew) writing VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := vsewReg
  moveUop.vecWen := true.B

  // uop k + 1 carries uopIdx k.
  for (pair <- 0 until MAX_VLMUL / 2; part <- 0 until 2) {
    val k = 2 * pair + part
    val uop = csBundle(k + 1)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_WVV: two vs2 registers are consumed per vs1/vd register.
is(UopSplitType.VEC_WVV) {
  for (narrowReg <- 0 until MAX_VLMUL / 2; part <- 0 until 2) {
    val k = 2 * narrowReg + part
    val uop = csBundle(k)
    uop.lsrc(0) := src1 + narrowReg.U
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := dest + narrowReg.U
    uop.ldest := dest + narrowReg.U
    uop.uopIdx := k.U
  }
}
// VEC_WFW: source 0 is the same register for every uop; vs2 and vd advance together.
is(UopSplitType.VEC_WFW) {
  for (pair <- 0 until MAX_VLMUL / 2; part <- 0 until 2) {
    val k = 2 * pair + part
    val uop = csBundle(k)
    uop.lsrc(0) := src1
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := dest + k.U
    uop.ldest := dest + k.U
    uop.uopIdx := k.U
  }
}
// VEC_WXV: move uop, then two vs2 registers are consumed per destination register.
is(UopSplitType.VEC_WXV) {
  // uop 0: i2v move (op type = vsew) writing VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := vsewReg
  moveUop.vecWen := true.B

  // uop k + 1 carries uopIdx k.
  for (dstReg <- 0 until MAX_VLMUL / 2; part <- 0 until 2) {
    val k = 2 * dstReg + part
    val uop = csBundle(k + 1)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + k.U
    uop.lsrc(2) := dest + dstReg.U
    uop.ldest := dest + dstReg.U
    uop.uopIdx := k.U
  }
}
// VEC_VVM: every uop reads and writes the same destination register; the sources step
// through their register groups.
is(UopSplitType.VEC_VVM) {
  for (k <- 0 until MAX_VLMUL) {
    val uop = csBundle(k)
    // uop 0 keeps the default lsrc(0)/lsrc(1).
    if (k > 0) {
      uop.lsrc(0) := src1 + k.U
      uop.lsrc(1) := src2 + k.U
    }
    uop.lsrc(2) := dest
    uop.ldest := dest
    uop.uopIdx := k.U
  }
  // Run-time selected last uop also targets dest.
  csBundle(numOfUop - 1.U).ldest := dest
}
// VEC_VFM: like VEC_VVM, but source 0 is the same register for uops 1 and up.
is(UopSplitType.VEC_VFM) {
  for (k <- 0 until MAX_VLMUL) {
    val uop = csBundle(k)
    // uop 0 keeps the default lsrc(0)/lsrc(1).
    if (k > 0) {
      uop.lsrc(0) := src1
      uop.lsrc(1) := src2 + k.U
    }
    uop.lsrc(2) := dest
    uop.ldest := dest
    uop.uopIdx := k.U
  }
  // Run-time selected last uop also targets dest.
  csBundle(numOfUop - 1.U).ldest := dest
}
// VEC_VXM: move uop, then uops that all read and write the same destination register.
is(UopSplitType.VEC_VXM) {
  // uop 0: i2v move (op type = vsew) writing VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := vsewReg
  moveUop.vecWen := true.B

  // uop k + 1 carries uopIdx k and uses the temporary as source 0.
  for (k <- 0 until MAX_VLMUL) {
    val uop = csBundle(k + 1)
    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    // The first of these uops keeps the default lsrc(1).
    if (k > 0) {
      uop.lsrc(1) := src2 + k.U
    }
    uop.lsrc(2) := dest
    uop.ldest := dest
    uop.uopIdx := k.U
  }
  // Run-time selected last uop also targets dest.
  csBundle(numOfUop - 1.U).ldest := dest
}
// VEC_SLIDE1UP: move uop, then one uop per register. The first takes the moved scalar as
// source 0; each later one takes the previous vs2 register instead.
is(UopSplitType.VEC_SLIDE1UP) {
  // uop 0: i2v move (op type = vsew) writing VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := vsewReg
  moveUop.vecWen := true.B

  // uop k + 1 carries uopIdx k.
  for (k <- 0 until MAX_VLMUL) {
    val uop = csBundle(k + 1)
    uop.srcType(0) := SrcType.vp
    if (k == 0) {
      // lsrc(1) keeps its default here.
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(2) := dest
      uop.ldest := dest
    } else {
      uop.lsrc(0) := src2 + (k - 1).U
      uop.lsrc(1) := src2 + k.U
      uop.lsrc(2) := dest + k.U
      uop.ldest := dest + k.U
    }
    uop.uopIdx := k.U
  }
}
// VEC_FSLIDE1UP: no move uop. uop 0 reads source 0 as an FP register; each later uop reads
// the previous vs2 register as source 0.
is(UopSplitType.VEC_FSLIDE1UP) {
  for (k <- 0 until MAX_VLMUL) {
    val uop = csBundle(k)
    if (k == 0) {
      uop.srcType(0) := SrcType.fp
      uop.lsrc(0) := src1
      uop.lsrc(1) := src2
      uop.lsrc(2) := dest
      uop.ldest := dest
    } else {
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := src2 + (k - 1).U
      uop.lsrc(1) := src2 + k.U
      uop.lsrc(2) := dest + k.U
      uop.ldest := dest + k.U
    }
    uop.uopIdx := k.U
  }
}
// VEC_SLIDE1DOWN (original note: lmul+lmul = 16): move uop, then two uops per register.
// The first of each pair combines vs2 registers k + 1 and k into a second temporary; the
// second combines that temporary with the moved scalar and writes vd register k.
is(UopSplitType.VEC_SLIDE1DOWN) {
  // uop 0: i2v move (op type = vsew) writing VECTOR_TMP_REG_LMUL.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := vsewReg
  moveUop.vecWen := true.B

  for (k <- 0 until MAX_VLMUL) {
    val firstIdx = 2 * k + 1
    val secondIdx = 2 * k + 2

    val firstUop = csBundle(firstIdx)
    firstUop.srcType(0) := SrcType.vp
    firstUop.srcType(1) := SrcType.vp
    firstUop.lsrc(0) := src2 + (k + 1).U
    firstUop.lsrc(1) := src2 + k.U
    firstUop.lsrc(2) := dest + k.U
    firstUop.ldest := VECTOR_TMP_REG_LMUL.U + 1.U
    firstUop.uopIdx := (2 * k).U

    // The second uop of the final pair is not generated.
    if (secondIdx < MAX_VLMUL * 2) {
      val secondUop = csBundle(secondIdx)
      secondUop.srcType(0) := SrcType.vp
      secondUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      // lsrc(1) is left at its default (marked as don't-care in the original).
      secondUop.lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
      secondUop.ldest := dest + k.U
      secondUop.uopIdx := (2 * k + 1).U
    }
  }

  // The run-time selected last uop reads the moved scalar directly and writes vd register
  // lmul - 1.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.srcType(0) := SrcType.vp
  lastUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
  lastUop.ldest := dest + lmul - 1.U
}
// VEC_FSLIDE1DOWN: two uops per register and no move uop. The first of each pair combines
// vs2 registers k + 1 and k into the vector temporary; the second combines that temporary
// with the FP source and writes vd register k.
is(UopSplitType.VEC_FSLIDE1DOWN) {
  for (k <- 0 until MAX_VLMUL) {
    val firstUop = csBundle(2 * k)
    firstUop.srcType(0) := SrcType.vp
    firstUop.srcType(1) := SrcType.vp
    firstUop.lsrc(0) := src2 + (k + 1).U
    firstUop.lsrc(1) := src2 + k.U
    firstUop.lsrc(2) := dest + k.U
    firstUop.ldest := VECTOR_TMP_REG_LMUL.U
    firstUop.uopIdx := (2 * k).U

    val secondUop = csBundle(2 * k + 1)
    secondUop.srcType(0) := SrcType.fp
    secondUop.lsrc(0) := src1
    secondUop.lsrc(2) := VECTOR_TMP_REG_LMUL.U
    secondUop.ldest := dest + k.U
    secondUop.uopIdx := (2 * k + 1).U
  }

  // The run-time selected last uop reads the FP source directly and writes vd register
  // lmul - 1.
  val lastUop = csBundle(numOfUop - 1.U)
  lastUop.srcType(0) := SrcType.fp
  lastUop.lsrc(0) := src1
  lastUop.ldest := dest + lmul - 1.U
}
// VEC_VRED: pairwise reduction of the vs2 register group through the vector temporaries,
// followed by a final uop that combines the last partial result with vs1 and writes vd.
is(UopSplitType.VEC_VRED) {
  // Index of the k-th vector temporary.
  def tmpReg(k: Int): UInt = (VECTOR_TMP_REG_LMUL + k).U

  // Uop idx combines srcA and srcB into temporary idx; source 2 is marked don't-care.
  def pairUop(idx: Int, srcA: UInt, srcB: UInt): Unit = {
    val uop = csBundle(idx)
    uop.srcType(2) := SrcType.DC
    uop.lsrc(0) := srcA
    uop.lsrc(1) := srcB
    uop.ldest := tmpReg(idx)
    uop.uopIdx := idx.U
  }

  // vlmul encoding 001: one pair.
  when(vlmulReg === "b001".U) {
    pairUop(0, src2 + 1.U, src2)
  }

  // vlmul encoding 010: two pairs from vs2, then one pair of temporaries.
  when(vlmulReg === "b010".U) {
    pairUop(0, src2 + 1.U, src2)
    pairUop(1, src2 + 3.U, src2 + 2.U)
    pairUop(2, tmpReg(1), tmpReg(0))
  }

  // vlmul encoding 011: three reduction levels. The uop after the last level only gets its
  // uopIdx and source-2 type here; the final-uop block below fills in the rest.
  when(vlmulReg === "b011".U) {
    val half = MAX_VLMUL / 2
    for (i <- 0 until MAX_VLMUL) {
      if (i < MAX_VLMUL - half) {
        pairUop(i, src2 + (i * 2 + 1).U, src2 + (i * 2).U)
      } else if (i < MAX_VLMUL - MAX_VLMUL / 4) {
        pairUop(i, tmpReg((i - half) * 2 + 1), tmpReg((i - half) * 2))
      } else if (i < MAX_VLMUL - MAX_VLMUL / 8) {
        pairUop(6, tmpReg(5), tmpReg(4))
      }
      csBundle(i).srcType(2) := SrcType.DC
      csBundle(i).uopIdx := i.U
    }
  }

  // Final uop for any non-zero vlmul encoding; placed last so that it overrides the
  // connections made above for the same entry.
  when(vlmulReg.orR) {
    val lastUop = csBundle(numOfUop - 1.U)
    lastUop.srcType(2) := SrcType.vp
    lastUop.lsrc(0) := src1
    lastUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
    lastUop.lsrc(2) := dest
    lastUop.ldest := dest
    lastUop.uopIdx := numOfUop - 1.U
  }
}
675    is(UopSplitType.VEC_VFRED) {
676      val vlmul = vlmulReg
677      val vsew = vsewReg
678      when(vlmul === VLmul.m8){
679        for (i <- 0 until 4) {
680          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
681          csBundle(i).lsrc(1) := src2 + (i * 2).U
682          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
683          csBundle(i).uopIdx := i.U
684        }
685        for (i <- 4 until 6) {
686          csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2 + 1).U
687          csBundle(i).lsrc(1) := (VECTOR_TMP_REG_LMUL + (i - 4) * 2).U
688          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
689          csBundle(i).uopIdx := i.U
690        }
691        csBundle(6).lsrc(0) := (VECTOR_TMP_REG_LMUL + 5).U
692        csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
693        csBundle(6).ldest := (VECTOR_TMP_REG_LMUL + 6).U
694        csBundle(6).uopIdx := 6.U
695        when(vsew === VSew.e64) {
696          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
697          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
698          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
699          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
700          csBundle(7).uopIdx := 7.U
701          csBundle(8).lsrc(0) := src1
702          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
703          csBundle(8).ldest := dest
704          csBundle(8).uopIdx := 8.U
705        }
706        when(vsew === VSew.e32) {
707          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
708          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
709          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
710          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
711          csBundle(7).uopIdx := 7.U
712          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
713          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
714          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
715          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
716          csBundle(8).uopIdx := 8.U
717          csBundle(9).lsrc(0) := src1
718          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
719          csBundle(9).ldest := dest
720          csBundle(9).uopIdx := 9.U
721        }
722        when(vsew === VSew.e16) {
723          csBundle(7).lsrc(0) := (VECTOR_TMP_REG_LMUL + 6).U
724          csBundle(7).lsrc(1) := (VECTOR_TMP_REG_LMUL + 6).U
725          csBundle(7).ldest := (VECTOR_TMP_REG_LMUL + 7).U
726          csBundle(7).vpu.fpu.isFoldTo1_2 := true.B
727          csBundle(7).uopIdx := 7.U
728          csBundle(8).lsrc(0) := (VECTOR_TMP_REG_LMUL + 7).U
729          csBundle(8).lsrc(1) := (VECTOR_TMP_REG_LMUL + 7).U
730          csBundle(8).ldest := (VECTOR_TMP_REG_LMUL + 8).U
731          csBundle(8).vpu.fpu.isFoldTo1_4 := true.B
732          csBundle(8).uopIdx := 8.U
733          csBundle(9).lsrc(0) := (VECTOR_TMP_REG_LMUL + 8).U
734          csBundle(9).lsrc(1) := (VECTOR_TMP_REG_LMUL + 8).U
735          csBundle(9).ldest := (VECTOR_TMP_REG_LMUL + 9).U
736          csBundle(9).vpu.fpu.isFoldTo1_8 := true.B
737          csBundle(9).uopIdx := 9.U
738          csBundle(10).lsrc(0) := src1
739          csBundle(10).lsrc(1) := (VECTOR_TMP_REG_LMUL + 9).U
740          csBundle(10).ldest := dest
741          csBundle(10).uopIdx := 10.U
742        }
743      }
744      when(vlmul === VLmul.m4) {
745        for (i <- 0 until 2) {
746          csBundle(i).lsrc(0) := src2 + (i * 2 + 1).U
747          csBundle(i).lsrc(1) := src2 + (i * 2).U
748          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
749          csBundle(i).uopIdx := i.U
750        }
751        csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
752        csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
753        csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
754        csBundle(2).uopIdx := 2.U
755        when(vsew === VSew.e64) {
756          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
757          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
758          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
759          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
760          csBundle(3).uopIdx := 3.U
761          csBundle(4).lsrc(0) := src1
762          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
763          csBundle(4).ldest := dest
764          csBundle(4).uopIdx := 4.U
765        }
766        when(vsew === VSew.e32) {
767          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
768          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
769          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
770          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
771          csBundle(3).uopIdx := 3.U
772          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
773          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
774          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
775          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
776          csBundle(4).uopIdx := 4.U
777          csBundle(5).lsrc(0) := src1
778          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
779          csBundle(5).ldest := dest
780          csBundle(5).uopIdx := 5.U
781        }
782        when(vsew === VSew.e16) {
783          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
784          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
785          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
786          csBundle(3).vpu.fpu.isFoldTo1_2 := true.B
787          csBundle(3).uopIdx := 3.U
788          csBundle(4).lsrc(0) := (VECTOR_TMP_REG_LMUL + 3).U
789          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
790          csBundle(4).ldest := (VECTOR_TMP_REG_LMUL + 4).U
791          csBundle(4).vpu.fpu.isFoldTo1_4 := true.B
792          csBundle(4).uopIdx := 4.U
793          csBundle(5).lsrc(0) := (VECTOR_TMP_REG_LMUL + 4).U
794          csBundle(5).lsrc(1) := (VECTOR_TMP_REG_LMUL + 4).U
795          csBundle(5).ldest := (VECTOR_TMP_REG_LMUL + 5).U
796          csBundle(5).vpu.fpu.isFoldTo1_8 := true.B
797          csBundle(5).uopIdx := 5.U
798          csBundle(6).lsrc(0) := src1
799          csBundle(6).lsrc(1) := (VECTOR_TMP_REG_LMUL + 5).U
800          csBundle(6).ldest := dest
801          csBundle(6).uopIdx := 6.U
802        }
803      }
804      when(vlmul === VLmul.m2) {
805        csBundle(0).lsrc(0) := src2 + 1.U
806        csBundle(0).lsrc(1) := src2 + 0.U
807        csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
808        csBundle(0).uopIdx := 0.U
809        when(vsew === VSew.e64) {
810          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
811          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
812          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
813          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
814          csBundle(1).uopIdx := 1.U
815          csBundle(2).lsrc(0) := src1
816          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
817          csBundle(2).ldest := dest
818          csBundle(2).uopIdx := 2.U
819        }
820        when(vsew === VSew.e32) {
821          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
822          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
823          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
824          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
825          csBundle(1).uopIdx := 1.U
826          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
827          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
828          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
829          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
830          csBundle(2).uopIdx := 2.U
831          csBundle(3).lsrc(0) := src1
832          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
833          csBundle(3).ldest := dest
834          csBundle(3).uopIdx := 3.U
835        }
836        when(vsew === VSew.e16) {
837          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
838          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
839          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
840          csBundle(1).vpu.fpu.isFoldTo1_2 := true.B
841          csBundle(1).uopIdx := 1.U
842          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
843          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
844          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
845          csBundle(2).vpu.fpu.isFoldTo1_4 := true.B
846          csBundle(2).uopIdx := 2.U
847          csBundle(3).lsrc(0) := (VECTOR_TMP_REG_LMUL + 2).U
848          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
849          csBundle(3).ldest := (VECTOR_TMP_REG_LMUL + 3).U
850          csBundle(3).vpu.fpu.isFoldTo1_8 := true.B
851          csBundle(3).uopIdx := 3.U
852          csBundle(4).lsrc(0) := src1
853          csBundle(4).lsrc(1) := (VECTOR_TMP_REG_LMUL + 3).U
854          csBundle(4).ldest := dest
855          csBundle(4).uopIdx := 4.U
856        }
857      }
858      when(vlmul === VLmul.m1) {
859        when(vsew === VSew.e64) {
860          csBundle(0).lsrc(0) := src2
861          csBundle(0).lsrc(1) := src2
862          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
863          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
864          csBundle(0).uopIdx := 0.U
865          csBundle(1).lsrc(0) := src1
866          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
867          csBundle(1).ldest := dest
868          csBundle(1).uopIdx := 1.U
869        }
870        when(vsew === VSew.e32) {
871          csBundle(0).lsrc(0) := src2
872          csBundle(0).lsrc(1) := src2
873          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
874          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
875          csBundle(0).uopIdx := 0.U
876          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
877          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
878          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
879          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
880          csBundle(1).uopIdx := 1.U
881          csBundle(2).lsrc(0) := src1
882          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
883          csBundle(2).ldest := dest
884          csBundle(2).uopIdx := 2.U
885        }
886        when(vsew === VSew.e16) {
887          csBundle(0).lsrc(0) := src2
888          csBundle(0).lsrc(1) := src2
889          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
890          csBundle(0).vpu.fpu.isFoldTo1_2 := true.B
891          csBundle(0).uopIdx := 0.U
892          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
893          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
894          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
895          csBundle(1).vpu.fpu.isFoldTo1_4 := true.B
896          csBundle(1).uopIdx := 1.U
897          csBundle(2).lsrc(0) := (VECTOR_TMP_REG_LMUL + 1).U
898          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
899          csBundle(2).ldest := (VECTOR_TMP_REG_LMUL + 2).U
900          csBundle(2).vpu.fpu.isFoldTo1_8 := true.B
901          csBundle(2).uopIdx := 2.U
902          csBundle(3).lsrc(0) := src1
903          csBundle(3).lsrc(1) := (VECTOR_TMP_REG_LMUL + 2).U
904          csBundle(3).ldest := dest
905          csBundle(3).uopIdx := 3.U
906        }
907      }
908      when(vlmul === VLmul.mf2) {
909        when(vsew === VSew.e32) {
910          csBundle(0).lsrc(0) := src2
911          csBundle(0).lsrc(1) := src2
912          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
913          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
914          csBundle(0).uopIdx := 0.U
915          csBundle(1).lsrc(0) := src1
916          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
917          csBundle(1).ldest := dest
918          csBundle(1).uopIdx := 1.U
919        }
920        when(vsew === VSew.e16) {
921          csBundle(0).lsrc(0) := src2
922          csBundle(0).lsrc(1) := src2
923          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
924          csBundle(0).vpu.fpu.isFoldTo1_4 := true.B
925          csBundle(0).uopIdx := 0.U
926          csBundle(1).lsrc(0) := (VECTOR_TMP_REG_LMUL + 0).U
927          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
928          csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
929          csBundle(1).vpu.fpu.isFoldTo1_8 := true.B
930          csBundle(1).uopIdx := 1.U
931          csBundle(2).lsrc(0) := src1
932          csBundle(2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
933          csBundle(2).ldest := dest
934          csBundle(2).uopIdx := 2.U
935        }
936      }
937      when(vlmul === VLmul.mf4) {
938        when(vsew === VSew.e16) {
939          csBundle(0).lsrc(0) := src2
940          csBundle(0).lsrc(1) := src2
941          csBundle(0).ldest := (VECTOR_TMP_REG_LMUL + 0).U
942          csBundle(0).vpu.fpu.isFoldTo1_8 := true.B
943          csBundle(0).uopIdx := 0.U
944          csBundle(1).lsrc(0) := src1
945          csBundle(1).lsrc(1) := (VECTOR_TMP_REG_LMUL + 0).U
946          csBundle(1).ldest := dest
947          csBundle(1).uopIdx := 1.U
948        }
949      }
950    }
951
952    is(UopSplitType.VEC_VFREDOSUM) {
953      import yunsuan.VfaluType
954      val vlmul = vlmulReg
955      val vsew = vsewReg
956      val isWiden = decodedInstsSimple.fuOpType === VfaluType.vfwredosum
957      when(vlmul === VLmul.m8) {
958        when(vsew === VSew.e64) {
959          val vlmax = 16
960          for (i <- 0 until vlmax) {
961            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
962            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
963            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
964            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
965            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
966            csBundle(i).uopIdx := i.U
967          }
968        }
969        when(vsew === VSew.e32) {
970          val vlmax = 32
971          for (i <- 0 until vlmax) {
972            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
973            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
974            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
975            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
976            csBundle(i).vpu.fpu.isFoldTo1_4 := (if (i % 4 == 0) false.B else true.B)
977            csBundle(i).uopIdx := i.U
978          }
979        }
980        when(vsew === VSew.e16) {
981          val vlmax = 64
982          for (i <- 0 until vlmax) {
983            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
984            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
985            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
986            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
987            csBundle(i).vpu.fpu.isFoldTo1_8 := (if (i % 8 == 0) false.B else true.B)
988            csBundle(i).uopIdx := i.U
989          }
990        }
991      }
992      when(vlmul === VLmul.m4) {
993        when(vsew === VSew.e64) {
994          val vlmax = 8
995          for (i <- 0 until vlmax) {
996            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
997            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
998            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
999            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1000            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1001            csBundle(i).uopIdx := i.U
1002          }
1003        }
1004        when(vsew === VSew.e32) {
1005          val vlmax = 16
1006          for (i <- 0 until vlmax) {
1007            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1008            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1009            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1010            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1011            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1012            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1013            csBundle(i).uopIdx := i.U
1014          }
1015        }
1016        when(vsew === VSew.e16) {
1017          val vlmax = 32
1018          for (i <- 0 until vlmax) {
1019            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1020            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1021            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1022            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1023            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1024            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1025            csBundle(i).uopIdx := i.U
1026          }
1027        }
1028      }
1029      when(vlmul === VLmul.m2) {
1030        when(vsew === VSew.e64) {
1031          val vlmax = 4
1032          for (i <- 0 until vlmax) {
1033            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1034            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1035            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1036            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1037            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1038            csBundle(i).uopIdx := i.U
1039          }
1040        }
1041        when(vsew === VSew.e32) {
1042          val vlmax = 8
1043          for (i <- 0 until vlmax) {
1044            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1045            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1046            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1047            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1048            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1049            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1050            csBundle(i).uopIdx := i.U
1051          }
1052        }
1053        when(vsew === VSew.e16) {
1054          val vlmax = 16
1055          for (i <- 0 until vlmax) {
1056            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1057            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1058            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1059            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1060            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1061            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1062            csBundle(i).uopIdx := i.U
1063          }
1064        }
1065      }
1066      when(vlmul === VLmul.m1) {
1067        when(vsew === VSew.e64) {
1068          val vlmax = 2
1069          for (i <- 0 until vlmax) {
1070            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1071            csBundle(i).lsrc(1) := (if (i % 2 == 0) src2 + (i/2).U else VECTOR_TMP_REG_LMUL.U)
1072            csBundle(i).lsrc(2) := (if (i % 2 == 0) src2 + (i/2).U else if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1073            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1074            csBundle(i).vpu.fpu.isFoldTo1_2 := (if (i % 2 == 0) false.B else true.B)
1075            csBundle(i).uopIdx := i.U
1076          }
1077        }
1078        when(vsew === VSew.e32) {
1079          val vlmax = 4
1080          for (i <- 0 until vlmax) {
1081            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1082            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1083            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1084            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1085            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1086            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1087            csBundle(i).uopIdx := i.U
1088          }
1089        }
1090        when(vsew === VSew.e16) {
1091          val vlmax = 8
1092          for (i <- 0 until vlmax) {
1093            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1094            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1095            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1096            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1097            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1098            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1099            csBundle(i).uopIdx := i.U
1100          }
1101        }
1102      }
1103      when(vlmul === VLmul.mf2) {
1104        when(vsew === VSew.e32) {
1105          val vlmax = 2
1106          for (i <- 0 until vlmax) {
1107            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1108            csBundle(i).lsrc(1) := (if (i % 4 == 0) src2 + (i/4).U else VECTOR_TMP_REG_LMUL.U)
1109            csBundle(i).lsrc(2) := (if (i % 4 == 0) src2 + (i/4).U else if (i == vlmax - 1) dest else if (i % 4 == 1) Mux(isWiden, src2 + (i/4).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1110            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1111            csBundle(i).vpu.fpu.isFoldTo1_2 := isWiden && (if (i % 4 == 0) false.B else true.B)
1112            csBundle(i).vpu.fpu.isFoldTo1_4 := !isWiden && (if (i % 4 == 0) false.B else true.B)
1113            csBundle(i).uopIdx := i.U
1114          }
1115        }
1116        when(vsew === VSew.e16) {
1117          val vlmax = 4
1118          for (i <- 0 until vlmax) {
1119            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1120            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1121            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1122            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1123            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1124            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1125            csBundle(i).uopIdx := i.U
1126          }
1127        }
1128      }
1129      when(vlmul === VLmul.mf4) {
1130        when(vsew === VSew.e16) {
1131          val vlmax = 2
1132          for (i <- 0 until vlmax) {
1133            csBundle(i).lsrc(0) := (if (i == 0) src1 else VECTOR_TMP_REG_LMUL.U)
1134            csBundle(i).lsrc(1) := (if (i % 8 == 0) src2 + (i/8).U else VECTOR_TMP_REG_LMUL.U)
1135            csBundle(i).lsrc(2) := (if (i % 8 == 0) src2 + (i/8).U else if (i == vlmax - 1) dest else if (i % 8 == 1) Mux(isWiden, src2 + (i/8).U, VECTOR_TMP_REG_LMUL.U) else VECTOR_TMP_REG_LMUL.U)
1136            csBundle(i).ldest := (if (i == vlmax - 1) dest else VECTOR_TMP_REG_LMUL.U)
1137            csBundle(i).vpu.fpu.isFoldTo1_4 := isWiden && (if (i % 8 == 0) false.B else true.B)
1138            csBundle(i).vpu.fpu.isFoldTo1_8 := !isWiden && (if (i % 8 == 0) false.B else true.B)
1139            csBundle(i).uopIdx := i.U
1140          }
1141        }
1142      }
1143    }
1144    is(UopSplitType.VEC_SLIDEUP) {
1145      // FMV.D.X
1146      csBundle(0).srcType(0) := SrcType.reg
1147      csBundle(0).srcType(1) := SrcType.imm
1148      csBundle(0).lsrc(1) := 0.U
1149      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1150      csBundle(0).fuType := FuType.i2v.U
1151      csBundle(0).fuOpType := vsewReg
1152      csBundle(0).vecWen := true.B
1153      // LMUL
1154      for (i <- 0 until MAX_VLMUL)
1155        for (j <- 0 to i) {
1156          val old_vd = if (j == 0) {
1157            dest + i.U
1158          } else (VECTOR_TMP_REG_LMUL + j).U
1159          val vd = if (j == i) {
1160            dest + i.U
1161          } else (VECTOR_TMP_REG_LMUL + j + 1).U
1162          csBundle(i * (i + 1) / 2 + j + 1).srcType(0) := SrcType.vp
1163          csBundle(i * (i + 1) / 2 + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1164          csBundle(i * (i + 1) / 2 + j + 1).lsrc(1) := src2 + j.U
1165          csBundle(i * (i + 1) / 2 + j + 1).lsrc(2) := old_vd
1166          csBundle(i * (i + 1) / 2 + j + 1).ldest := vd
1167          csBundle(i * (i + 1) / 2 + j + 1).uopIdx := (i * (i + 1) / 2 + j).U
1168        }
1169    }
1170
1171    is(UopSplitType.VEC_ISLIDEUP) {
1172      // LMUL
1173      for (i <- 0 until MAX_VLMUL)
1174        for (j <- 0 to i) {
1175          val old_vd = if (j == 0) {
1176            dest + i.U
1177          } else (VECTOR_TMP_REG_LMUL + j - 1).U
1178          val vd = if (j == i) {
1179            dest + i.U
1180          } else (VECTOR_TMP_REG_LMUL + j).U
1181          csBundle(i * (i + 1) / 2 + j).lsrc(1) := src2 + j.U
1182          csBundle(i * (i + 1) / 2 + j).lsrc(2) := old_vd
1183          csBundle(i * (i + 1) / 2 + j).ldest := vd
1184          csBundle(i * (i + 1) / 2 + j).uopIdx := (i * (i + 1) / 2 + j).U
1185        }
1186    }
1187
1188    is(UopSplitType.VEC_SLIDEDOWN) {
1189      // FMV.D.X
1190      csBundle(0).srcType(0) := SrcType.reg
1191      csBundle(0).srcType(1) := SrcType.imm
1192      csBundle(0).lsrc(1) := 0.U
1193      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1194      csBundle(0).fuType := FuType.i2v.U
1195      csBundle(0).fuOpType := vsewReg
1196      csBundle(0).vecWen := true.B
1197      // LMUL
1198      for (i <- 0 until MAX_VLMUL)
1199        for (j <- (0 to i).reverse) {
1200          when(i.U < lmul) {
1201            val old_vd = if (j == 0) {
1202              dest + lmul - 1.U - i.U
1203            } else (VECTOR_TMP_REG_LMUL + j).U
1204            val vd = if (j == i) {
1205              dest + lmul - 1.U - i.U
1206            } else (VECTOR_TMP_REG_LMUL + j + 1).U
1207            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).srcType(0) := SrcType.vp
1208            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1209            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1210            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1211            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1212            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 2).U
1213          }
1214        }
1215    }
1216
1217    is(UopSplitType.VEC_ISLIDEDOWN) {
1218      // LMUL
1219      for (i <- 0 until MAX_VLMUL)
1220        for (j <- (0 to i).reverse) {
1221          when(i.U < lmul) {
1222            val old_vd = if (j == 0) {
1223              dest + lmul - 1.U - i.U
1224            } else (VECTOR_TMP_REG_LMUL + j - 1).U
1225            val vd = if (j == i) {
1226              dest + lmul - 1.U - i.U
1227            } else (VECTOR_TMP_REG_LMUL + j).U
1228            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(1) := src2 + lmul - 1.U - j.U
1229            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).lsrc(2) := old_vd
1230            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).ldest := vd
1231            csBundle(numOfUop - (i * (i + 1) / 2 + i - j + 1).U).uopIdx := numOfUop - (i * (i + 1) / 2 + i - j + 1).U
1232          }
1233        }
1234    }
1235
1236    is(UopSplitType.VEC_M0X) {
1237      // LMUL
1238      for (i <- 0 until MAX_VLMUL) {
1239        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1240        val ldest = (VECTOR_TMP_REG_LMUL + i).U
1241        csBundle(i).srcType(0) := srcType0
1242        csBundle(i).srcType(1) := SrcType.vp
1243        csBundle(i).rfWen := false.B
1244        csBundle(i).vecWen := true.B
1245        csBundle(i).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1246        csBundle(i).lsrc(1) := src2
1247        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1248        csBundle(i).ldest := ldest
1249        csBundle(i).uopIdx := i.U
1250      }
1251      csBundle(lmul - 1.U).vecWen := false.B
1252      csBundle(lmul - 1.U).fpWen := true.B
1253      csBundle(lmul - 1.U).ldest := FP_TMP_REG_MV.U
1254      // FMV_X_D
1255      csBundle(lmul).srcType(0) := SrcType.fp
1256      csBundle(lmul).srcType(1) := SrcType.imm
1257      csBundle(lmul).lsrc(0) := FP_TMP_REG_MV.U
1258      csBundle(lmul).lsrc(1) := 0.U
1259      csBundle(lmul).ldest := dest
1260      csBundle(lmul).fuType := FuType.fmisc.U
1261      csBundle(lmul).rfWen := true.B
1262      csBundle(lmul).fpWen := false.B
1263      csBundle(lmul).vecWen := false.B
1264      csBundle(lmul).fpu.isAddSub := false.B
1265      csBundle(lmul).fpu.typeTagIn := FPU.D
1266      csBundle(lmul).fpu.typeTagOut := FPU.D
1267      csBundle(lmul).fpu.fromInt := false.B
1268      csBundle(lmul).fpu.wflags := false.B
1269      csBundle(lmul).fpu.fpWen := false.B
1270      csBundle(lmul).fpu.div := false.B
1271      csBundle(lmul).fpu.sqrt := false.B
1272      csBundle(lmul).fpu.fcvt := false.B
1273    }
1274
1275    is(UopSplitType.VEC_MVV) {
1276      // LMUL
1277      for (i <- 0 until MAX_VLMUL) {
1278        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1279        csBundle(i * 2 + 0).srcType(0) := srcType0
1280        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1281        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1282        csBundle(i * 2 + 0).lsrc(1) := src2
1283        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1284        csBundle(i * 2 + 0).ldest := dest + i.U
1285        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1286
1287        csBundle(i * 2 + 1).srcType(0) := srcType0
1288        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1289        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1290        csBundle(i * 2 + 1).lsrc(1) := src2
1291        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1292        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1293        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1294      }
1295    }
1296
1297    is(UopSplitType.VEC_M0X_VFIRST) {
1298      // LMUL
1299      csBundle(0).rfWen := false.B
1300      csBundle(0).fpWen := true.B
1301      csBundle(0).ldest := FP_TMP_REG_MV.U
1302      // FMV_X_D
1303      csBundle(1).srcType(0) := SrcType.fp
1304      csBundle(1).srcType(1) := SrcType.imm
1305      csBundle(1).lsrc(0) := FP_TMP_REG_MV.U
1306      csBundle(1).lsrc(1) := 0.U
1307      csBundle(1).ldest := dest
1308      csBundle(1).fuType := FuType.fmisc.U
1309      csBundle(1).rfWen := true.B
1310      csBundle(1).fpWen := false.B
1311      csBundle(1).vecWen := false.B
1312      csBundle(1).fpu.isAddSub := false.B
1313      csBundle(1).fpu.typeTagIn := FPU.D
1314      csBundle(1).fpu.typeTagOut := FPU.D
1315      csBundle(1).fpu.fromInt := false.B
1316      csBundle(1).fpu.wflags := false.B
1317      csBundle(1).fpu.fpWen := false.B
1318      csBundle(1).fpu.div := false.B
1319      csBundle(1).fpu.sqrt := false.B
1320      csBundle(1).fpu.fcvt := false.B
1321    }
1322    is(UopSplitType.VEC_VWW) {
1323      for (i <- 0 until MAX_VLMUL*2) {
1324        when(i.U < lmul){
1325          csBundle(i).srcType(2) := SrcType.DC
1326          csBundle(i).lsrc(0) := src2 + i.U
1327          csBundle(i).lsrc(1) := src2 + i.U
1328          // csBundle(i).lsrc(2) := dest + (2 * i).U
1329          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1330          csBundle(i).uopIdx :=  i.U
1331        } otherwise {
1332          csBundle(i).srcType(2) := SrcType.DC
1333          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1334          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1335          // csBundle(i).lsrc(2) := dest + (2 * i).U
1336          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1337          csBundle(i).uopIdx := i.U
1338        }
1339        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1340        csBundle(numOfUop-1.U).lsrc(0) := src1
1341        csBundle(numOfUop-1.U).lsrc(2) := dest
1342        csBundle(numOfUop-1.U).ldest := dest
1343      }
1344    }
1345    is(UopSplitType.VEC_RGATHER) {
1346      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1347        for (i <- 0 until len)
1348          for (j <- 0 until len) {
1349            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1350            // csBundle(i * len + j).srcType(1) := SrcType.vp
1351            // csBundle(i * len + j).srcType(2) := SrcType.vp
1352            csBundle(i * len + j).lsrc(0) := src1 + i.U
1353            csBundle(i * len + j).lsrc(1) := src2 + j.U
1354            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1355            csBundle(i * len + j).lsrc(2) := vd_old
1356            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1357            csBundle(i * len + j).ldest := vd
1358            csBundle(i * len + j).uopIdx := (i * len + j).U
1359          }
1360      }
1361      switch(vlmulReg) {
1362        is("b001".U ){
1363          genCsBundle_VEC_RGATHER(2)
1364        }
1365        is("b010".U ){
1366          genCsBundle_VEC_RGATHER(4)
1367        }
1368        is("b011".U ){
1369          genCsBundle_VEC_RGATHER(8)
1370        }
1371      }
1372    }
1373    is(UopSplitType.VEC_RGATHER_VX) {
1374      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1375        for (i <- 0 until len)
1376          for (j <- 0 until len) {
1377            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1378            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1379            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1380            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1381            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1382            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1383            csBundle(i * len + j + 1).lsrc(2) := vd_old
1384            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1385            csBundle(i * len + j + 1).ldest := vd
1386            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1387          }
1388      }
1389      // FMV.D.X
1390      csBundle(0).srcType(0) := SrcType.reg
1391      csBundle(0).srcType(1) := SrcType.imm
1392      csBundle(0).lsrc(1) := 0.U
1393      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1394      csBundle(0).fuType := FuType.i2v.U
1395      csBundle(0).fuOpType := vsewReg
1396      csBundle(0).vecWen := true.B
1397      switch(vlmulReg) {
1398        is("b000".U ){
1399          genCsBundle_RGATHER_VX(1)
1400        }
1401        is("b001".U ){
1402          genCsBundle_RGATHER_VX(2)
1403        }
1404        is("b010".U ){
1405          genCsBundle_RGATHER_VX(4)
1406        }
1407        is("b011".U ){
1408          genCsBundle_RGATHER_VX(8)
1409        }
1410      }
1411    }
1412    is(UopSplitType.VEC_RGATHEREI16) {
1413      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1414        for (i <- 0 until len)
1415          for (j <- 0 until len) {
1416            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1417            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1418            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1419            // csBundle(i * len + j).srcType(1) := SrcType.vp
1420            // csBundle(i * len + j).srcType(2) := SrcType.vp
1421            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1422            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1423            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1424            csBundle((i * len + j)*2+0).ldest := vd0
1425            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1426            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1427            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1428            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1429            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1430            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1431            csBundle((i * len + j)*2+1).ldest := vd1
1432            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1433          }
1434      }
1435      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1436        for (i <- 0 until len)
1437          for (j <- 0 until len) {
1438            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1439            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1440            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1441            // csBundle(i * len + j).srcType(1) := SrcType.vp
1442            // csBundle(i * len + j).srcType(2) := SrcType.vp
1443            csBundle(i * len + j).lsrc(0) := src1 + i.U
1444            csBundle(i * len + j).lsrc(1) := src2 + j.U
1445            csBundle(i * len + j).lsrc(2) := vd_old
1446            csBundle(i * len + j).ldest := vd
1447            csBundle(i * len + j).uopIdx := (i * len + j).U
1448          }
1449      }
1450      switch(vlmulReg) {
1451        is("b000".U ){
1452          when(!vsewReg.orR){
1453            genCsBundle_VEC_RGATHEREI16_SEW8(1)
1454          } .otherwise{
1455            genCsBundle_VEC_RGATHEREI16(1)
1456          }
1457        }
1458        is("b001".U) {
1459          when(!vsewReg.orR) {
1460            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1461          }.otherwise {
1462            genCsBundle_VEC_RGATHEREI16(2)
1463          }
1464        }
1465        is("b010".U) {
1466          when(!vsewReg.orR) {
1467            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1468          }.otherwise {
1469            genCsBundle_VEC_RGATHEREI16(4)
1470          }
1471        }
1472        is("b011".U) {
1473          genCsBundle_VEC_RGATHEREI16(8)
1474        }
1475      }
1476    }
1477    is(UopSplitType.VEC_COMPRESS) {
1478      def genCsBundle_VEC_COMPRESS(len:Int): Unit ={
1479        for (i <- 0 until len){
1480          val jlen = if (i == len-1) i+1 else i+2
1481          for (j <- 0 until jlen) {
1482            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1483            val vd = if(i==len-1) (dest + j.U) else{
1484              if (j == i+1) VECTOR_TMP_REG_LMUL.U else (VECTOR_TMP_REG_LMUL + j + 1).U
1485            }
1486            val src23Type = if (j == i+1) DontCare else SrcType.vp
1487            csBundle(i*(i+3)/2 + j).srcType(0) := SrcType.vp
1488            csBundle(i*(i+3)/2 + j).srcType(1) := src23Type
1489            csBundle(i*(i+3)/2 + j).srcType(2) := src23Type
1490            csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1491            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1492            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1493            // csBundle(i*(i+3)/2 + j).lsrc(3) := VECTOR_TMP_REG_LMUL.U
1494            csBundle(i*(i+3)/2 + j).ldest := vd
1495            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1496          }
1497        }
1498      }
1499      switch(vlmulReg) {
1500        is("b001".U ){
1501          genCsBundle_VEC_COMPRESS(2)
1502        }
1503        is("b010".U ){
1504          genCsBundle_VEC_COMPRESS(4)
1505        }
1506        is("b011".U ){
1507          genCsBundle_VEC_COMPRESS(8)
1508        }
1509      }
1510    }
1511    is(UopSplitType.VEC_MVNR) {
1512      for (i <- 0 until MAX_VLMUL) {
1513        csBundle(i).lsrc(0) := src1 + i.U
1514        csBundle(i).lsrc(1) := src2 + i.U
1515        csBundle(i).lsrc(2) := dest + i.U
1516        csBundle(i).ldest := dest + i.U
1517        csBundle(i).uopIdx := i.U
1518      }
1519    }
1520    is(UopSplitType.VEC_US_LD) {
1521      /*
1522      FMV.D.X
1523       */
1524      csBundle(0).srcType(0) := SrcType.reg
1525      csBundle(0).srcType(1) := SrcType.imm
1526      csBundle(0).lsrc(1) := 0.U
1527      csBundle(0).ldest := FP_TMP_REG_MV.U
1528      csBundle(0).fuType := FuType.i2f.U
1529      csBundle(0).rfWen := false.B
1530      csBundle(0).fpWen := true.B
1531      csBundle(0).vecWen := false.B
1532      csBundle(0).fpu.isAddSub := false.B
1533      csBundle(0).fpu.typeTagIn := FPU.D
1534      csBundle(0).fpu.typeTagOut := FPU.D
1535      csBundle(0).fpu.fromInt := true.B
1536      csBundle(0).fpu.wflags := false.B
1537      csBundle(0).fpu.fpWen := true.B
1538      csBundle(0).fpu.div := false.B
1539      csBundle(0).fpu.sqrt := false.B
1540      csBundle(0).fpu.fcvt := false.B
1541      //LMUL
1542      for (i <- 0 until MAX_VLMUL) {
1543        csBundle(i + 1).srcType(0) := SrcType.fp
1544        csBundle(i + 1).lsrc(0) := FP_TMP_REG_MV.U
1545        csBundle(i + 1).ldest := dest + i.U
1546        csBundle(i + 1).uopIdx := i.U
1547      }
1548    }
1549  }
1550
1551  //uops dispatch
1552  val s_normal :: s_ext :: Nil = Enum(2)
1553  val state = RegInit(s_normal)
1554  val state_next = WireDefault(state)
1555  val uopRes = RegInit(0.U)
1556
1557  //readyFromRename Counter
1558  val readyCounter = PriorityMuxDefault(io.readyFromRename.map(x => !x).zip((0 to (RenameWidth - 1)).map(_.U)), RenameWidth.U)
1559
1560  switch(state) {
1561    is(s_normal) {
1562      state_next := Mux(io.validFromIBuf(0) && (numOfUop > readyCounter) && (readyCounter =/= 0.U), s_ext, s_normal)
1563    }
1564    is(s_ext) {
1565      state_next := Mux(io.validFromIBuf(0) && (uopRes > readyCounter), s_ext, s_normal)
1566    }
1567  }
1568
1569  state := state_next
1570
1571  val uopRes0 = Mux(state === s_normal, numOfUop, uopRes)
1572  val uopResJudge = Mux(state === s_normal,
1573    io.validFromIBuf(0) && (readyCounter =/= 0.U) && (uopRes0 > readyCounter),
1574    io.validFromIBuf(0) && (uopRes0 > readyCounter))
1575  uopRes := Mux(uopResJudge, uopRes0 - readyCounter, 0.U)
1576
1577  for(i <- 0 until RenameWidth) {
1578    decodedInsts(i) := MuxCase(csBundle(i), Seq(
1579      (state === s_normal) -> csBundle(i),
1580      (state === s_ext) -> Mux((i.U + numOfUop -uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1581    ).toSeq)
1582  }
1583
1584  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1585  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1586  val notInf = Wire(Vec(DecodeWidth, Bool()))
1587  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1588  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1589  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1590  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1591
1592  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1593    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1594    0.U)
1595  validToRename.zipWithIndex.foreach{
1596    case(dst, i) =>
1597      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1598      dst := MuxCase(false.B, Seq(
1599        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1600        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1601      ).toSeq)
1602  }
1603
1604  readyToIBuf.zipWithIndex.foreach {
1605    case (dst, i) =>
1606      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1607      dst := MuxCase(true.B, Seq(
1608        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1609        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1610      ).toSeq)
1611  }
1612
1613  io.deq.decodedInsts := decodedInsts
1614  io.deq.isVset := isVsetSimple
1615  io.deq.complexNum := complexNum
1616  io.deq.validToRename := validToRename
1617  io.deq.readyToIBuf := readyToIBuf
1618
1619}
1620