xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 195ef4a53ab54326d879e884c4e1568f424f2668)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table for one uop of a non-segment indexed vector load/store.
  *
  * For the fixed `uopIdx` of this instance, the module maps a 4-bit selector to the
  * register offsets that the uop adds to vs2 and to vd. The selector is laid out as
  * `(emul << 2) | lmul`, with both fields in 0..3; each field is used as a log2
  * exponent (a value `v` stands for `1 << v` registers).
  * NOTE(review): `emul` presumably describes the index (vs2) register group and `lmul`
  * the data (vd) register group - inferred from which output advances per uop; confirm
  * against the caller that drives `src`.
  *
  * @param uopIdx index of the uop served by this table, fixed when the module is built
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  // selector: bits [3:2] = emul, bits [1:0] = lmul
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2 offset, vd offset) pair of one uop.
    *
    * @param lmul   log2 exponent compared against `emul`; the larger of the two sets the uop count
    * @param emul   log2 exponent compared against `lmul`
    * @param uopIdx index of the uop inside the instruction
    * @return `(offsetVs2, offsetVd)`; `(0, 0)` when `uopIdx` lies outside the uop range
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    if (lmul < emul) {
      // lmul < emul: uop count follows emul, so vs2 advances every uop while
      // (1 << (emul - lmul)) consecutive uops share one vd register
      val offset = 1 << (emul - lmul)
      if ((0 until (1 << emul)).contains(uopIdx)) (uopIdx, uopIdx / offset) else (0, 0)
    } else {
      // lmul >= emul: uop count follows lmul, so vd advances every uop while
      // (1 << (lmul - emul)) consecutive uops share one vs2 register
      val offset = 1 << (lmul - emul)
      if ((0 until (1 << lmul)).contains(uopIdx)) (uopIdx / offset, uopIdx) else (0, 0)
    }
  }

  // indexed load/store: one row per (emul, lmul) pair, emul-major, for this uopIdx
  val combVemulNf : Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // Minimized decoder: key = {emul, lmul}, value = {offsetVs2[2:0], offsetVd[2:0]};
  // selectors without a row fall back to the all-zero default pattern.
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Elaboration-time integer constants mixed into the vector decode logic.
  */
trait VectorConstants {
  /** Upper bound used when unrolling per-register uops and when sizing the indexed load/store offset tables. */
  val MAX_VLMUL: Int = 8

  /** First logical register index used for decode-generated temporaries; 33~47  ->  15 */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** in v0 regfile */
  val VECTOR_COMPRESS: Int = 1

  /** NOTE(review): presumably the maximum uop count of one indexed load/store - not referenced in the visible code, confirm. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * Port bundle of the complex-instruction decoder ([[DecodeUnitComp]]).
  *
  * Field order and names are part of the hardware interface and must not be changed.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // NOTE(review): presumably a pipeline flush request - its use is outside the visible code, confirm
  val redirect: Bool = Input(Bool())
  val csrCtrl: CustomCSRCtrlIO = Input(new CustomCSRCtrlIO)
  // vtype forwarded into the uops generated for vset instructions
  val vtypeBypass: VType = Input(new VType)
  // When the first inst in decode vector is complex inst, pass it in
  // (simple-decoder result plus its uop information, handshaked)
  val in = Flipped(DecoupledIO(new Bundle {
    val simpleDecodedInst = new DecodedInst
    val uopInfo = new UopInfo
  }))
  // split uops, one handshaked slot per rename lane
  val out = new Bundle {
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  // NOTE(review): presumably the number of valid entries in out.complexDecodedInsts - driver not visible here, confirm
  val complexNum: UInt = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153  val vstartReg = latchedInst.vpu.vstart
154
155  //Type of uop Div
156  val typeOfSplit = latchedInst.uopSplitType
157  val src1Type = latchedInst.srcType(0)
158  val src1IsImm = src1Type === SrcType.imm
159  val src1IsFp = src1Type === SrcType.fp
160
161  val isVstore = FuType.isVStore(latchedInst.fuType)
162
163  numOfUop := latchedUopInfo.numOfUop
164  numOfWB := latchedUopInfo.numOfWB
165
166  //uops dispatch
167  val s_idle :: s_active :: Nil = Enum(2)
168  val state = RegInit(s_idle)
169  val stateNext = WireDefault(state)
170  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
171  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
172  val uopResNext = WireInit(uopRes)
173  val e64 = 3.U(2.W)
174  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
175  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
176  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
177
178  //uop div up to maxUopSize
179  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
180  csBundle.foreach { case dst =>
181    dst := latchedInst
182    dst.numUops := latchedUopInfo.numOfUop
183    dst.numWB := latchedUopInfo.numOfWB
184    dst.firstUop := false.B
185    dst.lastUop := false.B
186    dst.vlsInstr := false.B
187  }
188
189  csBundle(0).firstUop := true.B
190  csBundle(numOfUop - 1.U).lastUop := true.B
191
192  // when vstart is not zero, the last uop will modify vstart to zero
193  // therefore, blockback and flush pipe
194  csBundle(numOfUop - 1.U).blockBackward := vstartReg =/= 0.U
195  csBundle(numOfUop - 1.U).flushPipe := vstartReg =/= 0.U
196
197  switch(typeOfSplit) {
198    is(UopSplitType.VSET) {
199      // In simple decoder, rfWen and vecWen are not set
200      when(isVsetSimple) {
201        // Default
202        // uop0 set rd, never flushPipe
203        csBundle(0).fuType := FuType.vsetiwi.U
204        csBundle(0).flushPipe := false.B
205        csBundle(0).blockBackward := false.B
206        csBundle(0).rfWen := true.B
207        // uop1 set vl, vsetvl will flushPipe
208        csBundle(1).ldest := Vl_IDX.U
209        csBundle(1).vecWen := false.B
210        csBundle(1).vlWen := true.B
211        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
212          // write nothing, uop0 is a nop instruction
213          csBundle(0).rfWen := false.B
214          csBundle(0).fpWen := false.B
215          csBundle(0).vecWen := false.B
216          csBundle(0).vlWen := false.B
217          csBundle(1).fuType := FuType.vsetfwf.U
218          csBundle(1).srcType(0) := SrcType.no
219          csBundle(1).srcType(2) := SrcType.no
220          csBundle(1).srcType(3) := SrcType.no
221          csBundle(1).srcType(4) := SrcType.vp
222          csBundle(1).lsrc(4) := Vl_IDX.U
223        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
224          // uop0: mv vtype gpr to vector region
225          csBundle(0).srcType(0) := SrcType.xp
226          csBundle(0).srcType(1) := SrcType.no
227          csBundle(0).lsrc(0) := src2
228          csBundle(0).lsrc(1) := 0.U
229          csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
230          csBundle(0).fuType := FuType.i2v.U
231          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
232          csBundle(0).rfWen := false.B
233          csBundle(0).fpWen := false.B
234          csBundle(0).vecWen := true.B
235          csBundle(0).vlWen := false.B
236          // uop1: uvsetvcfg_vv
237          csBundle(1).fuType := FuType.vsetfwf.U
238          // vl
239          csBundle(1).srcType(0) := SrcType.no
240          csBundle(1).srcType(2) := SrcType.no
241          csBundle(1).srcType(3) := SrcType.no
242          csBundle(1).srcType(4) := SrcType.vp
243          csBundle(1).lsrc(4) := Vl_IDX.U
244          // vtype
245          csBundle(1).srcType(1) := SrcType.vp
246          csBundle(1).lsrc(1) := VECTOR_TMP_REG_LMUL.U
247          csBundle(1).vecWen := false.B
248          csBundle(1).vlWen := true.B
249          csBundle(1).ldest := Vl_IDX.U
250        }.elsewhen(dest === 0.U) {
251          // write nothing, uop0 is a nop instruction
252          csBundle(0).rfWen := false.B
253          csBundle(0).fpWen := false.B
254          csBundle(0).vecWen := false.B
255          csBundle(0).vlWen := false.B
256        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType)) {
257          // because vsetvl may modified src2 when src2 == rd,
258          // we need to modify vd in second uop to avoid dependency
259          // uop0 set vl
260          csBundle(0).fuType := FuType.vsetiwf.U
261          csBundle(0).ldest := Vl_IDX.U
262          csBundle(0).rfWen := false.B
263          csBundle(0).vlWen := true.B
264          // uop1 set rd
265          csBundle(1).fuType := FuType.vsetiwi.U
266          csBundle(1).ldest := dest
267          csBundle(1).rfWen := true.B
268          csBundle(1).vlWen := false.B
269        }
270        // use bypass vtype from vtypeGen
271        csBundle(0).vpu.connectVType(io.vtypeBypass)
272        csBundle(1).vpu.connectVType(io.vtypeBypass)
273      }
274    }
275    is(UopSplitType.VEC_VVV) {
276      for (i <- 0 until MAX_VLMUL) {
277        csBundle(i).lsrc(0) := src1 + i.U
278        csBundle(i).lsrc(1) := src2 + i.U
279        csBundle(i).lsrc(2) := dest + i.U
280        csBundle(i).ldest := dest + i.U
281        csBundle(i).uopIdx := i.U
282      }
283    }
284    is(UopSplitType.VEC_VFV) {
285      /*
286      f to vector move
287       */
288      csBundle(0).srcType(0) := SrcType.fp
289      csBundle(0).srcType(1) := SrcType.imm
290      csBundle(0).srcType(2) := SrcType.imm
291      csBundle(0).lsrc(1) := 0.U
292      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
293      csBundle(0).fuType := FuType.f2v.U
294      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
295      csBundle(0).vecWen := true.B
296      csBundle(0).vpu.isReverse := false.B
297      /*
298      LMUL
299       */
300      for (i <- 0 until MAX_VLMUL) {
301        csBundle(i + 1).srcType(0) := SrcType.vp
302        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
303        csBundle(i + 1).lsrc(1) := src2 + i.U
304        csBundle(i + 1).lsrc(2) := dest + i.U
305        csBundle(i + 1).ldest := dest + i.U
306        csBundle(i + 1).uopIdx := i.U
307      }
308    }
309    is(UopSplitType.VEC_EXT2) {
310      for (i <- 0 until MAX_VLMUL / 2) {
311        csBundle(2 * i).lsrc(1) := src2 + i.U
312        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
313        csBundle(2 * i).ldest := dest + (2 * i).U
314        csBundle(2 * i).uopIdx := (2 * i).U
315        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
316        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
317        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
318        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
319      }
320    }
321    is(UopSplitType.VEC_EXT4) {
322      for (i <- 0 until MAX_VLMUL / 4) {
323        csBundle(4 * i).lsrc(1) := src2 + i.U
324        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
325        csBundle(4 * i).ldest := dest + (4 * i).U
326        csBundle(4 * i).uopIdx := (4 * i).U
327        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
328        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
329        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
330        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
331        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
332        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
333        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
334        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
335        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
336        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
337        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
338        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
339      }
340    }
341    is(UopSplitType.VEC_EXT8) {
342      for (i <- 0 until MAX_VLMUL) {
343        csBundle(i).lsrc(1) := src2
344        csBundle(i).lsrc(2) := dest + i.U
345        csBundle(i).ldest := dest + i.U
346        csBundle(i).uopIdx := i.U
347      }
348    }
349    is(UopSplitType.VEC_0XV) {
350      /*
351      i/f to vector move
352       */
353      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
354      csBundle(0).srcType(1) := SrcType.imm
355      csBundle(0).srcType(2) := SrcType.imm
356      csBundle(0).lsrc(1) := 0.U
357      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
358      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
359      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
360      csBundle(0).rfWen := false.B
361      csBundle(0).fpWen := false.B
362      csBundle(0).vecWen := true.B
363      /*
364      vmv.s.x
365       */
366      csBundle(1).srcType(0) := SrcType.vp
367      csBundle(1).srcType(1) := SrcType.imm
368      csBundle(1).srcType(2) := SrcType.vp
369      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
370      csBundle(1).lsrc(1) := 0.U
371      csBundle(1).lsrc(2) := dest
372      csBundle(1).ldest := dest
373      csBundle(1).rfWen := false.B
374      csBundle(1).fpWen := false.B
375      csBundle(1).vecWen := true.B
376      csBundle(1).uopIdx := 0.U
377    }
378    is(UopSplitType.VEC_VXV) {
379      /*
380      i to vector move
381       */
382      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
383      csBundle(0).srcType(1) := SrcType.imm
384      csBundle(0).srcType(2) := SrcType.imm
385      csBundle(0).lsrc(1) := 0.U
386      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
387      csBundle(0).fuType := FuType.i2v.U
388      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
389      csBundle(0).vecWen := true.B
390      csBundle(0).vpu.isReverse := false.B
391      /*
392      LMUL
393       */
394      for (i <- 0 until MAX_VLMUL) {
395        csBundle(i + 1).srcType(0) := SrcType.vp
396        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
397        csBundle(i + 1).lsrc(1) := src2 + i.U
398        csBundle(i + 1).lsrc(2) := dest + i.U
399        csBundle(i + 1).ldest := dest + i.U
400        csBundle(i + 1).uopIdx := i.U
401      }
402    }
403    is(UopSplitType.VEC_VVW) {
404      for (i <- 0 until MAX_VLMUL / 2) {
405        csBundle(2 * i).lsrc(0) := src1 + i.U
406        csBundle(2 * i).lsrc(1) := src2 + i.U
407        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
408        csBundle(2 * i).ldest := dest + (2 * i).U
409        csBundle(2 * i).uopIdx := (2 * i).U
410        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
411        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
412        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
413        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
414        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
415      }
416    }
417    is(UopSplitType.VEC_VFW) {
418      /*
419      f to vector move
420       */
421      csBundle(0).srcType(0) := SrcType.fp
422      csBundle(0).srcType(1) := SrcType.imm
423      csBundle(0).srcType(2) := SrcType.imm
424      csBundle(0).lsrc(1) := 0.U
425      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
426      csBundle(0).fuType := FuType.f2v.U
427      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
428      csBundle(0).rfWen := false.B
429      csBundle(0).fpWen := false.B
430      csBundle(0).vecWen := true.B
431
432      for (i <- 0 until MAX_VLMUL / 2) {
433        csBundle(2 * i + 1).srcType(0) := SrcType.vp
434        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
435        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
436        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
437        csBundle(2 * i + 1).ldest := dest + (2 * i).U
438        csBundle(2 * i + 1).uopIdx := (2 * i).U
439        csBundle(2 * i + 2).srcType(0) := SrcType.vp
440        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
441        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
442        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
443        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
444        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
445      }
446    }
447    is(UopSplitType.VEC_WVW) {
448      for (i <- 0 until MAX_VLMUL / 2) {
449        csBundle(2 * i).lsrc(0) := src1 + i.U
450        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
451        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
452        csBundle(2 * i).ldest := dest + (2 * i).U
453        csBundle(2 * i).uopIdx := (2 * i).U
454        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
455        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
456        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
457        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
458        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
459      }
460    }
461    is(UopSplitType.VEC_VXW) {
462      /*
463      i to vector move
464       */
465      csBundle(0).srcType(0) := SrcType.reg
466      csBundle(0).srcType(1) := SrcType.imm
467      csBundle(0).srcType(2) := SrcType.imm
468      csBundle(0).lsrc(1) := 0.U
469      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
470      csBundle(0).fuType := FuType.i2v.U
471      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
472      csBundle(0).vecWen := true.B
473
474      for (i <- 0 until MAX_VLMUL / 2) {
475        csBundle(2 * i + 1).srcType(0) := SrcType.vp
476        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
477        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
478        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
479        csBundle(2 * i + 1).ldest := dest + (2 * i).U
480        csBundle(2 * i + 1).uopIdx := (2 * i).U
481        csBundle(2 * i + 2).srcType(0) := SrcType.vp
482        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
483        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
484        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
485        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
486        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
487      }
488    }
489    is(UopSplitType.VEC_WXW) {
490      /*
491      i to vector move
492       */
493      csBundle(0).srcType(0) := SrcType.reg
494      csBundle(0).srcType(1) := SrcType.imm
495      csBundle(0).srcType(2) := SrcType.imm
496      csBundle(0).lsrc(1) := 0.U
497      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
498      csBundle(0).fuType := FuType.i2v.U
499      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
500      csBundle(0).vecWen := true.B
501
502      for (i <- 0 until MAX_VLMUL / 2) {
503        csBundle(2 * i + 1).srcType(0) := SrcType.vp
504        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
505        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
506        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
507        csBundle(2 * i + 1).ldest := dest + (2 * i).U
508        csBundle(2 * i + 1).uopIdx := (2 * i).U
509        csBundle(2 * i + 2).srcType(0) := SrcType.vp
510        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
511        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
512        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
513        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
514        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
515      }
516    }
517    is(UopSplitType.VEC_WVV) {
518      for (i <- 0 until MAX_VLMUL / 2) {
519
520        csBundle(2 * i).lsrc(0) := src1 + i.U
521        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
522        csBundle(2 * i).lsrc(2) := dest + i.U
523        csBundle(2 * i).ldest := dest + i.U
524        csBundle(2 * i).uopIdx := (2 * i).U
525        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
526        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
527        csBundle(2 * i + 1).lsrc(2) := dest + i.U
528        csBundle(2 * i + 1).ldest := dest + i.U
529        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
530      }
531    }
532    is(UopSplitType.VEC_WFW) {
533      /*
534      f to vector move
535       */
536      csBundle(0).srcType(0) := SrcType.fp
537      csBundle(0).srcType(1) := SrcType.imm
538      csBundle(0).srcType(2) := SrcType.imm
539      csBundle(0).lsrc(1) := 0.U
540      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
541      csBundle(0).fuType := FuType.f2v.U
542      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
543      csBundle(0).rfWen := false.B
544      csBundle(0).fpWen := false.B
545      csBundle(0).vecWen := true.B
546
547      for (i <- 0 until MAX_VLMUL / 2) {
548        csBundle(2 * i + 1).srcType(0) := SrcType.vp
549        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
550        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
551        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
552        csBundle(2 * i + 1).ldest := dest + (2 * i).U
553        csBundle(2 * i + 1).uopIdx := (2 * i).U
554        csBundle(2 * i + 2).srcType(0) := SrcType.vp
555        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
556        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
557        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
558        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
559        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
560      }
561    }
562    is(UopSplitType.VEC_WXV) {
563      /*
564      i to vector move
565       */
566      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
567      csBundle(0).srcType(1) := SrcType.imm
568      csBundle(0).srcType(2) := SrcType.imm
569      csBundle(0).lsrc(1) := 0.U
570      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
571      csBundle(0).fuType := FuType.i2v.U
572      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
573      csBundle(0).vecWen := true.B
574
575      for (i <- 0 until MAX_VLMUL / 2) {
576        csBundle(2 * i + 1).srcType(0) := SrcType.vp
577        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
578        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
579        csBundle(2 * i + 1).lsrc(2) := dest + i.U
580        csBundle(2 * i + 1).ldest := dest + i.U
581        csBundle(2 * i + 1).uopIdx := (2 * i).U
582        csBundle(2 * i + 2).srcType(0) := SrcType.vp
583        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
584        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
585        csBundle(2 * i + 2).lsrc(2) := dest + i.U
586        csBundle(2 * i + 2).ldest := dest + i.U
587        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
588      }
589    }
590    is(UopSplitType.VEC_VVM) {
591      csBundle(0).lsrc(2) := dest
592      csBundle(0).ldest := dest
593      csBundle(0).uopIdx := 0.U
594      for (i <- 1 until MAX_VLMUL) {
595        csBundle(i).lsrc(0) := src1 + i.U
596        csBundle(i).lsrc(1) := src2 + i.U
597        csBundle(i).lsrc(2) := dest
598        csBundle(i).ldest := dest
599        csBundle(i).uopIdx := i.U
600      }
601    }
602    is(UopSplitType.VEC_VFM) {
603      /*
604      f to vector move
605       */
606      csBundle(0).srcType(0) := SrcType.fp
607      csBundle(0).srcType(1) := SrcType.imm
608      csBundle(0).srcType(2) := SrcType.imm
609      csBundle(0).lsrc(1) := 0.U
610      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
611      csBundle(0).fuType := FuType.f2v.U
612      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
613      csBundle(0).rfWen := false.B
614      csBundle(0).fpWen := false.B
615      csBundle(0).vecWen := true.B
616      //LMUL
617      csBundle(1).srcType(0) := SrcType.vp
618      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
619      csBundle(1).lsrc(2) := dest
620      csBundle(1).ldest := dest
621      csBundle(1).uopIdx := 0.U
622      for (i <- 1 until MAX_VLMUL) {
623        csBundle(i + 1).srcType(0) := SrcType.vp
624        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
625        csBundle(i + 1).lsrc(1) := src2 + i.U
626        csBundle(i + 1).lsrc(2) := dest
627        csBundle(i + 1).ldest := dest
628        csBundle(i + 1).uopIdx := i.U
629      }
630      csBundle(numOfUop - 1.U).ldest := dest
631    }
632    is(UopSplitType.VEC_VXM) {
633      /*
634      i to vector move
635       */
636      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
637      csBundle(0).srcType(1) := SrcType.imm
638      csBundle(0).srcType(2) := SrcType.imm
639      csBundle(0).lsrc(1) := 0.U
640      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
641      csBundle(0).fuType := FuType.i2v.U
642      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
643      csBundle(0).vecWen := true.B
644      //LMUL
645      csBundle(1).srcType(0) := SrcType.vp
646      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
647      csBundle(1).lsrc(2) := dest
648      csBundle(1).ldest := dest
649      csBundle(1).uopIdx := 0.U
650      for (i <- 1 until MAX_VLMUL) {
651        csBundle(i + 1).srcType(0) := SrcType.vp
652        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
653        csBundle(i + 1).lsrc(1) := src2 + i.U
654        csBundle(i + 1).lsrc(2) := dest
655        csBundle(i + 1).ldest := dest
656        csBundle(i + 1).uopIdx := i.U
657      }
658      csBundle(numOfUop - 1.U).ldest := dest
659    }
660    is(UopSplitType.VEC_SLIDE1UP) {
661      /*
662      i to vector move
663       */
664      csBundle(0).srcType(0) := SrcType.reg
665      csBundle(0).srcType(1) := SrcType.imm
666      csBundle(0).srcType(2) := SrcType.imm
667      csBundle(0).lsrc(1) := 0.U
668      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
669      csBundle(0).fuType := FuType.i2v.U
670      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
671      csBundle(0).vecWen := true.B
672      //LMUL
673      csBundle(1).srcType(0) := SrcType.vp
674      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
675      csBundle(1).lsrc(2) := dest
676      csBundle(1).ldest := dest
677      csBundle(1).uopIdx := 0.U
678      for (i <- 1 until MAX_VLMUL) {
679        csBundle(i + 1).srcType(0) := SrcType.vp
680        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
681        csBundle(i + 1).lsrc(1) := src2 + i.U
682        csBundle(i + 1).lsrc(2) := dest + i.U
683        csBundle(i + 1).ldest := dest + i.U
684        csBundle(i + 1).uopIdx := i.U
685      }
686    }
687    is(UopSplitType.VEC_FSLIDE1UP) {
688      /*
689      f to vector move
690       */
691      csBundle(0).srcType(0) := SrcType.fp
692      csBundle(0).srcType(1) := SrcType.imm
693      csBundle(0).srcType(2) := SrcType.imm
694      csBundle(0).lsrc(1) := 0.U
695      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
696      csBundle(0).fuType := FuType.f2v.U
697      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
698      csBundle(0).rfWen := false.B
699      csBundle(0).fpWen := false.B
700      csBundle(0).vecWen := true.B
701      //LMUL
702      csBundle(1).srcType(0) := SrcType.vp
703      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
704      csBundle(1).lsrc(1) := src2
705      csBundle(1).lsrc(2) := dest
706      csBundle(1).ldest := dest
707      csBundle(1).uopIdx := 0.U
708      for (i <- 1 until MAX_VLMUL) {
709        csBundle(i + 1).srcType(0) := SrcType.vp
710        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
711        csBundle(i + 1).lsrc(1) := src2 + i.U
712        csBundle(i + 1).lsrc(2) := dest + i.U
713        csBundle(i + 1).ldest := dest + i.U
714        csBundle(i + 1).uopIdx := i.U
715      }
716    }
717    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
718      /*
719      i to vector move
720       */
721      csBundle(0).srcType(0) := SrcType.reg
722      csBundle(0).srcType(1) := SrcType.imm
723      csBundle(0).srcType(2) := SrcType.imm
724      csBundle(0).lsrc(1) := 0.U
725      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
726      csBundle(0).fuType := FuType.i2v.U
727      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
728      csBundle(0).vecWen := true.B
729      //LMUL
730      for (i <- 0 until MAX_VLMUL) {
731        csBundle(2 * i + 1).srcType(0) := SrcType.vp
732        csBundle(2 * i + 1).srcType(1) := SrcType.vp
733        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
734        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
735        csBundle(2 * i + 1).lsrc(2) := dest + i.U
736        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
737        csBundle(2 * i + 1).uopIdx := (2 * i).U
738        if (2 * i + 2 < MAX_VLMUL * 2) {
739          csBundle(2 * i + 2).srcType(0) := SrcType.vp
740          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
741          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
742          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
743          csBundle(2 * i + 2).ldest := dest + i.U
744          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
745        }
746      }
747      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
748      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
749      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
750    }
is(UopSplitType.VEC_FSLIDE1DOWN) {
  // Uop 0: floating-point scalar to vector move. It reads an fp source, runs on
  // the f2v unit with the fDup2Vec operation (combined with the latched SEW) and
  // writes the temporary vector register.
  val scalarMove = csBundle(0)
  scalarMove.srcType(0) := SrcType.fp
  scalarMove.srcType(1) := SrcType.imm
  scalarMove.srcType(2) := SrcType.imm
  scalarMove.lsrc(1) := 0.U
  scalarMove.ldest := VECTOR_TMP_REG_LMUL.U
  scalarMove.fuType := FuType.f2v.U
  scalarMove.fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
  scalarMove.rfWen := false.B
  scalarMove.fpWen := false.B
  scalarMove.vecWen := true.B

  // LMUL: two uops per register index `reg`.
  //   slot 2*reg+1 reads src2+reg+1, src2+reg and dest+reg, writes temp+1
  //   slot 2*reg+2 reads temp and temp+1, writes dest+reg
  (0 until MAX_VLMUL).foreach { reg =>
    val oddUop = csBundle(2 * reg + 1)
    oddUop.srcType(0) := SrcType.vp
    oddUop.srcType(1) := SrcType.vp
    oddUop.lsrc(0) := src2 + (reg + 1).U
    oddUop.lsrc(1) := src2 + reg.U
    oddUop.lsrc(2) := dest + reg.U
    oddUop.ldest := VECTOR_TMP_REG_LMUL.U + 1.U
    oddUop.uopIdx := (2 * reg).U

    val evenSlot = 2 * reg + 2
    if (evenSlot < MAX_VLMUL * 2) {
      val evenUop = csBundle(evenSlot)
      evenUop.srcType(0) := SrcType.vp
      evenUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      // lsrc(1) is intentionally left untouched here (don't care).
      evenUop.lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
      evenUop.ldest := dest + reg.U
      evenUop.uopIdx := (2 * reg + 1).U
    }
  }

  // The uop in slot numOfUop-1 is re-targeted after the loop so that these
  // connections take precedence: it reads the temporary register as source 0
  // and writes the register dest + lmul - 1.
  val finalUop = csBundle(numOfUop - 1.U)
  finalUop.srcType(0) := SrcType.vp
  finalUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
  finalUop.ldest := dest + lmul - 1.U
}
is(UopSplitType.VEC_VRED) {
  // Connects one pairwise step in uop slot `slot`: sources `srcHi`/`srcLo`,
  // destination `dst`, third source marked don't-care, uopIdx equal to the slot.
  def reduceStep(slot: Int, srcHi: UInt, srcLo: UInt, dst: UInt): Unit = {
    val uop = csBundle(slot)
    uop.srcType(2) := SrcType.DC
    uop.lsrc(0) := srcHi
    uop.lsrc(1) := srcLo
    uop.ldest := dst
    uop.uopIdx := slot.U
  }

  val tmpBase = VECTOR_TMP_REG_LMUL

  // vlmul encoding b001: a single pair (src2+1, src2) goes into the temporary register.
  when(vlmulReg === "b001".U) {
    reduceStep(0, src2 + 1.U, src2, tmpBase.U)
  }

  // vlmul encoding b010: two pairs of sources, then the two partial results.
  when(vlmulReg === "b010".U) {
    reduceStep(0, src2 + 1.U, src2, tmpBase.U)
    reduceStep(1, src2 + 3.U, src2 + 2.U, (tmpBase + 1).U)
    reduceStep(2, (tmpBase + 1).U, tmpBase.U, (tmpBase + 2).U)
  }

  // vlmul encoding b011: three tree levels laid out over consecutive slots.
  when(vlmulReg === "b011".U) {
    val level1End = MAX_VLMUL - MAX_VLMUL / 2
    val level2End = MAX_VLMUL - MAX_VLMUL / 4
    val level3End = MAX_VLMUL - MAX_VLMUL / 8
    (0 until MAX_VLMUL).foreach { slot =>
      val uop = csBundle(slot)
      uop.srcType(2) := SrcType.DC
      uop.uopIdx := slot.U
      if (slot < level1End) {
        // Level 1: adjacent source registers.
        uop.lsrc(0) := src2 + (slot * 2 + 1).U
        uop.lsrc(1) := src2 + (slot * 2).U
        uop.ldest := (tmpBase + slot).U
      } else if (slot < level2End) {
        // Level 2: adjacent level-1 temporaries.
        val pairBase = (slot - MAX_VLMUL / 2) * 2
        uop.lsrc(0) := (tmpBase + pairBase + 1).U
        uop.lsrc(1) := (tmpBase + pairBase).U
        uop.ldest := (tmpBase + slot).U
      } else if (slot < level3End) {
        // Level 3: the original addresses slot 6 and temporaries 4..6 literally.
        csBundle(6).lsrc(0) := (tmpBase + 5).U
        csBundle(6).lsrc(1) := (tmpBase + 4).U
        csBundle(6).ldest := (tmpBase + 6).U
      }
    }
  }

  when(!vlmulReg(2) && vlmulReg(1, 0).orR) {
    /*
     * 2 <= vlmul <= 8
     * The uop in slot numOfUop-1 reads src1, the temporary written by the
     * previous step (temp + numOfUop - 2) and dest, and writes dest. These
     * connections come last so they take precedence for that slot.
     */
    val finalUop = csBundle(numOfUop - 1.U)
    finalUop.srcType(2) := SrcType.vp
    finalUop.lsrc(0) := src1
    finalUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
    finalUop.lsrc(2) := dest
    finalUop.ldest := dest
    finalUop.uopIdx := numOfUop - 1.U
  }
}
is(UopSplitType.VEC_VFRED) {
  /*
   * Uop sequence generated for every supported (vlmul, vsew) pair:
   *   1. a pairwise tree over the `regs` source registers starting at src2
   *      (regs - 1 uops, results in temp+0 .. temp+regs-2); empty when regs == 1
   *   2. a chain of fold uops, each reading the previous result on both sources
   *      and raising exactly one isFoldTo1_N flag
   *   3. a final uop reading src1 and the last temporary, writing dest
   * The tree depends only on vlmul; the fold chain and the final uop are
   * additionally gated by vsew. Combinations not listed in `plans` get no
   * connections from this case.
   */
  val tmpBase = VECTOR_TMP_REG_LMUL

  // Connects both vector sources, the destination and uopIdx of uop `slot`.
  def connectUop(slot: Int, s0: UInt, s1: UInt, dst: UInt): Unit = {
    val uop = csBundle(slot)
    uop.lsrc(0) := s0
    uop.lsrc(1) := s1
    uop.ldest := dst
    uop.uopIdx := slot.U
  }

  // Raises the isFoldTo1_<ways> flag of uop `slot`.
  def markFold(slot: Int, ways: Int): Unit = ways match {
    case 2 => csBundle(slot).vpu.fpu.isFoldTo1_2 := true.B
    case 4 => csBundle(slot).vpu.fpu.isFoldTo1_4 := true.B
    case 8 => csBundle(slot).vpu.fpu.isFoldTo1_8 := true.B
    case other => throw new IllegalArgumentException(s"unsupported fold factor $other")
  }

  // Emits the regs-1 pairwise uops: the first regs/2 combine adjacent source
  // registers, the remaining ones combine adjacent temporaries.
  def pairwiseTree(regs: Int): Unit = {
    val half = regs / 2
    for (slot <- 0 until half) {
      connectUop(slot, src2 + (slot * 2 + 1).U, src2 + (slot * 2).U, (tmpBase + slot).U)
    }
    for (slot <- half until regs - 1) {
      val pairBase = (slot - half) * 2
      connectUop(slot, (tmpBase + pairBase + 1).U, (tmpBase + pairBase).U, (tmpBase + slot).U)
    }
  }

  // Emits one fold uop per entry of `folds` starting at `firstSlot`, then the
  // final uop. The first fold reads src2 directly when no tree uop precedes it.
  def foldAndFinish(firstSlot: Int, folds: Seq[Int]): Unit = {
    folds.zipWithIndex.foreach { case (ways, step) =>
      val slot = firstSlot + step
      val previous = if (slot == 0) src2 else (tmpBase + slot - 1).U
      connectUop(slot, previous, previous, (tmpBase + slot).U)
      markFold(slot, ways)
    }
    val finalSlot = firstSlot + folds.length
    connectUop(finalSlot, src1, (tmpBase + finalSlot - 1).U, dest)
  }

  // Fold chains used by the whole-register groupings (m1 .. m8).
  val wholeRegFolds = Seq(
    VSew.e64 -> Seq(2),
    VSew.e32 -> Seq(2, 4),
    VSew.e16 -> Seq(2, 4, 8)
  )

  // (vlmul encoding, number of source registers, fold chain per vsew)
  val plans = Seq(
    (VLmul.m8, 8, wholeRegFolds),
    (VLmul.m4, 4, wholeRegFolds),
    (VLmul.m2, 2, wholeRegFolds),
    (VLmul.m1, 1, wholeRegFolds),
    (VLmul.mf2, 1, Seq(VSew.e32 -> Seq(4), VSew.e16 -> Seq(4, 8))),
    (VLmul.mf4, 1, Seq(VSew.e16 -> Seq(8)))
  )

  plans.foreach { case (lmulEnc, regs, sewPlans) =>
    when(vlmulReg === lmulEnc) {
      pairwiseTree(regs)
      sewPlans.foreach { case (sewEnc, folds) =>
        when(vsewReg === sewEnc) {
          foldAndFinish(regs - 1, folds)
        }
      }
    }
  }
}
1121
is(UopSplitType.VEC_VFREDOSUM) {
  import yunsuan.VfaluType
  /*
   * Every supported (vlmul, vsew) pair expands into a chain of `uops` uops that
   * pass a running value through the temporary register:
   *   - uop 0 reads src1; every later uop reads the temporary as source 0
   *   - every `perReg`-th uop reads the next register of the src2 group on
   *     sources 1 and 2; the others read the temporary and raise a fold flag
   *   - the last uop writes dest (and reads it as source 2), all others write
   *     the temporary
   * For the pairs flagged `widenAware`, the fold flag switches between
   * isFoldTo1_<perReg/2> (fuOpType is vfwredosum) and isFoldTo1_<perReg>, and
   * the uop right after a register load re-reads that src2 register on
   * source 2 when fuOpType is vfwredosum.
   */
  val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum
  def tmpReg: UInt = VECTOR_TMP_REG_LMUL.U

  // Drives the isFoldTo1_<ways> flag of uop `slot` with `value`.
  def setFold(slot: Int, ways: Int, value: Bool): Unit = ways match {
    case 2 => csBundle(slot).vpu.fpu.isFoldTo1_2 := value
    case 4 => csBundle(slot).vpu.fpu.isFoldTo1_4 := value
    case 8 => csBundle(slot).vpu.fpu.isFoldTo1_8 := value
    case other => throw new IllegalArgumentException(s"unsupported fold factor $other")
  }

  // Emits the chain of `uops` uops described above.
  def orderedChain(uops: Int, perReg: Int, widenAware: Boolean): Unit = {
    for (slot <- 0 until uops) {
      val loadsReg = slot % perReg == 0
      val isLast = slot == uops - 1
      val uop = csBundle(slot)

      uop.lsrc(0) := (if (slot == 0) src1 else tmpReg)
      uop.lsrc(1) := (if (loadsReg) src2 + (slot / perReg).U else tmpReg)
      // Priority: register load, then last uop, then the widen-only re-read.
      uop.lsrc(2) := (
        if (loadsReg) src2 + (slot / perReg).U
        else if (isLast) dest
        else if (widenAware && slot % perReg == 1) Mux(isWiden, src2 + (slot / perReg).U, tmpReg)
        else tmpReg
      )
      uop.ldest := (if (isLast) dest else tmpReg)

      val folds = (!loadsReg).B
      if (widenAware) {
        setFold(slot, perReg / 2, isWiden && folds)
        setFold(slot, perReg, !isWiden && folds)
      } else {
        setFold(slot, perReg, folds)
      }
      uop.uopIdx := slot.U
    }
  }

  // vlmul encoding -> (vsew encoding, number of uops, uops per src2 register, widenAware)
  val plans = Seq(
    VLmul.m8 -> Seq(
      (VSew.e64, 16, 2, false),
      (VSew.e32, 32, 4, false),
      (VSew.e16, 64, 8, false)
    ),
    VLmul.m4 -> Seq(
      (VSew.e64, 8, 2, false),
      (VSew.e32, 16, 4, true),
      (VSew.e16, 32, 8, true)
    ),
    VLmul.m2 -> Seq(
      (VSew.e64, 4, 2, false),
      (VSew.e32, 8, 4, true),
      (VSew.e16, 16, 8, true)
    ),
    VLmul.m1 -> Seq(
      (VSew.e64, 2, 2, false),
      (VSew.e32, 4, 4, true),
      (VSew.e16, 8, 8, true)
    ),
    VLmul.mf2 -> Seq(
      (VSew.e32, 2, 4, true),
      (VSew.e16, 4, 8, true)
    ),
    VLmul.mf4 -> Seq(
      (VSew.e16, 2, 8, true)
    )
  )

  plans.foreach { case (lmulEnc, sewPlans) =>
    when(vlmulReg === lmulEnc) {
      sewPlans.foreach { case (sewEnc, uops, perReg, widenAware) =>
        when(vsewReg === sewEnc) {
          orderedChain(uops, perReg, widenAware)
        }
      }
    }
  }
}
1314
is(UopSplitType.VEC_SLIDEUP) {
  // Uop 0: integer to vector move. Source 0 is an immediate or an integer
  // register depending on src1IsImm; the result goes to the temporary register.
  val offsetMove = csBundle(0)
  val moveOp = Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0))
  offsetMove.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  offsetMove.srcType(1) := SrcType.imm
  offsetMove.srcType(2) := SrcType.imm
  offsetMove.lsrc(1) := 0.U
  offsetMove.ldest := VECTOR_TMP_REG_LMUL.U
  offsetMove.fuType := FuType.i2v.U
  offsetMove.fuOpType := Cat(moveOp, vsewReg)
  offsetMove.vecWen := true.B

  // LMUL: destination register i gets i+1 uops (j = 0..i), stored in a
  // triangular layout right after the move uop. Step j reads source register
  // src2+j; the first step reads dest+i as old value, the last step writes
  // dest+i, and the steps in between chain through temp+j / temp+j+1.
  for {
    i <- 0 until MAX_VLMUL
    j <- 0 to i
  } {
    val seqNo = i * (i + 1) / 2 + j
    val uop = csBundle(seqNo + 1)
    val oldValueReg = if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U
    val resultReg = if (j == i) dest + i.U else (VECTOR_TMP_REG_LMUL + j + 1).U

    uop.srcType(0) := SrcType.vp
    uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(1) := src2 + j.U
    uop.lsrc(2) := oldValueReg
    uop.ldest := resultReg
    uop.uopIdx := seqNo.U
  }
}
1342
is(UopSplitType.VEC_SLIDEDOWN) {
  // Uop 0: integer to vector move. Source 0 is an immediate or an integer
  // register depending on src1IsImm; the result goes to the temporary register.
  val offsetMove = csBundle(0)
  val moveOp = Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0))
  offsetMove.srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
  offsetMove.srcType(1) := SrcType.imm
  offsetMove.srcType(2) := SrcType.imm
  offsetMove.lsrc(1) := 0.U
  offsetMove.ldest := VECTOR_TMP_REG_LMUL.U
  offsetMove.fuType := FuType.i2v.U
  offsetMove.fuOpType := Cat(moveOp, vsewReg)
  offsetMove.vecWen := true.B

  // LMUL: the triangular layout is addressed backwards from numOfUop, so the
  // slots are dynamic indices. Iteration order (i ascending, j descending) is
  // kept as in the original because later connections override earlier ones.
  for {
    i <- 0 until MAX_VLMUL
    j <- i to 0 by -1
  } {
    when(i.U < lmul) {
      // Distance of this uop's slot from numOfUop.
      val fromEnd = i * (i + 1) / 2 + i - j + 1
      def groupReg: UInt = dest + lmul - 1.U - i.U
      val oldValueReg = if (j == 0) groupReg else (VECTOR_TMP_REG_LMUL + j).U
      val resultReg = if (j == i) groupReg else (VECTOR_TMP_REG_LMUL + j + 1).U

      val uop = csBundle(numOfUop - fromEnd.U)
      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + lmul - 1.U - j.U
      uop.lsrc(2) := oldValueReg
      uop.ldest := resultReg
      uop.uopIdx := numOfUop - (fromEnd + 1).U
    }
  }
}
1372
is(UopSplitType.VEC_M0X) {
  // LMUL: uop i reads temp+i-1 (don't-care for the first uop) and src2, and
  // writes the vector temporary temp+i.
  (0 until MAX_VLMUL).foreach { i =>
    val uop = csBundle(i)
    uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    uop.lsrc(1) := src2
    // lsrc(2) is intentionally left untouched here (don't care).
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
    uop.uopIdx := i.U
  }

  // The uop in slot numOfUop-1 is overridden after the loop: it writes the
  // integer register `dest` instead of a vector temporary, and the integer
  // write is suppressed when dest is register 0.
  val finalUop = csBundle(numOfUop - 1.U)
  finalUop.rfWen := dest =/= 0.U
  finalUop.fpWen := false.B
  finalUop.vecWen := false.B
  finalUop.ldest := dest
}
1394
1395    is(UopSplitType.VEC_MVV) {
1396      // LMUL
1397      for (i <- 0 until MAX_VLMUL) {
1398        val srcType0 = if (i == 0) SrcType.DC else SrcType.vp
1399        csBundle(i * 2 + 0).srcType(0) := srcType0
1400        csBundle(i * 2 + 0).srcType(1) := SrcType.vp
1401        csBundle(i * 2 + 0).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1402        csBundle(i * 2 + 0).lsrc(1) := src2
1403        csBundle(i * 2 + 0).lsrc(2) := dest + i.U
1404        csBundle(i * 2 + 0).ldest := dest + i.U
1405        csBundle(i * 2 + 0).uopIdx := (i * 2 + 0).U
1406
1407        csBundle(i * 2 + 1).srcType(0) := srcType0
1408        csBundle(i * 2 + 1).srcType(1) := SrcType.vp
1409        csBundle(i * 2 + 1).lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
1410        csBundle(i * 2 + 1).lsrc(1) := src2
1411        // csBundle(i).lsrc(2) := dest + i.U  DontCare
1412        csBundle(i * 2 + 1).ldest := (VECTOR_TMP_REG_LMUL + i).U
1413        csBundle(i * 2 + 1).uopIdx := (i * 2 + 1).U
1414      }
1415    }
1416    is(UopSplitType.VEC_VWW) {
1417      for (i <- 0 until MAX_VLMUL*2) {
1418        when(i.U < lmul){
1419          csBundle(i).srcType(2) := SrcType.DC
1420          csBundle(i).lsrc(0) := src2 + i.U
1421          csBundle(i).lsrc(1) := src2 + i.U
1422          // csBundle(i).lsrc(2) := dest + (2 * i).U
1423          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1424          csBundle(i).uopIdx :=  i.U
1425        } otherwise {
1426          csBundle(i).srcType(2) := SrcType.DC
1427          csBundle(i).lsrc(0) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W)) + 1.U
1428          csBundle(i).lsrc(1) := VECTOR_TMP_REG_LMUL.U + Cat((i.U-lmul),0.U(1.W))
1429          // csBundle(i).lsrc(2) := dest + (2 * i).U
1430          csBundle(i).ldest := (VECTOR_TMP_REG_LMUL + i).U
1431          csBundle(i).uopIdx := i.U
1432        }
1433        csBundle(numOfUop-1.U).srcType(2) := SrcType.vp
1434        csBundle(numOfUop-1.U).lsrc(0) := src1
1435        csBundle(numOfUop-1.U).lsrc(2) := dest
1436        csBundle(numOfUop-1.U).ldest := dest
1437      }
1438    }
1439    is(UopSplitType.VEC_RGATHER) {
1440      def genCsBundle_VEC_RGATHER(len:Int): Unit ={
1441        for (i <- 0 until len)
1442          for (j <- 0 until len) {
1443            // csBundle(i * len + j).srcType(0) := SrcType.vp // SrcType.imm
1444            // csBundle(i * len + j).srcType(1) := SrcType.vp
1445            // csBundle(i * len + j).srcType(2) := SrcType.vp
1446            csBundle(i * len + j).lsrc(0) := src1 + i.U
1447            csBundle(i * len + j).lsrc(1) := src2 + j.U
1448            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j - 1).U
1449            csBundle(i * len + j).lsrc(2) := vd_old
1450            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1451            csBundle(i * len + j).ldest := vd
1452            csBundle(i * len + j).uopIdx := (i * len + j).U
1453          }
1454      }
1455      switch(vlmulReg) {
1456        is("b001".U ){
1457          genCsBundle_VEC_RGATHER(2)
1458        }
1459        is("b010".U ){
1460          genCsBundle_VEC_RGATHER(4)
1461        }
1462        is("b011".U ){
1463          genCsBundle_VEC_RGATHER(8)
1464        }
1465      }
1466    }
1467    is(UopSplitType.VEC_RGATHER_VX) {
1468      def genCsBundle_RGATHER_VX(len:Int): Unit ={
1469        for (i <- 0 until len)
1470          for (j <- 0 until len) {
1471            csBundle(i * len + j + 1).srcType(0) := SrcType.vp
1472            // csBundle(i * len + j + 1).srcType(1) := SrcType.vp
1473            // csBundle(i * len + j + 1).srcType(2) := SrcType.vp
1474            csBundle(i * len + j + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1475            csBundle(i * len + j + 1).lsrc(1) := src2 + j.U
1476            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1477            csBundle(i * len + j + 1).lsrc(2) := vd_old
1478            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1479            csBundle(i * len + j + 1).ldest := vd
1480            csBundle(i * len + j + 1).uopIdx := (i * len + j).U
1481          }
1482      }
1483      // i to vector move
1484      csBundle(0).srcType(0) := Mux(src1IsImm, SrcType.imm, SrcType.reg)
1485      csBundle(0).srcType(1) := SrcType.imm
1486      csBundle(0).srcType(2) := SrcType.imm
1487      csBundle(0).lsrc(1) := 0.U
1488      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1489      csBundle(0).fuType := FuType.i2v.U
1490      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
1491      csBundle(0).rfWen := false.B
1492      csBundle(0).fpWen := false.B
1493      csBundle(0).vecWen := true.B
1494      genCsBundle_RGATHER_VX(1)
1495      switch(vlmulReg) {
1496        is("b001".U ){
1497          genCsBundle_RGATHER_VX(2)
1498        }
1499        is("b010".U ){
1500          genCsBundle_RGATHER_VX(4)
1501        }
1502        is("b011".U ){
1503          genCsBundle_RGATHER_VX(8)
1504        }
1505      }
1506    }
1507    is(UopSplitType.VEC_RGATHEREI16) {
1508      def genCsBundle_VEC_RGATHEREI16_SEW8(len:Int): Unit ={
1509        for (i <- 0 until len)
1510          for (j <- 0 until len) {
1511            val vd_old0 = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2-1).U
1512            val vd0 = (VECTOR_TMP_REG_LMUL + j*2 ).U
1513            csBundle((i * len + j)*2+0).lsrc(0) := src1 + (i*2+0).U
1514            csBundle((i * len + j)*2+0).lsrc(1) := src2 + j.U
1515            csBundle((i * len + j)*2+0).lsrc(2) := vd_old0
1516            csBundle((i * len + j)*2+0).ldest := vd0
1517            csBundle((i * len + j)*2+0).uopIdx := ((i * len + j)*2+0).U
1518            val vd_old1 = (VECTOR_TMP_REG_LMUL + j*2).U
1519            val vd1 = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j*2+1 ).U
1520            csBundle((i * len + j)*2+1).lsrc(0) := src1 + (i*2+1).U
1521            csBundle((i * len + j)*2+1).lsrc(1) := src2 + j.U
1522            csBundle((i * len + j)*2+1).lsrc(2) := vd_old1
1523            csBundle((i * len + j)*2+1).ldest := vd1
1524            csBundle((i * len + j)*2+1).uopIdx := ((i * len + j)*2+1).U
1525          }
1526      }
1527      def genCsBundle_VEC_RGATHEREI16(len:Int): Unit ={
1528        for (i <- 0 until len)
1529          for (j <- 0 until len) {
1530            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1531            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1532            csBundle(i * len + j).lsrc(0) := src1 + i.U
1533            csBundle(i * len + j).lsrc(1) := src2 + j.U
1534            csBundle(i * len + j).lsrc(2) := vd_old
1535            csBundle(i * len + j).ldest := vd
1536            csBundle(i * len + j).uopIdx := (i * len + j).U
1537          }
1538      }
1539      def genCsBundle_VEC_RGATHEREI16_SEW32(len:Int): Unit ={
1540        for (i <- 0 until len)
1541          for (j <- 0 until len) {
1542            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1543            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1544            csBundle(i * len + j).lsrc(0) := src1 + (i / 2).U
1545            csBundle(i * len + j).lsrc(1) := src2 + j.U
1546            csBundle(i * len + j).lsrc(2) := vd_old
1547            csBundle(i * len + j).ldest := vd
1548            csBundle(i * len + j).uopIdx := (i * len + j).U
1549          }
1550      }
1551      def genCsBundle_VEC_RGATHEREI16_SEW64(len:Int): Unit ={
1552        for (i <- 0 until len)
1553          for (j <- 0 until len) {
1554            val vd_old = if(j==0) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j-1).U
1555            val vd = if(j==len-1) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j).U
1556            csBundle(i * len + j).lsrc(0) := src1 + (i / 4).U
1557            csBundle(i * len + j).lsrc(1) := src2 + j.U
1558            csBundle(i * len + j).lsrc(2) := vd_old
1559            csBundle(i * len + j).ldest := vd
1560            csBundle(i * len + j).uopIdx := (i * len + j).U
1561          }
1562      }
1563      when(!vsewReg.orR){
1564        genCsBundle_VEC_RGATHEREI16_SEW8(1)
1565      }.elsewhen(vsewReg === VSew.e32){
1566        genCsBundle_VEC_RGATHEREI16_SEW32(1)
1567      }.elsewhen(vsewReg === VSew.e64){
1568        genCsBundle_VEC_RGATHEREI16_SEW64(1)
1569      }.otherwise{
1570        genCsBundle_VEC_RGATHEREI16(1)
1571      }
1572      switch(vlmulReg) {
1573        is("b001".U) {
1574          when(!vsewReg.orR) {
1575            genCsBundle_VEC_RGATHEREI16_SEW8(2)
1576          }.elsewhen(vsewReg === VSew.e32){
1577            genCsBundle_VEC_RGATHEREI16_SEW32(2)
1578          }.elsewhen(vsewReg === VSew.e64){
1579            genCsBundle_VEC_RGATHEREI16_SEW64(2)
1580          }.otherwise{
1581            genCsBundle_VEC_RGATHEREI16(2)
1582          }
1583        }
1584        is("b010".U) {
1585          when(!vsewReg.orR) {
1586            genCsBundle_VEC_RGATHEREI16_SEW8(4)
1587          }.elsewhen(vsewReg === VSew.e32){
1588            genCsBundle_VEC_RGATHEREI16_SEW32(4)
1589          }.elsewhen(vsewReg === VSew.e64){
1590            genCsBundle_VEC_RGATHEREI16_SEW64(4)
1591          }.otherwise{
1592            genCsBundle_VEC_RGATHEREI16(4)
1593          }
1594        }
1595        is("b011".U) {
1596          when(vsewReg === VSew.e32){
1597            genCsBundle_VEC_RGATHEREI16_SEW32(8)
1598          }.elsewhen(vsewReg === VSew.e64){
1599            genCsBundle_VEC_RGATHEREI16_SEW64(8)
1600          }.otherwise{
1601            genCsBundle_VEC_RGATHEREI16(8)
1602          }
1603        }
1604      }
1605    }
1606    is(UopSplitType.VEC_COMPRESS) {
1607      def genCsBundle_VEC_COMPRESS(len:Int): Unit = {
1608        for (i <- 0 until len) {
1609          val jlen = if (i == len-1) i+1 else i+2
1610          for (j <- 0 until jlen) {
1611            val vd_old = if(i==j) (dest + i.U) else (VECTOR_TMP_REG_LMUL + j + 1).U
1612            val vd = if(i==len-1) (dest + j.U) else {
1613              if (j == i+1) VECTOR_TMP_REG_LMUL.U  else (VECTOR_TMP_REG_LMUL + j + 1).U
1614            }
1615            csBundle(i*(i+3)/2 + j).vecWen := true.B
1616            csBundle(i*(i+3)/2 + j).v0Wen := false.B
1617            val src13Type = if (j == i+1) DontCare else SrcType.vp
1618            csBundle(i*(i+3)/2 + j).srcType(0) := src13Type
1619            csBundle(i*(i+3)/2 + j).srcType(1) := SrcType.vp
1620            csBundle(i*(i+3)/2 + j).srcType(2) := src13Type
1621            if (i == 0) {
1622              csBundle(i*(i+3)/2 + j).lsrc(0) := src1
1623            } else {
1624              csBundle(i*(i+3)/2 + j).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1625            }
1626            csBundle(i*(i+3)/2 + j).lsrc(1) := src2 + i.U
1627            csBundle(i*(i+3)/2 + j).lsrc(2) := vd_old
1628            csBundle(i*(i+3)/2 + j).ldest := vd
1629            csBundle(i*(i+3)/2 + j).uopIdx := (i*(i+3)/2 + j).U
1630          }
1631        }
1632      }
1633      switch(vlmulReg) {
1634        is("b001".U ){
1635          genCsBundle_VEC_COMPRESS(2)
1636        }
1637        is("b010".U ){
1638          genCsBundle_VEC_COMPRESS(4)
1639        }
1640        is("b011".U ){
1641          genCsBundle_VEC_COMPRESS(8)
1642        }
1643      }
1644    }
1645    is(UopSplitType.VEC_MVNR) {
1646      for (i <- 0 until MAX_VLMUL) {
1647        csBundle(i).lsrc(0) := src1 + i.U
1648        csBundle(i).lsrc(1) := src2 + i.U
1649        csBundle(i).lsrc(2) := dest + i.U
1650        csBundle(i).ldest := dest + i.U
1651        csBundle(i).uopIdx := i.U
1652      }
1653    }
1654    is(UopSplitType.VEC_US_LDST) {
1655      /*
1656      FMV.D.X
1657       */
1658      csBundle(0).srcType(0) := SrcType.reg
1659      csBundle(0).srcType(1) := SrcType.imm
1660      csBundle(0).lsrc(1) := 0.U
1661      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1662      csBundle(0).fuType := FuType.i2v.U
1663      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1664      csBundle(0).rfWen := false.B
1665      csBundle(0).fpWen := false.B
1666      csBundle(0).vecWen := true.B
1667      csBundle(0).vlsInstr := true.B
1668      //LMUL
1669      for (i <- 0 until MAX_VLMUL) {
1670        csBundle(i + 1).srcType(0) := SrcType.vp
1671        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1672        csBundle(i + 1).lsrc(2) := dest + i.U // old vd
1673        csBundle(i + 1).ldest := dest + i.U
1674        csBundle(i + 1).uopIdx := i.U
1675        csBundle(i + 1).vlsInstr := true.B
1676      }
1677      csBundle.head.waitForward := isUsSegment
1678      csBundle(numOfUop - 1.U).blockBackward := isUsSegment
1679    }
1680    is(UopSplitType.VEC_S_LDST) {
1681      /*
1682      FMV.D.X
1683       */
1684      csBundle(0).srcType(0) := SrcType.reg
1685      csBundle(0).srcType(1) := SrcType.imm
1686      csBundle(0).lsrc(1) := 0.U
1687      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1688      csBundle(0).fuType := FuType.i2v.U
1689      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1690      csBundle(0).rfWen := false.B
1691      csBundle(0).fpWen := false.B
1692      csBundle(0).vecWen := true.B
1693      csBundle(0).vlsInstr := true.B
1694
1695      csBundle(1).srcType(0) := SrcType.reg
1696      csBundle(1).srcType(1) := SrcType.imm
1697      csBundle(1).lsrc(0) := latchedInst.lsrc(1)
1698      csBundle(1).lsrc(1) := 0.U
1699      csBundle(1).ldest := (VECTOR_TMP_REG_LMUL + 1).U
1700      csBundle(1).fuType := FuType.i2v.U
1701      csBundle(1).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1702      csBundle(1).rfWen := false.B
1703      csBundle(1).fpWen := false.B
1704      csBundle(1).vecWen := true.B
1705      csBundle(1).vlsInstr := true.B
1706
1707      //LMUL
1708      for (i <- 0 until MAX_VLMUL) {
1709        csBundle(i + 2).srcType(0) := SrcType.vp
1710        csBundle(i + 2).srcType(1) := SrcType.vp
1711        csBundle(i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1712        csBundle(i + 2).lsrc(1) := (VECTOR_TMP_REG_LMUL + 1).U
1713        csBundle(i + 2).lsrc(2) := dest + i.U // old vd
1714        csBundle(i + 2).ldest := dest + i.U
1715        csBundle(i + 2).uopIdx := i.U
1716        csBundle(i + 2).vlsInstr := true.B
1717      }
1718      csBundle.head.waitForward := isSdSegment
1719      csBundle(numOfUop - 1.U).blockBackward := isSdSegment
1720    }
1721    is(UopSplitType.VEC_I_LDST) {
1722      def genCsBundle_SEGMENT_INDEXED_LOADSTORE(lmul:Int, nf:Int): Unit ={
1723        for (i <- 0 until MAX_VLMUL) {
1724          val vecWen = if (i < lmul * nf) true.B else false.B
1725          val src2Type = if (i < lmul * nf) SrcType.vp else SrcType.no
1726          csBundle(i + 1).srcType(0) := SrcType.vp
1727          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1728          csBundle(i + 1).srcType(1) := SrcType.no
1729          csBundle(i + 1).lsrc(1) := src2 + i.U
1730          csBundle(i + 1).srcType(2) := src2Type
1731          csBundle(i + 1).lsrc(2) := dest + i.U
1732          csBundle(i + 1).ldest := dest + i.U
1733          csBundle(i + 1).rfWen := false.B
1734          csBundle(i + 1).fpWen := false.B
1735          csBundle(i + 1).vecWen := vecWen
1736          csBundle(i + 1).uopIdx := i.U
1737          csBundle(i + 1).vlsInstr := true.B
1738        }
1739      }
1740      def genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(emul:Int): Unit ={
1741        for (i <- 0 until MAX_VLMUL) {
1742          val src1Type = if (i < emul) SrcType.vp else SrcType.no
1743          csBundle(i + 1).srcType(1) := src1Type
1744          csBundle(i + 1).lsrc(1) := src2 + i.U
1745        }
1746      }
1747
1748      val vlmul = vlmulReg
1749      val vsew = Cat(0.U(1.W), vsewReg)
1750      val veew = Cat(0.U(1.W), width)
1751      val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
1752      val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
1753        "b001".U -> 1.U,
1754        "b010".U -> 2.U,
1755        "b011".U -> 3.U
1756      ))
1757      val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
1758        "b001".U -> 1.U,
1759        "b010".U -> 2.U,
1760        "b011".U -> 3.U
1761      ))
1762      csBundle(0).srcType(0) := SrcType.reg
1763      csBundle(0).srcType(1) := SrcType.imm
1764      csBundle(0).lsrc(1) := 0.U
1765      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
1766      csBundle(0).fuType := FuType.i2v.U
1767      csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
1768      csBundle(0).rfWen := false.B
1769      csBundle(0).fpWen := false.B
1770      csBundle(0).vecWen := true.B
1771      csBundle(0).vlsInstr := true.B
1772
1773      //LMUL
1774      when(nf === 0.U) {
1775        for (i <- 0 until MAX_VLMUL) {
1776          indexedLSRegOffset(i).src := Cat(simple_emul, simple_lmul)
1777          val offsetVs2 = indexedLSRegOffset(i).outOffsetVs2
1778          val offsetVd = indexedLSRegOffset(i).outOffsetVd
1779          csBundle(i + 1).srcType(0) := SrcType.vp
1780          csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
1781          csBundle(i + 1).lsrc(1) := Mux1H(UIntToOH(offsetVs2, MAX_VLMUL), (0 until MAX_VLMUL).map(j => src2 + j.U))
1782          csBundle(i + 1).srcType(2) := SrcType.vp
1783          // lsrc2 is old vd
1784          csBundle(i + 1).lsrc(2) := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1785          csBundle(i + 1).ldest := Mux1H(UIntToOH(offsetVd, MAX_VLMUL), (0 until MAX_VLMUL).map(j => dest + j.U))
1786          csBundle(i + 1).uopIdx := i.U
1787          csBundle(i + 1).vlsInstr := true.B
1788        }
1789      }.otherwise{
1790        // nf > 1, is segment indexed load/store
1791        // gen src0, vd
1792        switch(simple_lmul) {
1793          is(0.U) {
1794            switch(nf) {
1795              is(1.U) {
1796                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 2)
1797              }
1798              is(2.U) {
1799                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 3)
1800              }
1801              is(3.U) {
1802                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 4)
1803              }
1804              is(4.U) {
1805                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 5)
1806              }
1807              is(5.U) {
1808                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 6)
1809              }
1810              is(6.U) {
1811                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 7)
1812              }
1813              is(7.U) {
1814                genCsBundle_SEGMENT_INDEXED_LOADSTORE(1, 8)
1815              }
1816            }
1817          }
1818          is(1.U) {
1819            switch(nf) {
1820              is(1.U) {
1821                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 2)
1822              }
1823              is(2.U) {
1824                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 3)
1825              }
1826              is(3.U) {
1827                genCsBundle_SEGMENT_INDEXED_LOADSTORE(2, 4)
1828              }
1829            }
1830          }
1831          is(2.U) {
1832            switch(nf) {
1833              is(1.U) {
1834                genCsBundle_SEGMENT_INDEXED_LOADSTORE(4, 2)
1835              }
1836            }
1837          }
1838        }
1839
1840        // gen src1
1841        switch(simple_emul) {
1842          is(0.U) {
1843            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(1)
1844          }
1845          is(1.U) {
1846            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(2)
1847          }
1848          is(2.U) {
1849            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(4)
1850          }
1851          is(3.U) {
1852            genCsBundle_SEGMENT_INDEXED_LOADSTORE_SRC1(8)
1853          }
1854        }
1855
1856        // when is vstore instructions, not set vecwen
1857        when(isVstore) {
1858          for (i <- 0 until MAX_VLMUL) {
1859            csBundle(i + 1).vecWen := false.B
1860          }
1861        }
1862      }
1863      csBundle.head.waitForward := isIxSegment
1864      csBundle(numOfUop - 1.U).blockBackward := isIxSegment
1865    }
1866  }
1867
1868  //readyFromRename Counter
1869  val readyCounter = PriorityMuxDefault(outReadys.map(x => !x).zip((0 until RenameWidth).map(_.U)), RenameWidth.U)
1870
1871  // The left uops of the complex inst in ComplexDecoder can be send out this cycle
1872  val thisAllOut = uopRes <= readyCounter
1873
1874  switch(state) {
1875    is(s_idle) {
1876      when (inValid) {
1877        stateNext := s_active
1878        uopResNext := inUopInfo.numOfUop
1879      }
1880    }
1881    is(s_active) {
1882      when (thisAllOut) {
1883        when (inValid) {
1884          stateNext := s_active
1885          uopResNext := inUopInfo.numOfUop
1886        }.otherwise {
1887          stateNext := s_idle
1888          uopResNext := 0.U
1889        }
1890      }.otherwise {
1891        stateNext := s_active
1892        uopResNext := uopRes - readyCounter
1893      }
1894    }
1895  }
1896
1897  state := Mux(io.redirect, s_idle, stateNext)
1898  uopRes := Mux(io.redirect, 0.U, uopResNext)
1899
1900  val complexNum = Mux(uopRes > readyCounter, readyCounter, uopRes)
1901
1902  for(i <- 0 until RenameWidth) {
1903    outValids(i) := complexNum > i.U
1904    outDecodedInsts(i) := Mux((i.U + numOfUop - uopRes) < maxUopSize.U, csBundle(i.U + numOfUop - uopRes), csBundle(maxUopSize - 1))
1905  }
1906
1907  outComplexNum := Mux(state === s_active, complexNum, 0.U)
1908  inReady := state === s_idle || state === s_active && thisAllOut
1909
1910//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1911//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1912//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1913//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1914//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1915//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1916//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1917//
1918//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1919//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1920//    0.U)
1921//  validToRename.zipWithIndex.foreach{
1922//    case(dst, i) =>
1923//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1924//      dst := MuxCase(false.B, Seq(
1925//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1926//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1927//      ).toSeq)
1928//  }
1929//
1930//  readyToIBuf.zipWithIndex.foreach {
1931//    case (dst, i) =>
1932//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1933//      dst := MuxCase(true.B, Seq(
1934//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1935//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1936//      ).toSeq)
1937//  }
1938//
1939//  io.deq.decodedInsts := decodedInsts
1940//  io.deq.complexNum := complexNum
1941//  io.deq.validToRename := validToRename
1942//  io.deq.readyToIBuf := readyToIBuf
1943}
1944