xref: /XiangShan/src/main/scala/xiangshan/backend/decode/DecodeUnitComp.scala (revision 55f7beda7bc2aab2215e8be32fd14bdd81f943bd)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.backend.decode
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.rocket.Instructions
23import freechips.rocketchip.util.uintToBitPat
24import utils._
25import utility._
26import xiangshan.ExceptionNO.illegalInstr
27import xiangshan._
28import xiangshan.backend.fu.fpu.FPU
29import xiangshan.backend.fu.FuType
30import freechips.rocketchip.rocket.Instructions._
31import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
32import xiangshan.backend.decode.isa.bitfield.XSInstBitFields
33import xiangshan.backend.fu.vector.Bundles.{VSew, VType, VLmul}
34import yunsuan.VpermType
35import scala.collection.Seq
36import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
37
/**
  * Elaboration-time lookup table that, for one fixed uop index of a non-segment
  * indexed vector load/store, yields the register offsets to apply to vs2 and vd.
  *
  * `src` selects a row encoded as `(emul << 2) | lmul`. Both fields are enumerated
  * over 0..3 and are used as shift amounts, i.e. they are treated as log2 values.
  * The 6-bit decoder result is split into `outOffsetVs2` (bits 5..3) and
  * `outOffsetVd` (bits 2..0).
  *
  * @param uopIdx index of the uop this table instance describes
  */
class indexedLSUopTable(uopIdx:Int) extends Module {
  val src = IO(Input(UInt(4.W)))
  val outOffsetVs2 = IO(Output(UInt(3.W)))
  val outOffsetVd = IO(Output(UInt(3.W)))

  /**
    * Computes the (vs2, vd) register offsets of uop `uopIdx`.
    *
    * @param lmul   shift amount describing the data register group size (1 << lmul)
    * @param emul   shift amount describing the index register group size (1 << emul)
    * @param uopIdx index of the uop inside the instruction
    * @return `(offsetVs2, offsetVd)`; `(0, 0)` when `uopIdx` is outside the uop range
    *         implied by the larger of `lmul` / `emul`
    */
  def genCsBundle_VEC_INDEXED_LDST(lmul:Int, emul:Int, uopIdx:Int): (Int, Int) = {
    // only consider non segment indexed load/store
    if (lmul < emul) {
      // lmul < emul: the uop count follows emul, so vs2 advances every uop while
      // vd advances once every 2^(emul - lmul) uops
      val offset = 1 << (emul - lmul)
      if ((0 until (1 << emul)).contains(uopIdx)) (uopIdx, uopIdx / offset) else (0, 0)
    } else {
      // lmul >= emul: the uop count follows lmul, so vd advances every uop while
      // vs2 advances once every 2^(lmul - emul) uops
      val offset = 1 << (lmul - emul)
      if ((0 until (1 << lmul)).contains(uopIdx)) (uopIdx / offset, uopIdx) else (0, 0)
    }
  }

  // indexed load/store: one row per (emul, lmul) pair, emul-major, holding
  // (emul, lmul, offsetVs2, offsetVd) for this table's uopIdx
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
    } yield {
      val (offsetVs2, offsetVd) = genCsBundle_VEC_INDEXED_LDST(lmul, emul, uopIdx)
      (emul, lmul, offsetVs2, offsetVd)
    }

  // key = {emul, lmul} (4 bits), value = {offsetVs2, offsetVd} (6 bits);
  // inputs not listed in the table decode to all zeros
  val out = decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, offsetVs2, offsetVd) =>
      (BitPat((emul << 2 | lmul).U(4.W)), BitPat((offsetVs2 << 3 | offsetVd).U(6.W)))
  }, BitPat.N(6)))
  outOffsetVs2 := out(5, 3)
  outOffsetVd := out(2, 0)
}
78
/**
  * Constants shared by the vector uop-splitting logic in the decode stage.
  */
trait VectorConstants {
  /** Loop bound used when unrolling per-register uops, and the number of indexed LS offset tables built. */
  val MAX_VLMUL: Int = 8

  /** Logical register index used as the temporary destination of the vsetvl vtype move. */
  val FP_TMP_REG_MV: Int = 32

  /** First logical index of the temporary vector registers: 33~47, i.e. 15 registers. */
  val VECTOR_TMP_REG_LMUL: Int = 33

  /** NOTE(review): presumably the maximum uop count of an indexed load/store; no use is visible here, so confirm. */
  val MAX_INDEXED_LS_UOPNUM: Int = 64
}
85
/**
  * IO bundle of [[DecodeUnitComp]], the decode block that splits one complex
  * instruction into a sequence of uops.
  */
class DecodeUnitCompIO(implicit p: Parameters) extends XSBundle {
  // NOTE(review): presumably a pipeline redirect/flush indication; its use is not
  // visible in this part of the file, so confirm
  val redirect = Input(Bool())
  // Custom CSR control inputs
  val csrCtrl = Input(new CustomCSRCtrlIO)
  // VType connected into the uops generated for vset instructions
  val vtypeBypass = Input(new VType)
  // When the first instruction in the decode vector is a complex instruction, it is
  // passed in here together with its uop information.
  val in = Flipped(DecoupledIO(new Bundle {
    // Decode result of the instruction that is to be split
    val simpleDecodedInst = new DecodedInst
    // Uop information for that instruction (numOfUop, numOfWB and lmul are read from it)
    val uopInfo = new UopInfo
  }))
  val out = new Bundle {
    // RenameWidth decoupled output slots, each carrying one DecodedInst
    val complexDecodedInsts = Vec(RenameWidth, DecoupledIO(new DecodedInst))
  }
  // NOTE(review): presumably the number of uops presented on `out` in the current
  // cycle; the driving logic is not visible here, so confirm
  val complexNum = Output(UInt(3.W))
}
100
101/**
102  * @author zly
103  */
104class DecodeUnitComp()(implicit p : Parameters) extends XSModule with DecodeUnitConstants with VectorConstants {
105  val io = IO(new DecodeUnitCompIO)
106
107  // alias
108  private val inReady = io.in.ready
109  private val inValid = io.in.valid
110  private val inDecodedInst = WireInit(io.in.bits.simpleDecodedInst)
111  private val inInstFields = io.in.bits.simpleDecodedInst.instr.asTypeOf(new XSInstBitFields)
112  private val inUopInfo = io.in.bits.uopInfo
113  private val outValids = io.out.complexDecodedInsts.map(_.valid)
114  private val outReadys = io.out.complexDecodedInsts.map(_.ready)
115  private val outDecodedInsts = io.out.complexDecodedInsts.map(_.bits)
116  private val outComplexNum = io.complexNum
117
118  val maxUopSize = MaxUopSize
119  when (io.in.fire && io.in.bits.simpleDecodedInst.isVset) {
120    when(inInstFields.RD === 0.U && inInstFields.RS1 === 0.U) {
121      inDecodedInst.fuOpType := VSETOpType.keepVl(io.in.bits.simpleDecodedInst.fuOpType)
122    }.elsewhen(inInstFields.RS1 === 0.U) {
123      inDecodedInst.fuOpType := VSETOpType.setVlmax(io.in.bits.simpleDecodedInst.fuOpType)
124    }
125  }
126
127  val latchedInst = RegEnable(inDecodedInst, inValid && inReady)
128  val latchedUopInfo = RegEnable(inUopInfo, inValid && inReady)
129  //input bits
130  private val instFields: XSInstBitFields = latchedInst.instr.asTypeOf(new XSInstBitFields)
131
132  val src1 = Cat(0.U(1.W), instFields.RS1)
133  val src2 = Cat(0.U(1.W), instFields.RS2)
134  val dest = Cat(0.U(1.W), instFields.RD)
135
136  val nf    = instFields.NF
137  val width = instFields.WIDTH(1, 0)
138
139  //output of DecodeUnit
140  val numOfUop = Wire(UInt(log2Up(maxUopSize).W))
141  val numOfWB = Wire(UInt(log2Up(maxUopSize).W))
142  val lmul = Wire(UInt(4.W))
143  val isVsetSimple = Wire(Bool())
144
145  val indexedLSRegOffset = Seq.tabulate(MAX_VLMUL)(i => Module(new indexedLSUopTable(i)))
146  indexedLSRegOffset.map(_.src := 0.U)
147
148  //pre decode
149  lmul := latchedUopInfo.lmul
150  isVsetSimple := latchedInst.isVset
151  val vlmulReg = latchedInst.vpu.vlmul
152  val vsewReg = latchedInst.vpu.vsew
153
154  //Type of uop Div
155  val typeOfSplit = latchedInst.uopSplitType
156  val src1Type = latchedInst.srcType(0)
157  val src1IsImm = src1Type === SrcType.imm
158  val src1IsFp = src1Type === SrcType.fp
159
160  numOfUop := latchedUopInfo.numOfUop
161  numOfWB := latchedUopInfo.numOfWB
162
163  //uops dispatch
164  val s_idle :: s_active :: Nil = Enum(2)
165  val state = RegInit(s_idle)
166  val stateNext = WireDefault(state)
167  val numDecodedUop = RegInit(0.U(log2Up(maxUopSize).W))
168  val uopRes = RegInit(0.U(log2Up(maxUopSize).W))
169  val uopResNext = WireInit(uopRes)
170  val e64 = 3.U(2.W)
171  val isUsSegment = instFields.MOP === 0.U && nf =/= 0.U && (instFields.LUMOP === 0.U || instFields.LUMOP === "b10000".U)
172  val isIxSegment = instFields.MOP(0) === 1.U && nf =/= 0.U
173  val isSdSegment = instFields.MOP === "b10".U && nf =/= 0.U
174
175  //uop div up to maxUopSize
176  val csBundle = Wire(Vec(maxUopSize, new DecodedInst))
177  csBundle.foreach { case dst =>
178    dst := latchedInst
179    dst.numUops := latchedUopInfo.numOfUop
180    dst.numWB := latchedUopInfo.numOfWB
181    dst.firstUop := false.B
182    dst.lastUop := false.B
183    dst.vlsInstr := false.B
184  }
185
186  csBundle(0).firstUop := true.B
187  csBundle(numOfUop - 1.U).lastUop := true.B
188
189  switch(typeOfSplit) {
190    is(UopSplitType.VSET) {
191      // In simple decoder, rfWen and vecWen are not set
192      when(isVsetSimple) {
193        // Default
194        // uop0 set rd, never flushPipe
195        csBundle(0).fuType := FuType.vsetiwi.U
196        csBundle(0).flushPipe := false.B
197        csBundle(0).rfWen := true.B
198        // uop1 set vl, vsetvl will flushPipe
199        csBundle(1).ldest := VCONFIG_IDX.U
200        csBundle(1).vecWen := true.B
201        when(VSETOpType.isVsetvli(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
202          // write nothing, uop0 is a nop instruction
203          csBundle(0).rfWen := false.B
204          csBundle(0).fpWen := false.B
205          csBundle(0).vecWen := false.B
206          csBundle(1).fuType := FuType.vsetfwf.U
207          csBundle(1).srcType(0) := SrcType.vp
208          csBundle(1).lsrc(0) := VCONFIG_IDX.U
209        }.elsewhen(VSETOpType.isVsetvl(latchedInst.fuOpType) && dest === 0.U && src1 === 0.U) {
210          // uop0: mv vtype gpr to vector region
211          csBundle(0).srcType(0) := SrcType.xp
212          csBundle(0).srcType(1) := SrcType.no
213          csBundle(0).lsrc(1) := 0.U
214          csBundle(0).ldest := FP_TMP_REG_MV.U
215          csBundle(0).fuType := FuType.i2v.U
216          csBundle(0).fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
217          csBundle(0).rfWen := false.B
218          csBundle(0).fpWen := true.B
219          csBundle(0).vecWen := false.B
220          csBundle(0).flushPipe := false.B
221          // uop1: uvsetvcfg_vv
222          csBundle(1).fuType := FuType.vsetfwf.U
223          // vl
224          csBundle(1).srcType(0) := SrcType.vp
225          csBundle(1).lsrc(0) := VCONFIG_IDX.U
226          // vtype
227          csBundle(1).srcType(1) := SrcType.fp
228          csBundle(1).lsrc(1) := FP_TMP_REG_MV.U
229          csBundle(1).vecWen := true.B
230          csBundle(1).ldest := VCONFIG_IDX.U
231        }
232        // use bypass vtype from vtypeGen
233        csBundle(0).vpu.connectVType(io.vtypeBypass)
234        csBundle(1).vpu.connectVType(io.vtypeBypass)
235      }
236    }
237    is(UopSplitType.VEC_VVV) {
238      for (i <- 0 until MAX_VLMUL) {
239        csBundle(i).lsrc(0) := src1 + i.U
240        csBundle(i).lsrc(1) := src2 + i.U
241        csBundle(i).lsrc(2) := dest + i.U
242        csBundle(i).ldest := dest + i.U
243        csBundle(i).uopIdx := i.U
244      }
245    }
246    is(UopSplitType.VEC_VFV) {
247      /*
248      i to vector move
249       */
250      csBundle(0).srcType(0) := SrcType.fp
251      csBundle(0).srcType(1) := SrcType.imm
252      csBundle(0).lsrc(1) := 0.U
253      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
254      csBundle(0).fuType := FuType.f2v.U
255      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
256      csBundle(0).vecWen := true.B
257      csBundle(0).vpu.isReverse := false.B
258      /*
259      LMUL
260       */
261      for (i <- 0 until MAX_VLMUL) {
262        csBundle(i + 1).srcType(0) := SrcType.vp
263        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
264        csBundle(i + 1).lsrc(1) := src2 + i.U
265        csBundle(i + 1).lsrc(2) := dest + i.U
266        csBundle(i + 1).ldest := dest + i.U
267        csBundle(i + 1).uopIdx := i.U
268      }
269    }
270    is(UopSplitType.VEC_EXT2) {
271      for (i <- 0 until MAX_VLMUL / 2) {
272        csBundle(2 * i).lsrc(1) := src2 + i.U
273        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
274        csBundle(2 * i).ldest := dest + (2 * i).U
275        csBundle(2 * i).uopIdx := (2 * i).U
276        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
277        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
278        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
279        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
280      }
281    }
282    is(UopSplitType.VEC_EXT4) {
283      for (i <- 0 until MAX_VLMUL / 4) {
284        csBundle(4 * i).lsrc(1) := src2 + i.U
285        csBundle(4 * i).lsrc(2) := dest + (4 * i).U
286        csBundle(4 * i).ldest := dest + (4 * i).U
287        csBundle(4 * i).uopIdx := (4 * i).U
288        csBundle(4 * i + 1).lsrc(1) := src2 + i.U
289        csBundle(4 * i + 1).lsrc(2) := dest + (4 * i + 1).U
290        csBundle(4 * i + 1).ldest := dest + (4 * i + 1).U
291        csBundle(4 * i + 1).uopIdx := (4 * i + 1).U
292        csBundle(4 * i + 2).lsrc(1) := src2 + i.U
293        csBundle(4 * i + 2).lsrc(2) := dest + (4 * i + 2).U
294        csBundle(4 * i + 2).ldest := dest + (4 * i + 2).U
295        csBundle(4 * i + 2).uopIdx := (4 * i + 2).U
296        csBundle(4 * i + 3).lsrc(1) := src2 + i.U
297        csBundle(4 * i + 3).lsrc(2) := dest + (4 * i + 3).U
298        csBundle(4 * i + 3).ldest := dest + (4 * i + 3).U
299        csBundle(4 * i + 3).uopIdx := (4 * i + 3).U
300      }
301    }
302    is(UopSplitType.VEC_EXT8) {
303      for (i <- 0 until MAX_VLMUL) {
304        csBundle(i).lsrc(1) := src2
305        csBundle(i).lsrc(2) := dest + i.U
306        csBundle(i).ldest := dest + i.U
307        csBundle(i).uopIdx := i.U
308      }
309    }
310    is(UopSplitType.VEC_0XV) {
311      /*
312      i/f to vector move
313       */
314      csBundle(0).srcType(0) := Mux(src1IsFp, SrcType.fp, SrcType.reg)
315      csBundle(0).srcType(1) := SrcType.imm
316      csBundle(0).lsrc(1) := 0.U
317      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
318      csBundle(0).fuType := Mux(src1IsFp, FuType.f2v.U, FuType.i2v.U)
319      csBundle(0).fuOpType := Cat(Mux(src1IsFp, IF2VectorType.fDup2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
320      csBundle(0).rfWen := false.B
321      csBundle(0).fpWen := false.B
322      csBundle(0).vecWen := true.B
323      /*
324      vmv.s.x
325       */
326      csBundle(1).srcType(0) := SrcType.vp
327      csBundle(1).srcType(1) := SrcType.imm
328      csBundle(1).srcType(2) := SrcType.vp
329      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
330      csBundle(1).lsrc(1) := 0.U
331      csBundle(1).lsrc(2) := dest
332      csBundle(1).ldest := dest
333      csBundle(1).rfWen := false.B
334      csBundle(1).fpWen := false.B
335      csBundle(1).vecWen := true.B
336      csBundle(1).uopIdx := 0.U
337    }
338    is(UopSplitType.VEC_VXV) {
339      /*
340      i to vector move
341       */
342      csBundle(0).srcType(0) := SrcType.reg
343      csBundle(0).srcType(1) := SrcType.imm
344      csBundle(0).lsrc(1) := 0.U
345      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
346      csBundle(0).fuType := FuType.i2v.U
347      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
348      csBundle(0).vecWen := true.B
349      csBundle(0).vpu.isReverse := false.B
350      /*
351      LMUL
352       */
353      for (i <- 0 until MAX_VLMUL) {
354        csBundle(i + 1).srcType(0) := SrcType.vp
355        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
356        csBundle(i + 1).lsrc(1) := src2 + i.U
357        csBundle(i + 1).lsrc(2) := dest + i.U
358        csBundle(i + 1).ldest := dest + i.U
359        csBundle(i + 1).uopIdx := i.U
360      }
361    }
362    is(UopSplitType.VEC_VVW) {
363      for (i <- 0 until MAX_VLMUL / 2) {
364        csBundle(2 * i).lsrc(0) := src1 + i.U
365        csBundle(2 * i).lsrc(1) := src2 + i.U
366        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
367        csBundle(2 * i).ldest := dest + (2 * i).U
368        csBundle(2 * i).uopIdx := (2 * i).U
369        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
370        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
371        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
372        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
373        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
374      }
375    }
376    is(UopSplitType.VEC_VFW) {
377      /*
378      f to vector move
379       */
380      csBundle(0).srcType(0) := SrcType.fp
381      csBundle(0).srcType(1) := SrcType.imm
382      csBundle(0).lsrc(1) := 0.U
383      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
384      csBundle(0).fuType := FuType.f2v.U
385      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
386      csBundle(0).rfWen := false.B
387      csBundle(0).fpWen := false.B
388      csBundle(0).vecWen := true.B
389
390      for (i <- 0 until MAX_VLMUL / 2) {
391        csBundle(2 * i + 1).srcType(0) := SrcType.vp
392        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
393        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
394        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
395        csBundle(2 * i + 1).ldest := dest + (2 * i).U
396        csBundle(2 * i + 1).uopIdx := (2 * i).U
397        csBundle(2 * i + 2).srcType(0) := SrcType.vp
398        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
399        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
400        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
401        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
402        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
403      }
404    }
405    is(UopSplitType.VEC_WVW) {
406      for (i <- 0 until MAX_VLMUL / 2) {
407        csBundle(2 * i).lsrc(0) := src1 + i.U
408        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
409        csBundle(2 * i).lsrc(2) := dest + (2 * i).U
410        csBundle(2 * i).ldest := dest + (2 * i).U
411        csBundle(2 * i).uopIdx := (2 * i).U
412        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
413        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
414        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i + 1).U
415        csBundle(2 * i + 1).ldest := dest + (2 * i + 1).U
416        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
417      }
418    }
419    is(UopSplitType.VEC_VXW) {
420      /*
421      i to vector move
422       */
423      csBundle(0).srcType(0) := SrcType.reg
424      csBundle(0).srcType(1) := SrcType.imm
425      csBundle(0).lsrc(1) := 0.U
426      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
427      csBundle(0).fuType := FuType.i2v.U
428      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
429      csBundle(0).vecWen := true.B
430
431      for (i <- 0 until MAX_VLMUL / 2) {
432        csBundle(2 * i + 1).srcType(0) := SrcType.vp
433        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
434        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
435        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
436        csBundle(2 * i + 1).ldest := dest + (2 * i).U
437        csBundle(2 * i + 1).uopIdx := (2 * i).U
438        csBundle(2 * i + 2).srcType(0) := SrcType.vp
439        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
440        csBundle(2 * i + 2).lsrc(1) := src2 + i.U
441        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
442        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
443        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
444      }
445    }
446    is(UopSplitType.VEC_WXW) {
447      /*
448      i to vector move
449       */
450      csBundle(0).srcType(0) := SrcType.reg
451      csBundle(0).srcType(1) := SrcType.imm
452      csBundle(0).lsrc(1) := 0.U
453      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
454      csBundle(0).fuType := FuType.i2v.U
455      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
456      csBundle(0).vecWen := true.B
457
458      for (i <- 0 until MAX_VLMUL / 2) {
459        csBundle(2 * i + 1).srcType(0) := SrcType.vp
460        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
461        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
462        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
463        csBundle(2 * i + 1).ldest := dest + (2 * i).U
464        csBundle(2 * i + 1).uopIdx := (2 * i).U
465        csBundle(2 * i + 2).srcType(0) := SrcType.vp
466        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
467        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
468        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
469        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
470        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
471      }
472    }
473    is(UopSplitType.VEC_WVV) {
474      for (i <- 0 until MAX_VLMUL / 2) {
475
476        csBundle(2 * i).lsrc(0) := src1 + i.U
477        csBundle(2 * i).lsrc(1) := src2 + (2 * i).U
478        csBundle(2 * i).lsrc(2) := dest + i.U
479        csBundle(2 * i).ldest := dest + i.U
480        csBundle(2 * i).uopIdx := (2 * i).U
481        csBundle(2 * i + 1).lsrc(0) := src1 + i.U
482        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i + 1).U
483        csBundle(2 * i + 1).lsrc(2) := dest + i.U
484        csBundle(2 * i + 1).ldest := dest + i.U
485        csBundle(2 * i + 1).uopIdx := (2 * i + 1).U
486      }
487    }
488    is(UopSplitType.VEC_WFW) {
489      /*
490      f to vector move
491       */
492      csBundle(0).srcType(0) := SrcType.fp
493      csBundle(0).srcType(1) := SrcType.imm
494      csBundle(0).lsrc(1) := 0.U
495      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
496      csBundle(0).fuType := FuType.f2v.U
497      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
498      csBundle(0).rfWen := false.B
499      csBundle(0).fpWen := false.B
500      csBundle(0).vecWen := true.B
501
502      for (i <- 0 until MAX_VLMUL / 2) {
503        csBundle(2 * i + 1).srcType(0) := SrcType.vp
504        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
505        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
506        csBundle(2 * i + 1).lsrc(2) := dest + (2 * i).U
507        csBundle(2 * i + 1).ldest := dest + (2 * i).U
508        csBundle(2 * i + 1).uopIdx := (2 * i).U
509        csBundle(2 * i + 2).srcType(0) := SrcType.vp
510        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
511        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
512        csBundle(2 * i + 2).lsrc(2) := dest + (2 * i + 1).U
513        csBundle(2 * i + 2).ldest := dest + (2 * i + 1).U
514        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
515      }
516    }
517    is(UopSplitType.VEC_WXV) {
518      /*
519      i to vector move
520       */
521      csBundle(0).srcType(0) := SrcType.reg
522      csBundle(0).srcType(1) := SrcType.imm
523      csBundle(0).lsrc(1) := 0.U
524      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
525      csBundle(0).fuType := FuType.i2v.U
526      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
527      csBundle(0).vecWen := true.B
528
529      for (i <- 0 until MAX_VLMUL / 2) {
530        csBundle(2 * i + 1).srcType(0) := SrcType.vp
531        csBundle(2 * i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
532        csBundle(2 * i + 1).lsrc(1) := src2 + (2 * i).U
533        csBundle(2 * i + 1).lsrc(2) := dest + i.U
534        csBundle(2 * i + 1).ldest := dest + i.U
535        csBundle(2 * i + 1).uopIdx := (2 * i).U
536        csBundle(2 * i + 2).srcType(0) := SrcType.vp
537        csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
538        csBundle(2 * i + 2).lsrc(1) := src2 + (2 * i + 1).U
539        csBundle(2 * i + 2).lsrc(2) := dest + i.U
540        csBundle(2 * i + 2).ldest := dest + i.U
541        csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
542      }
543    }
544    is(UopSplitType.VEC_VVM) {
545      csBundle(0).lsrc(2) := dest
546      csBundle(0).ldest := dest
547      csBundle(0).uopIdx := 0.U
548      for (i <- 1 until MAX_VLMUL) {
549        csBundle(i).lsrc(0) := src1 + i.U
550        csBundle(i).lsrc(1) := src2 + i.U
551        csBundle(i).lsrc(2) := dest
552        csBundle(i).ldest := dest
553        csBundle(i).uopIdx := i.U
554      }
555    }
556    is(UopSplitType.VEC_VFM) {
557      /*
558      f to vector move
559       */
560      csBundle(0).srcType(0) := SrcType.fp
561      csBundle(0).srcType(1) := SrcType.imm
562      csBundle(0).lsrc(1) := 0.U
563      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
564      csBundle(0).fuType := FuType.f2v.U
565      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
566      csBundle(0).rfWen := false.B
567      csBundle(0).fpWen := false.B
568      csBundle(0).vecWen := true.B
569      //LMUL
570      csBundle(1).srcType(0) := SrcType.vp
571      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
572      csBundle(1).lsrc(2) := dest
573      csBundle(1).ldest := dest
574      csBundle(1).uopIdx := 0.U
575      for (i <- 1 until MAX_VLMUL) {
576        csBundle(i + 1).srcType(0) := SrcType.vp
577        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
578        csBundle(i + 1).lsrc(1) := src2 + i.U
579        csBundle(i + 1).lsrc(2) := dest
580        csBundle(i + 1).ldest := dest
581        csBundle(i + 1).uopIdx := i.U
582      }
583      csBundle(numOfUop - 1.U).ldest := dest
584    }
585    is(UopSplitType.VEC_VXM) {
586      /*
587      i to vector move
588       */
589      csBundle(0).srcType(0) := SrcType.reg
590      csBundle(0).srcType(1) := SrcType.imm
591      csBundle(0).lsrc(1) := 0.U
592      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
593      csBundle(0).fuType := FuType.i2v.U
594      csBundle(0).fuOpType := Cat(Mux(src1IsImm, IF2VectorType.immDup2Vec(2, 0), IF2VectorType.iDup2Vec(2, 0)), vsewReg)
595      csBundle(0).vecWen := true.B
596      //LMUL
597      csBundle(1).srcType(0) := SrcType.vp
598      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
599      csBundle(1).lsrc(2) := dest
600      csBundle(1).ldest := dest
601      csBundle(1).uopIdx := 0.U
602      for (i <- 1 until MAX_VLMUL) {
603        csBundle(i + 1).srcType(0) := SrcType.vp
604        csBundle(i + 1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
605        csBundle(i + 1).lsrc(1) := src2 + i.U
606        csBundle(i + 1).lsrc(2) := dest
607        csBundle(i + 1).ldest := dest
608        csBundle(i + 1).uopIdx := i.U
609      }
610      csBundle(numOfUop - 1.U).ldest := dest
611    }
612    is(UopSplitType.VEC_SLIDE1UP) {
613      /*
614      i to vector move
615       */
616      csBundle(0).srcType(0) := SrcType.reg
617      csBundle(0).srcType(1) := SrcType.imm
618      csBundle(0).lsrc(1) := 0.U
619      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
620      csBundle(0).fuType := FuType.i2v.U
621      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
622      csBundle(0).vecWen := true.B
623      //LMUL
624      csBundle(1).srcType(0) := SrcType.vp
625      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
626      csBundle(1).lsrc(2) := dest
627      csBundle(1).ldest := dest
628      csBundle(1).uopIdx := 0.U
629      for (i <- 1 until MAX_VLMUL) {
630        csBundle(i + 1).srcType(0) := SrcType.vp
631        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
632        csBundle(i + 1).lsrc(1) := src2 + i.U
633        csBundle(i + 1).lsrc(2) := dest + i.U
634        csBundle(i + 1).ldest := dest + i.U
635        csBundle(i + 1).uopIdx := i.U
636      }
637    }
638    is(UopSplitType.VEC_FSLIDE1UP) {
639      /*
640      i to vector move
641       */
642      csBundle(0).srcType(0) := SrcType.fp
643      csBundle(0).srcType(1) := SrcType.imm
644      csBundle(0).lsrc(1) := 0.U
645      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
646      csBundle(0).fuType := FuType.f2v.U
647      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
648      csBundle(0).rfWen := false.B
649      csBundle(0).fpWen := false.B
650      csBundle(0).vecWen := true.B
651      //LMUL
652      csBundle(1).srcType(0) := SrcType.vp
653      csBundle(1).lsrc(0) := VECTOR_TMP_REG_LMUL.U
654      csBundle(1).lsrc(1) := src2
655      csBundle(1).lsrc(2) := dest
656      csBundle(1).ldest := dest
657      csBundle(1).uopIdx := 0.U
658      for (i <- 1 until MAX_VLMUL) {
659        csBundle(i + 1).srcType(0) := SrcType.vp
660        csBundle(i + 1).lsrc(0) := src2 + (i - 1).U
661        csBundle(i + 1).lsrc(1) := src2 + i.U
662        csBundle(i + 1).lsrc(2) := dest + i.U
663        csBundle(i + 1).ldest := dest + i.U
664        csBundle(i + 1).uopIdx := i.U
665      }
666    }
667    is(UopSplitType.VEC_SLIDE1DOWN) { // lmul+lmul = 16
668      /*
669      i to vector move
670       */
671      csBundle(0).srcType(0) := SrcType.reg
672      csBundle(0).srcType(1) := SrcType.imm
673      csBundle(0).lsrc(1) := 0.U
674      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
675      csBundle(0).fuType := FuType.i2v.U
676      csBundle(0).fuOpType := Cat(IF2VectorType.iDup2Vec(2, 0), vsewReg)
677      csBundle(0).vecWen := true.B
678      //LMUL
679      for (i <- 0 until MAX_VLMUL) {
680        csBundle(2 * i + 1).srcType(0) := SrcType.vp
681        csBundle(2 * i + 1).srcType(1) := SrcType.vp
682        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
683        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
684        csBundle(2 * i + 1).lsrc(2) := dest + i.U
685        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
686        csBundle(2 * i + 1).uopIdx := (2 * i).U
687        if (2 * i + 2 < MAX_VLMUL * 2) {
688          csBundle(2 * i + 2).srcType(0) := SrcType.vp
689          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
690          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
691          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
692          csBundle(2 * i + 2).ldest := dest + i.U
693          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
694        }
695      }
696      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
697      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
698      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
699    }
700    is(UopSplitType.VEC_FSLIDE1DOWN) {
701      /*
702      i to vector move
703       */
704      csBundle(0).srcType(0) := SrcType.fp
705      csBundle(0).srcType(1) := SrcType.imm
706      csBundle(0).lsrc(1) := 0.U
707      csBundle(0).ldest := VECTOR_TMP_REG_LMUL.U
708      csBundle(0).fuType := FuType.f2v.U
709      csBundle(0).fuOpType := Cat(IF2VectorType.fDup2Vec(2, 0), vsewReg)
710      csBundle(0).rfWen := false.B
711      csBundle(0).fpWen := false.B
712      csBundle(0).vecWen := true.B
713      //LMUL
714      for (i <- 0 until MAX_VLMUL) {
715        csBundle(2 * i + 1).srcType(0) := SrcType.vp
716        csBundle(2 * i + 1).srcType(1) := SrcType.vp
717        csBundle(2 * i + 1).lsrc(0) := src2 + (i + 1).U
718        csBundle(2 * i + 1).lsrc(1) := src2 + i.U
719        csBundle(2 * i + 1).lsrc(2) := dest + i.U
720        csBundle(2 * i + 1).ldest := VECTOR_TMP_REG_LMUL.U + 1.U
721        csBundle(2 * i + 1).uopIdx := (2 * i).U
722        if (2 * i + 2 < MAX_VLMUL * 2) {
723          csBundle(2 * i + 2).srcType(0) := SrcType.vp
724          csBundle(2 * i + 2).lsrc(0) := VECTOR_TMP_REG_LMUL.U
725          // csBundle(2 * i + 2).lsrc(1) := src2 + i.U         // DontCare
726          csBundle(2 * i + 2).lsrc(2) := VECTOR_TMP_REG_LMUL.U + 1.U
727          csBundle(2 * i + 2).ldest := dest + i.U
728          csBundle(2 * i + 2).uopIdx := (2 * i + 1).U
729        }
730      }
731      csBundle(numOfUop - 1.U).srcType(0) := SrcType.vp
732      csBundle(numOfUop - 1.U).lsrc(0) := VECTOR_TMP_REG_LMUL.U
733      csBundle(numOfUop - 1.U).ldest := dest + lmul - 1.U
734    }
is(UopSplitType.VEC_VRED) {
  // Builds a pairwise tree of uops over the source register group, then overrides the
  // last uop (index numOfUop - 1) so that it combines the tree result with src1 and
  // writes dest.

  // One tree node: reads `hi`/`lo`, writes register VECTOR_TMP_REG_LMUL + slot,
  // and marks the third source as don't-care.
  def reduceStep(slot: Int, hi: UInt, lo: UInt): Unit = {
    val uop = csBundle(slot)
    uop.srcType(2) := SrcType.DC
    uop.lsrc(0) := hi
    uop.lsrc(1) := lo
    uop.ldest := (VECTOR_TMP_REG_LMUL + slot).U
    uop.uopIdx := slot.U
  }

  // Emits regs - 1 tree nodes. The first regs / 2 nodes pair up adjacent registers of
  // the src2 group; every later node pairs up results of earlier nodes.
  def pairwiseTree(regs: Int): Unit = {
    val leaves = regs / 2
    (0 until regs - 1).foreach { slot =>
      if (slot < leaves) {
        reduceStep(slot, src2 + (2 * slot + 1).U, src2 + (2 * slot).U)
      } else {
        val first = 2 * (slot - leaves)
        reduceStep(slot, (VECTOR_TMP_REG_LMUL + first + 1).U, (VECTOR_TMP_REG_LMUL + first).U)
      }
    }
  }

  when(vlmulReg === "b001".U) {
    pairwiseTree(2)
  }
  when(vlmulReg === "b010".U) {
    pairwiseTree(4)
  }
  when(vlmulReg === "b011".U) {
    // NOTE(review): assumes MAX_VLMUL == 8 (uops 0..6 form the tree) — confirm.
    pairwiseTree(MAX_VLMUL)
    // The slot after the tree only gets its third source type and uop index here;
    // its remaining fields come from the override below when it is the last uop.
    csBundle(MAX_VLMUL - 1).srcType(2) := SrcType.DC
    csBundle(MAX_VLMUL - 1).uopIdx := (MAX_VLMUL - 1).U
  }

  // vlmulReg in {b001, b010, b011}: must stay after the tree so it wins by last-connect.
  when(vlmulReg(2) === 0.U && vlmulReg(1, 0).orR) {
    val finalUop = csBundle(numOfUop - 1.U)
    finalUop.srcType(2) := SrcType.vp
    finalUop.lsrc(0) := src1
    finalUop.lsrc(1) := VECTOR_TMP_REG_LMUL.U + numOfUop - 2.U
    finalUop.lsrc(2) := dest
    finalUop.ldest := dest
    finalUop.uopIdx := numOfUop - 1.U
  }
}
is(UopSplitType.VEC_VFRED) {
  // Uop sequence per (vlmul, vsew):
  //   1. a pairwise tree over the src2 register group (only when the group has >= 2 registers),
  //   2. a chain of "fold" uops, each reading the previous result on both sources and
  //      setting exactly one isFoldTo1_x flag,
  //   3. a final uop with lsrc(0) = src1, lsrc(1) = last fold result, ldest = dest.
  // Intermediate results of uop k go to register VECTOR_TMP_REG_LMUL + k.
  val vlmul = vlmulReg
  val vsew = vsewReg

  def tmpReg(k: Int): UInt = (VECTOR_TMP_REG_LMUL + k).U

  // Writes the four fields every uop of this split type sets.
  def emit(slot: Int, s0: UInt, s1: UInt, dst: UInt): Unit = {
    val uop = csBundle(slot)
    uop.lsrc(0) := s0
    uop.lsrc(1) := s1
    uop.ldest := dst
    uop.uopIdx := slot.U
  }

  // Raises the single fold flag matching `ratio`; the other flags are left untouched.
  def markFold(slot: Int, ratio: Int): Unit = ratio match {
    case 2 => csBundle(slot).vpu.fpu.isFoldTo1_2 := true.B
    case 4 => csBundle(slot).vpu.fpu.isFoldTo1_4 := true.B
    case 8 => csBundle(slot).vpu.fpu.isFoldTo1_8 := true.B
    case other => throw new IllegalArgumentException(s"unsupported fold ratio $other")
  }

  // Emits regs - 1 tree uops (none for regs == 1): the first regs / 2 pair adjacent
  // src2 registers, the rest pair earlier tree results.
  def pairwiseTree(regs: Int): Unit = {
    val leaves = regs / 2
    for (slot <- 0 until regs - 1) {
      if (slot < leaves) {
        emit(slot, src2 + (2 * slot + 1).U, src2 + (2 * slot).U, tmpReg(slot))
      } else {
        val first = 2 * (slot - leaves)
        emit(slot, tmpReg(first + 1), tmpReg(first), tmpReg(slot))
      }
    }
  }

  // Emits one fold uop per entry of `ratios` starting at `firstSlot`, then the final uop.
  // A fold in slot 0 has no earlier result and reads src2 directly.
  def foldChain(firstSlot: Int, ratios: Seq[Int]): Unit = {
    ratios.zipWithIndex.foreach { case (ratio, n) =>
      val slot = firstSlot + n
      val operand = if (slot == 0) src2 else tmpReg(slot - 1)
      emit(slot, operand, operand, tmpReg(slot))
      markFold(slot, ratio)
    }
    val finalSlot = firstSlot + ratios.length
    emit(finalSlot, src1, tmpReg(finalSlot - 1), dest)
  }

  // Fold ratios for LMUL >= 1.
  val wholeRegisterFolds = Seq(
    VSew.e64 -> Seq(2),
    VSew.e32 -> Seq(2, 4),
    VSew.e16 -> Seq(2, 4, 8)
  )

  Seq(VLmul.m8 -> 8, VLmul.m4 -> 4, VLmul.m2 -> 2, VLmul.m1 -> 1).foreach { case (lmulEnc, regs) =>
    when(vlmul === lmulEnc) {
      // The tree depends on LMUL only; it is emitted for every vsew.
      pairwiseTree(regs)
      wholeRegisterFolds.foreach { case (sewEnc, ratios) =>
        when(vsew === sewEnc) {
          foldChain(regs - 1, ratios)
        }
      }
    }
  }

  // Fractional LMUL: the chain starts at a larger fold ratio and skips 1_2.
  when(vlmul === VLmul.mf2) {
    when(vsew === VSew.e32) {
      foldChain(0, Seq(4))
    }
    when(vsew === VSew.e16) {
      foldChain(0, Seq(4, 8))
    }
  }
  when(vlmul === VLmul.mf4) {
    when(vsew === VSew.e16) {
      foldChain(0, Seq(8))
    }
  }
}
1069
is(UopSplitType.VEC_VFREDOSUM) {
  // Emits `vlmax` chained uops. Uop 0 reads src1; every other uop reads register
  // VECTOR_TMP_REG_LMUL on lsrc(0). All uops except the last write VECTOR_TMP_REG_LMUL;
  // the last one writes dest. A uop whose index is a multiple of `perReg` reads the next
  // register of the src2 group; the remaining uops have a fold flag set.
  import yunsuan.VfaluType
  val vlmul = vlmulReg
  val vsew = vsewReg
  val isWiden = latchedInst.fuOpType === VfaluType.vfwredosum

  def tmpReg: UInt = VECTOR_TMP_REG_LMUL.U

  // Drives the fold flag selected by `ratio` with `enable`.
  def setFold(slot: Int, ratio: Int, enable: Bool): Unit = ratio match {
    case 2 => csBundle(slot).vpu.fpu.isFoldTo1_2 := enable
    case 4 => csBundle(slot).vpu.fpu.isFoldTo1_4 := enable
    case 8 => csBundle(slot).vpu.fpu.isFoldTo1_8 := enable
    case other => throw new IllegalArgumentException(s"unsupported fold ratio $other")
  }

  // vlmax      number of uops to emit
  // perReg     a new src2 register is read every `perReg` uops
  // widenAware when set, the uop right after a register start takes lsrc(2) from the src2
  //            group if isWiden, and the fold flag used is 1_(perReg/2) for isWiden and
  //            1_perReg otherwise; when clear, only 1_perReg is driven
  def orderedChain(vlmax: Int, perReg: Int, widenAware: Boolean): Unit = {
    for (i <- 0 until vlmax) {
      val startsRegister = i % perReg == 0
      val isLast = i == vlmax - 1
      def groupReg: UInt = src2 + (i / perReg).U
      val uop = csBundle(i)

      uop.lsrc(0) := (if (i == 0) src1 else tmpReg)
      uop.lsrc(1) := (if (startsRegister) groupReg else tmpReg)
      uop.lsrc(2) := (
        if (startsRegister) groupReg
        else if (isLast) dest
        else if (widenAware && i % perReg == 1) Mux(isWiden, groupReg, tmpReg)
        else tmpReg
      )
      uop.ldest := (if (isLast) dest else tmpReg)

      // The flag is driven on every uop: false on register starts, true elsewhere.
      val folding = (!startsRegister).B
      if (widenAware) {
        setFold(i, perReg / 2, isWiden && folding)
        setFold(i, perReg, !isWiden && folding)
      } else {
        setFold(i, perReg, folding)
      }
      uop.uopIdx := i.U
    }
  }

  // vlmul -> Seq((vsew, vlmax, perReg, widenAware))
  // NOTE(review): the LMUL=8 entries and all e64 entries are not widen-aware; for e64 that
  // looks intentional, for LMUL=8 with e32/e16 please confirm against the execution unit.
  val plans = Seq(
    VLmul.m8 -> Seq((VSew.e64, 16, 2, false), (VSew.e32, 32, 4, false), (VSew.e16, 64, 8, false)),
    VLmul.m4 -> Seq((VSew.e64, 8, 2, false), (VSew.e32, 16, 4, true), (VSew.e16, 32, 8, true)),
    VLmul.m2 -> Seq((VSew.e64, 4, 2, false), (VSew.e32, 8, 4, true), (VSew.e16, 16, 8, true)),
    VLmul.m1 -> Seq((VSew.e64, 2, 2, false), (VSew.e32, 4, 4, true), (VSew.e16, 8, 8, true)),
    VLmul.mf2 -> Seq((VSew.e32, 2, 4, true), (VSew.e16, 4, 8, true)),
    VLmul.mf4 -> Seq((VSew.e16, 2, 8, true))
  )

  plans.foreach { case (lmulEnc, sewPlans) =>
    when(vlmul === lmulEnc) {
      sewPlans.foreach { case (sewEnc, vlmax, perReg, widenAware) =>
        when(vsew === sewEnc) {
          orderedChain(vlmax, perReg, widenAware)
        }
      }
    }
  }
}
1262
is(UopSplitType.VEC_SLIDEUP) {
  // Uop 0: i2v move into register VECTOR_TMP_REG_LMUL. The op type is the low three bits
  // of imm2Vec or i2Vec (chosen by src1IsImm) concatenated with vsewReg.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // Destination register i of the group is produced by a chain of i + 1 uops (j = 0..i),
  // one per source register src2 + j. Chains are laid out back to back after uop 0, so
  // chain i starts at slot i * (i + 1) / 2 + 1.
  for {
    i <- 0 until MAX_VLMUL
    j <- 0 to i
  } {
    val slot = i * (i + 1) / 2 + j + 1
    val slideUop = csBundle(slot)
    def groupReg: UInt = dest + i.U

    slideUop.srcType(0) := SrcType.vp
    slideUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
    slideUop.lsrc(1) := src2 + j.U
    // The chain starts from the old destination value and ends by writing the destination;
    // links in between pass through registers VECTOR_TMP_REG_LMUL + 1 ...
    slideUop.lsrc(2) := (if (j == 0) groupReg else (VECTOR_TMP_REG_LMUL + j).U)
    slideUop.ldest := (if (j == i) groupReg else (VECTOR_TMP_REG_LMUL + j + 1).U)
    // uopIdx does not count the leading move uop.
    slideUop.uopIdx := (slot - 1).U
  }
}
1289
is(UopSplitType.VEC_SLIDEDOWN) {
  // Uop 0: i2v move into register VECTOR_TMP_REG_LMUL. The op type is the low three bits
  // of imm2Vec or i2Vec (chosen by src1IsImm) concatenated with vsewReg.
  val moveUop = csBundle(0)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.vecWen := true.B

  // Chains are addressed backwards from the end of the uop list: chain i (i + 1 uops)
  // produces destination register dest + lmul - 1 - i. Only chains with i < lmul are emitted.
  for (i <- 0 until MAX_VLMUL) {
    when(i.U < lmul) {
      for (j <- i to 0 by -1) {
        // Distance of this uop from the end of the list.
        val back = i * (i + 1) / 2 + i - j + 1
        val slideUop = csBundle(numOfUop - back.U)
        def groupReg: UInt = dest + lmul - 1.U - i.U

        slideUop.srcType(0) := SrcType.vp
        slideUop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
        slideUop.lsrc(1) := src2 + lmul - 1.U - j.U
        slideUop.lsrc(2) := (if (j == 0) groupReg else (VECTOR_TMP_REG_LMUL + j).U)
        slideUop.ldest := (if (j == i) groupReg else (VECTOR_TMP_REG_LMUL + j + 1).U)
        // uopIdx does not count the leading move uop, hence one less than the slot.
        slideUop.uopIdx := numOfUop - (back + 1).U
      }
    }
  }
}
1318
is(UopSplitType.VEC_M0X) {
  // One uop per possible register of the group. Uop i reads the result of uop i - 1
  // (register VECTOR_TMP_REG_LMUL + i - 1) and src2, and writes VECTOR_TMP_REG_LMUL + i.
  (0 until MAX_VLMUL).foreach { i =>
    val uop = csBundle(i)
    // The first uop has no predecessor, so its first source is don't-care.
    uop.srcType(0) := (if (i == 0) SrcType.DC else SrcType.vp)
    uop.srcType(1) := SrcType.vp
    uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
    uop.lsrc(1) := src2
    // lsrc(2) is intentionally left alone.
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    uop.rfWen := false.B
    uop.fpWen := false.B
    uop.vecWen := true.B
  }

  // The uop at index lmul - 1 delivers the result: it writes dest with rfWen set instead
  // of vecWen. Must stay after the loop so it wins by last-connect.
  val resultUop = csBundle(lmul - 1.U)
  resultUop.rfWen := true.B
  resultUop.fpWen := false.B
  resultUop.vecWen := false.B
  resultUop.ldest := dest
}
1340
is(UopSplitType.VEC_MVV) {
  // Two uops per register i of the group, both reading register VECTOR_TMP_REG_LMUL + i - 1
  // and src2:
  //   slot 2i     writes dest + i (and also reads it as lsrc(2)),
  //   slot 2i + 1 writes VECTOR_TMP_REG_LMUL + i, which the next pair reads.
  for (i <- 0 until MAX_VLMUL) {
    // The first pair has no predecessor, so its first source is don't-care.
    val firstSrcType = if (i == 0) SrcType.DC else SrcType.vp
    val resultSlot = 2 * i
    val carrySlot = 2 * i + 1

    // Fields common to both uops of the pair.
    Seq(resultSlot, carrySlot).foreach { slot =>
      val uop = csBundle(slot)
      uop.srcType(0) := firstSrcType
      uop.srcType(1) := SrcType.vp
      uop.lsrc(0) := (VECTOR_TMP_REG_LMUL + i - 1).U
      uop.lsrc(1) := src2
      uop.uopIdx := slot.U
    }

    csBundle(resultSlot).lsrc(2) := dest + i.U
    csBundle(resultSlot).ldest := dest + i.U
    // lsrc(2) of the carry uop is intentionally left alone.
    csBundle(carrySlot).ldest := (VECTOR_TMP_REG_LMUL + i).U
  }
}
1362
is(UopSplitType.VEC_M0X_VFIRST) {
  // Single uop: writes dest with rfWen set and fpWen/vecWen cleared
  // (presumably an integer-register result — confirm against the write-back path).
  val onlyUop = csBundle(0)
  onlyUop.ldest := dest
  onlyUop.rfWen := true.B
  Seq(onlyUop.fpWen, onlyUop.vecWen).foreach(_ := false.B)
}
is(UopSplitType.VEC_VWW) {
  // Uops 0 until lmul each read one register of the src2 group on both sources.
  // Later uops combine pairs of earlier results. Uop i always writes register
  // VECTOR_TMP_REG_LMUL + i. The last uop is then overridden to take src1 and dest.
  for (i <- 0 until MAX_VLMUL * 2) {
    val uop = csBundle(i)
    // Identical in both phases, so driven unconditionally.
    uop.srcType(2) := SrcType.DC
    uop.ldest := (VECTOR_TMP_REG_LMUL + i).U
    uop.uopIdx := i.U
    // lsrc(2) is intentionally left alone here.
    when(i.U < lmul) {
      uop.lsrc(0) := src2 + i.U
      uop.lsrc(1) := src2 + i.U
    }.otherwise {
      // Uop lmul + k reads the results of uops 2k and 2k + 1.
      val pairBase = VECTOR_TMP_REG_LMUL.U + Cat((i.U - lmul), 0.U(1.W))
      uop.lsrc(0) := pairBase + 1.U
      uop.lsrc(1) := pairBase
    }
  }

  // Final-uop override, emitted once after the loop instead of once per iteration.
  // It is loop-invariant and, by last-connect, only the final copy ever took effect.
  // lsrc(1) and uopIdx of the last uop keep the values assigned in the loop.
  val finalUop = csBundle(numOfUop - 1.U)
  finalUop.srcType(2) := SrcType.vp
  finalUop.lsrc(0) := src1
  finalUop.lsrc(2) := dest
  finalUop.ldest := dest
}
is(UopSplitType.VEC_RGATHER) {
  // Fills regs * regs uops. For destination register `vdIdx`, a chain of
  // `regs` uops walks over every source register src2 + vs2Idx:
  //   * lsrc(0) is src1 + vdIdx for the whole chain;
  //   * the first uop of a chain reads dest + vdIdx as lsrc(2), later ones read
  //     the temporary written by the previous uop of the chain;
  //   * the last uop of a chain writes dest + vdIdx, earlier ones write
  //     temporary VECTOR_TMP_REG_LMUL + vs2Idx.
  def fillRgatherUops(regs: Int): Unit = {
    for {
      vdIdx <- 0 until regs
      vs2Idx <- 0 until regs
    } {
      val slot = vdIdx * regs + vs2Idx
      val uop = csBundle(slot)
      val chainStart = vs2Idx == 0
      val chainEnd = vs2Idx == regs - 1

      uop.lsrc(0) := src1 + vdIdx.U
      uop.lsrc(1) := src2 + vs2Idx.U
      uop.lsrc(2) := (if (chainStart) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + vs2Idx - 1).U)
      uop.ldest := (if (chainEnd) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + vs2Idx).U)
      uop.uopIdx := slot.U
    }
  }

  // vlmulReg encodings 1, 2 and 3 select 2, 4 and 8 registers. The encodings
  // are mutually exclusive, so at most one table is applied; any other
  // encoding leaves csBundle at the values assigned before this switch.
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (lmulEnc, regs) =>
    when(vlmulReg === lmulEnc.U) {
      fillRgatherUops(regs)
    }
  }
}
is(UopSplitType.VEC_RGATHER_VX) {
  // uop 0: moves the scalar operand (integer register, or immediate when
  // src1IsImm is set) into temporary vector register VECTOR_TMP_REG_LMUL via
  // the i2v function unit. The element width comes from vsewReg.
  val moveUop = csBundle(0)
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(Mux(src1IsImm, IF2VectorType.imm2Vec(2, 0), IF2VectorType.i2Vec(2, 0)), vsewReg)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := VECTOR_TMP_REG_LMUL.U
  moveUop.vecWen := true.B
  moveUop.fpWen := false.B
  moveUop.rfWen := false.B

  // Gather uops start at csBundle slot 1, but their uopIdx starts at 0.
  // Temporary VECTOR_TMP_REG_LMUL holds the moved scalar, so the chain
  // temporaries are shifted up by one compared with the vector-vector form:
  // the uop at position vs2Idx writes VECTOR_TMP_REG_LMUL + vs2Idx + 1
  // (or dest + vdIdx at the end of the chain).
  def fillGatherUops(regs: Int): Unit = {
    for {
      vdIdx <- 0 until regs
      vs2Idx <- 0 until regs
    } {
      val seqNo = vdIdx * regs + vs2Idx
      val uop = csBundle(seqNo + 1)

      uop.srcType(0) := SrcType.vp
      uop.lsrc(0) := VECTOR_TMP_REG_LMUL.U
      uop.lsrc(1) := src2 + vs2Idx.U
      uop.lsrc(2) := (if (vs2Idx == 0) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + vs2Idx).U)
      uop.ldest := (if (vs2Idx == regs - 1) dest + vdIdx.U else (VECTOR_TMP_REG_LMUL + vs2Idx + 1).U)
      uop.uopIdx := seqNo.U
    }
  }

  // Default layout for a single register; the larger tables below override it
  // for vlmulReg encodings 1, 2 and 3.
  fillGatherUops(1)
  switch(vlmulReg) {
    is("b001".U) { fillGatherUops(2) }
    is("b010".U) { fillGatherUops(4) }
    is("b011".U) { fillGatherUops(8) }
  }
}
is(UopSplitType.VEC_RGATHEREI16) {
  // Layout used when vsewReg is all zeros: each (vd register i, vs2 register j)
  // pair needs TWO uops, because two consecutive vs1 registers
  // (src1 + 2*i and src1 + 2*i + 1) feed one destination register.
  // The two uops are chained through temporary VECTOR_TMP_REG_LMUL + 2*j.
  def genCsBundle_VEC_RGATHEREI16_SEW8(len: Int): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val firstSlot = (i * len + j) * 2
      val secondSlot = firstSlot + 1
      val midTmp = (VECTOR_TMP_REG_LMUL + j * 2).U

      // First half: continues the chain from the previous j (or from old vd when j == 0).
      csBundle(firstSlot).lsrc(0) := src1 + (i * 2 + 0).U
      csBundle(firstSlot).lsrc(1) := src2 + j.U
      csBundle(firstSlot).lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 - 1).U)
      csBundle(firstSlot).ldest := midTmp
      csBundle(firstSlot).uopIdx := firstSlot.U

      // Second half: consumes the first half's result; the last j writes vd itself.
      csBundle(secondSlot).lsrc(0) := src1 + (i * 2 + 1).U
      csBundle(secondSlot).lsrc(1) := src2 + j.U
      csBundle(secondSlot).lsrc(2) := midTmp
      csBundle(secondSlot).ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j * 2 + 1).U)
      csBundle(secondSlot).uopIdx := secondSlot.U
    }
  }

  // Layout with one uop per (vd register i, vs2 register j) pair.
  // `vdPerVs1` is how many consecutive destination registers share one vs1
  // register: 1 for the default layout, 2 for SEW=32, 4 for SEW=64.
  // This single helper replaces three copies that differed only in that divisor.
  def genCsBundle_VEC_RGATHEREI16(len: Int, vdPerVs1: Int = 1): Unit = {
    for (i <- 0 until len; j <- 0 until len) {
      val slot = i * len + j
      csBundle(slot).lsrc(0) := src1 + (i / vdPerVs1).U
      csBundle(slot).lsrc(1) := src2 + j.U
      // Chain through temporaries: start from old vd, finish in vd.
      csBundle(slot).lsrc(2) := (if (j == 0) dest + i.U else (VECTOR_TMP_REG_LMUL + j - 1).U)
      csBundle(slot).ldest := (if (j == len - 1) dest + i.U else (VECTOR_TMP_REG_LMUL + j).U)
      csBundle(slot).uopIdx := slot.U
    }
  }

  // Chooses the layout for `len` registers from vsewReg, with the same
  // priority as the original when/elsewhen chain:
  // all-zero vsewReg -> SEW8 layout (only when `hasSew8Layout`), then e32, then
  // e64, otherwise the default layout.
  def genBySew(len: Int, hasSew8Layout: Boolean): Unit = {
    def genWiderSew(): Unit = {
      when(vsewReg === VSew.e32) {
        genCsBundle_VEC_RGATHEREI16(len, vdPerVs1 = 2)
      }.elsewhen(vsewReg === VSew.e64) {
        genCsBundle_VEC_RGATHEREI16(len, vdPerVs1 = 4)
      }.otherwise {
        genCsBundle_VEC_RGATHEREI16(len)
      }
    }
    if (hasSew8Layout) {
      when(!vsewReg.orR) {
        genCsBundle_VEC_RGATHEREI16_SEW8(len)
      }.otherwise {
        genWiderSew()
      }
    } else {
      genWiderSew()
    }
  }

  // Default: single-register layout, overridden below for larger vlmulReg encodings.
  genBySew(1, hasSew8Layout = true)
  switch(vlmulReg) {
    is("b001".U) { genBySew(2, hasSew8Layout = true) }
    is("b010".U) { genBySew(4, hasSew8Layout = true) }
    // The 8-register table has no SEW8 entry, so an all-zero vsewReg falls
    // through to the default layout here.
    // NOTE(review): presumably because that combination would need a
    // 16-register index group and is not a legal configuration; confirm.
    is("b011".U) { genBySew(8, hasSew8Layout = false) }
  }
}
is(UopSplitType.VEC_COMPRESS) {
  // Uops are laid out as a triangle. Row `row` (one per vs2 register) starts
  // at slot row * (row + 3) / 2 and holds row + 2 uops, except the last row,
  // which holds row + 1.
  //   * Columns 0..row produce data: column `col` reads old data from
  //     dest + row when col == row, otherwise from temporary
  //     VECTOR_TMP_REG_LMUL + col + 1. It writes back to that same temporary,
  //     or to dest + col on the last row.
  //   * The extra column (col == row + 1, absent on the last row) writes
  //     temporary VECTOR_TMP_REG_LMUL, which every uop reads as lsrc(3).
  //     Its source 0 and source 2 types are left as DontCare.
  def fillCompressUops(regs: Int): Unit = {
    (0 until regs).foreach { row =>
      val isLastRow = row == regs - 1
      val rowBase = row * (row + 3) / 2
      val rowLen = if (isLastRow) row + 1 else row + 2

      (0 until rowLen).foreach { col =>
        val isExtraUop = col == row + 1
        val uop = csBundle(rowBase + col)

        val oldVd = if (col == row) dest + row.U else (VECTOR_TMP_REG_LMUL + col + 1).U
        val newVd =
          if (isLastRow) dest + col.U
          else if (isExtraUop) VECTOR_TMP_REG_LMUL.U
          else (VECTOR_TMP_REG_LMUL + col + 1).U
        val src02Type = if (isExtraUop) DontCare else SrcType.vp

        uop.srcType(0) := src02Type
        uop.srcType(1) := SrcType.vp
        uop.srcType(2) := src02Type
        uop.srcType(3) := SrcType.vp
        uop.lsrc(0) := src1
        uop.lsrc(1) := src2 + row.U
        uop.lsrc(2) := oldVd
        uop.lsrc(3) := VECTOR_TMP_REG_LMUL.U
        uop.ldest := newVd
        uop.uopIdx := (rowBase + col).U
      }
    }
  }

  // vlmulReg encodings 1, 2 and 3 select 2, 4 and 8 registers. The encodings
  // are mutually exclusive, so at most one table is applied.
  Seq("b001" -> 2, "b010" -> 4, "b011" -> 8).foreach { case (lmulEnc, regs) =>
    when(vlmulReg === lmulEnc.U) {
      fillCompressUops(regs)
    }
  }
}
is(UopSplitType.VEC_MVNR) {
  // One uop per register: uop n reads src1 + n, src2 + n and the old value of
  // dest + n, and writes dest + n.
  (0 until MAX_VLMUL).foreach { n =>
    val uop = csBundle(n)
    val vdReg = dest + n.U
    uop.uopIdx := n.U
    uop.lsrc(0) := src1 + n.U
    uop.lsrc(1) := src2 + n.U
    uop.lsrc(2) := vdReg
    uop.ldest := vdReg
  }
}
is(UopSplitType.VEC_US_LDST) {
  // uop 0 (labelled "FMV.D.X" in the original comment): moves an integer
  // register into FP temporary FP_TMP_REG_MV through the i2v function unit,
  // as a 64-bit element.
  val moveUop = csBundle(0)
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := FP_TMP_REG_MV.U
  moveUop.vecWen := false.B
  moveUop.fpWen := true.B
  moveUop.rfWen := false.B
  moveUop.vlsInstr := true.B

  // uops 1..MAX_VLMUL: one memory uop per register (LMUL). Each reads the
  // moved value from FP_TMP_REG_MV and the old destination register, and
  // writes dest + n. Their uopIdx is 0-based, i.e. slot - 1.
  (1 to MAX_VLMUL).foreach { slot =>
    val n = slot - 1
    val uop = csBundle(slot)
    val vdReg = dest + n.U
    uop.srcType(0) := SrcType.fp
    uop.lsrc(0) := FP_TMP_REG_MV.U
    uop.lsrc(2) := vdReg // old vd
    uop.ldest := vdReg
    uop.uopIdx := n.U
    uop.vlsInstr := true.B
  }

  // For a unit-stride segment access, the first uop waits for older
  // instructions and the last uop blocks younger ones.
  csBundle.head.waitForward := isUsSegment
  csBundle(numOfUop - 1.U).blockBackward := isUsSegment
}
is(UopSplitType.VEC_S_LDST) {
  // Configures csBundle(slot) as an integer-to-FP move (labelled "FMV.D.X" in
  // the original comment): it runs on the i2v function unit as a 64-bit
  // element and writes `tmpReg` through the FP write port.
  def setupScalarMove(slot: Int, tmpReg: UInt): Unit = {
    val uop = csBundle(slot)
    uop.fuType := FuType.i2v.U
    uop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
    uop.srcType(0) := SrcType.reg
    uop.srcType(1) := SrcType.imm
    uop.lsrc(1) := 0.U
    uop.ldest := tmpReg
    uop.vecWen := false.B
    uop.fpWen := true.B
    uop.rfWen := false.B
    uop.vlsInstr := true.B
  }

  // uop 0 moves the first scalar operand (lsrc(0) keeps its earlier value).
  setupScalarMove(0, FP_TMP_REG_MV.U)
  // uop 1 moves the second scalar operand of the latched instruction.
  setupScalarMove(1, VECTOR_TMP_REG_LMUL.U)
  csBundle(1).lsrc(0) := latchedInst.lsrc(1)

  // uops 2..MAX_VLMUL+1: one memory uop per register (LMUL). Each reads both
  // moved scalars and the old destination register, and writes dest + n.
  // Their uopIdx is 0-based, i.e. slot - 2.
  (0 until MAX_VLMUL).foreach { n =>
    val uop = csBundle(n + 2)
    val vdReg = dest + n.U
    uop.srcType(0) := SrcType.fp
    uop.srcType(1) := SrcType.fp
    uop.lsrc(0) := FP_TMP_REG_MV.U
    uop.lsrc(1) := VECTOR_TMP_REG_LMUL.U
    uop.lsrc(2) := vdReg // old vd
    uop.ldest := vdReg
    uop.uopIdx := n.U
    uop.vlsInstr := true.B
  }

  // For a strided segment access, the first uop waits for older instructions
  // and the last uop blocks younger ones.
  csBundle.head.waitForward := isSdSegment
  csBundle(numOfUop - 1.U).blockBackward := isSdSegment
}
is(UopSplitType.VEC_I_LDST) {
  // Segment indexed access: one memory uop per register in slots
  // 1..MAX_VLMUL. Only the first `indexRegs` uops really read an index
  // register (src2 + n); the rest mark source 1 as unused.
  def fillSegmentIndexedUops(indexRegs: Int): Unit = {
    (0 until MAX_VLMUL).foreach { n =>
      val uop = csBundle(n + 1)
      val vdReg = dest + n.U
      // lsrc(0) is useless after uop 0, but it is kept so that the uop
      // dependency on the scalar move stays correct.
      uop.srcType(0) := SrcType.fp
      uop.lsrc(0) := FP_TMP_REG_MV.U
      uop.srcType(1) := (if (n < indexRegs) SrcType.vp else SrcType.no)
      uop.lsrc(1) := src2 + n.U
      uop.srcType(2) := SrcType.vp
      uop.lsrc(2) := vdReg // old vd
      uop.ldest := vdReg
      uop.uopIdx := n.U
      uop.vlsInstr := true.B
    }
  }

  val vlmul = vlmulReg
  val vsew = Cat(0.U(1.W), vsewReg)
  val veew = Cat(0.U(1.W), width)
  // veew + vlmul + (~vsew + 1), i.e. veew + vlmul - vsew in two's complement.
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt
  // Encodings 1, 2 and 3 are kept as 2-bit codes; every other encoding maps to 0.
  val lmulCode = MuxLookup(vlmul, 0.U(2.W), Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))
  val emulCode = MuxLookup(vemul, 0.U(2.W), Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  // uop 0: moves an integer register into FP temporary FP_TMP_REG_MV through
  // the i2v function unit, as a 64-bit element.
  val moveUop = csBundle(0)
  moveUop.fuType := FuType.i2v.U
  moveUop.fuOpType := Cat(IF2VectorType.i2Vec(2, 0), e64)
  moveUop.srcType(0) := SrcType.reg
  moveUop.srcType(1) := SrcType.imm
  moveUop.lsrc(1) := 0.U
  moveUop.ldest := FP_TMP_REG_MV.U
  moveUop.vecWen := false.B
  moveUop.fpWen := true.B
  moveUop.rfWen := false.B
  moveUop.vlsInstr := true.B

  when(nf === 0.U) {
    // Non-segment access: each per-uop lookup table is driven with
    // {emulCode, lmulCode} and returns which index register (offset from src2)
    // and which data register (offset from dest) the uop uses.
    val tableKey = Cat(emulCode, lmulCode)
    val vs2Candidates = (0 until MAX_VLMUL).map(j => src2 + j.U)
    val vdCandidates = (0 until MAX_VLMUL).map(j => dest + j.U)
    (0 until MAX_VLMUL).foreach { n =>
      val table = indexedLSRegOffset(n)
      table.src := tableKey
      val vs2Reg = Mux1H(UIntToOH(table.outOffsetVs2, MAX_VLMUL), vs2Candidates)
      val vdReg = Mux1H(UIntToOH(table.outOffsetVd, MAX_VLMUL), vdCandidates)

      val uop = csBundle(n + 1)
      uop.srcType(0) := SrcType.fp
      uop.lsrc(0) := FP_TMP_REG_MV.U
      uop.lsrc(1) := vs2Reg
      uop.srcType(2) := SrcType.vp
      uop.lsrc(2) := vdReg // old vd
      uop.ldest := vdReg
      uop.uopIdx := n.U
      uop.vlsInstr := true.B
    }
  }.otherwise {
    // nf is non-zero: segment indexed load/store. Start from the
    // one-index-register layout, then widen it according to vemul.
    fillSegmentIndexedUops(1)
    switch(vemul) {
      is("b001".U) { fillSegmentIndexedUops(2) }
      is("b010".U) { fillSegmentIndexedUops(4) }
      is("b011".U) { fillSegmentIndexedUops(8) }
    }
  }

  // For an indexed segment access, the first uop waits for older instructions
  // and the last uop blocks younger ones.
  csBundle.head.waitForward := isIxSegment
  csBundle(numOfUop - 1.U).blockBackward := isIxSegment
}
1749  }
1750
// How many uops rename can take this cycle: the index paired with the first
// not-ready output port, or RenameWidth when none is selected.
// NOTE(review): presumably PriorityMuxDefault picks the first asserted entry — confirm.
val readyCounter = PriorityMuxDefault(
  outReadys.map(!_).zip((0 until RenameWidth).map(_.U)),
  RenameWidth.U
)

// All remaining uops of the current complex instruction can be sent out this cycle.
val thisAllOut = uopRes <= readyCounter

// Next-state logic. uopRes counts the uops of the current instruction that are
// still to be sent. Cases not listed leave stateNext / uopResNext at the
// values assigned before this point.
when(state === s_idle) {
  when(inValid) {
    stateNext := s_active
    uopResNext := inUopInfo.numOfUop
  }
}.elsewhen(state === s_active) {
  when(!thisAllOut) {
    // Still draining: subtract what goes out this cycle.
    stateNext := s_active
    uopResNext := uopRes - readyCounter
  }.elsewhen(inValid) {
    // Finished, and a new complex instruction arrives back-to-back.
    stateNext := s_active
    uopResNext := inUopInfo.numOfUop
  }.otherwise {
    // Finished and nothing new: go idle.
    stateNext := s_idle
    uopResNext := 0.U
  }
}

// A redirect discards the instruction in flight.
uopRes := Mux(io.redirect, 0.U, uopResNext)
state := Mux(io.redirect, s_idle, stateNext)

// Number of uops actually issued this cycle: min(uopRes, readyCounter).
val complexNum = Mux(thisAllOut, uopRes, readyCounter)

// Output port `port` carries uop number (numOfUop - uopRes) + port of the
// current instruction. An out-of-range index falls back to the last csBundle entry.
(0 until RenameWidth).foreach { port =>
  val uopSel = port.U + numOfUop - uopRes
  outValids(port) := complexNum > port.U
  outDecodedInsts(port) := Mux(uopSel < maxUopSize.U, csBundle(uopSel), csBundle(maxUopSize - 1))
}

outComplexNum := Mux(state === s_active, complexNum, 0.U)
// A new instruction is accepted when idle, or when the current one finishes this cycle.
inReady := (state === s_idle) || ((state === s_active) && thisAllOut)
1792
1793//  val validSimple = Wire(Vec(DecodeWidth, Bool()))
1794//  validSimple.zip(io.validFromIBuf.zip(io.isComplex)).map{ case (dst, (src1, src2)) => dst := src1 && !src2 }
1795//  val notInf = Wire(Vec(DecodeWidth, Bool()))
1796//  notInf.drop(1).zip(io.validFromIBuf.drop(1).zip(validSimple.drop(1))).map{ case (dst, (src1, src2)) => dst := !src1 || src2 }
1797//  notInf(0) := !io.validFromIBuf(0) || validSimple(0) || (io.isComplex(0) && io.in0pc === io.simple.decodedInst.pc)
1798//  val notInfVec = Wire(Vec(DecodeWidth, Bool()))
1799//  notInfVec.zipWithIndex.map{ case (dst, i) => dst := Cat(notInf.take(i + 1)).andR}
1800//
1801//  complexNum := Mux(io.validFromIBuf(0) && readyCounter.orR ,
1802//    Mux(uopRes0 > readyCounter, readyCounter, uopRes0),
1803//    0.U)
1804//  validToRename.zipWithIndex.foreach{
1805//    case(dst, i) =>
1806//      val validFix = Mux(complexNum.orR, validSimple((i+1).U - complexNum), validSimple(i))
1807//      dst := MuxCase(false.B, Seq(
1808//        (io.validFromIBuf(0) && readyCounter.orR && uopRes0 > readyCounter) -> Mux(readyCounter > i.U, true.B, false.B),
1809//        (io.validFromIBuf(0) && readyCounter.orR && !(uopRes0 > readyCounter)) -> Mux(complexNum > i.U, true.B, validFix && notInfVec(i.U - complexNum) && io.readyFromRename(i)),
1810//      ).toSeq)
1811//  }
1812//
1813//  readyToIBuf.zipWithIndex.foreach {
1814//    case (dst, i) =>
1815//      val readyToIBuf0 = Mux(io.isComplex(0), io.in0pc === io.simple.decodedInst.pc, true.B)
1816//      dst := MuxCase(true.B, Seq(
1817//        (io.validFromIBuf(0) && uopRes0 > readyCounter || !readyCounter.orR) -> false.B,
1818//        (io.validFromIBuf(0) && !(uopRes0 > readyCounter) && readyCounter.orR) -> (if (i==0) readyToIBuf0 else Mux(RenameWidth.U - complexNum >= i.U, notInfVec(i) && validSimple(i) && io.readyFromRename(i), false.B))
1819//      ).toSeq)
1820//  }
1821//
1822//  io.deq.decodedInsts := decodedInsts
1823//  io.deq.complexNum := complexNum
1824//  io.deq.validToRename := validToRename
1825//  io.deq.readyToIBuf := readyToIBuf
1826}
1827