xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VecCommon.scala (revision 272ec6b14a832d392220dc0e9441d1e03bb1dcb1)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend.rob.RobPtr
26import xiangshan.backend.Bundles._
27
/**
  * Constants and bit-slicing helpers shared by the vector load/store unit (VLSU).
  */
trait VLSUConstants {
  // Vector register width in bits (default value; sub-traits may override it).
  val VLEN = 128
  // Vector register width in bytes.
  def VLENB: Int = VLEN / 8
  // Bit width needed to index a byte offset inside one vector register.
  def vOffsetBits: Int = log2Up(VLENB)

  // Four alignment types: eew/sew = 1/2/4/8.
  def alignTypes: Int = 4
  def alignTypeBits: Int = log2Up(alignTypes)
  def maxMUL: Int = 8
  def maxFields: Int = 8

  /**
    * Worst case is a segment indexed instruction with eew=64, emul=8, sew=8, lmul=1 and nf=8:
    * each data register is mapped to 8 index registers and there are 8 data registers, one per
    * field. An instruction can therefore be split into at most 64 uops.
    */
  def maxUopNum: Int = maxMUL * maxFields // 64
  def maxFlowNum: Int = 16
  def maxElemNum: Int = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index uop inside an robIdx
  // Width of an element index within an instruction.
  def elemIdxBits: Int = log2Up(maxElemNum) + 1
  // Width of a flow index within a uop.
  def flowIdxBits: Int = log2Up(maxFlowNum) + 1
  // 4 bits, enough to hold a field count of 1~8.
  def fieldBits: Int = log2Up(maxFields) + 1

  // Bit width of an EEW/SEW encoding.
  def ewBits: Int = 3
  // Bit width of an emul/lmul encoding.
  def mulBits: Int = 3

  /**
    * Extracts the i-th `alignBits`-wide slice of `data`, counting from the least significant bit.
    * Elaboration fails (via `require`) if `data` is too narrow to contain that slice.
    */
  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    val lo = i * alignBits
    val hi = lo + alignBits - 1
    require(data.getWidth > hi)
    data(hi, lo)
  }

  // Fixed-width convenience wrappers around getSlice.
  def getByte(data: UInt, i: Int = 0): UInt = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0): UInt = getSlice(data, i, 64)
}
66
trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  // Use the vector register width of the core configuration instead of the default.
  override val VLEN = coreParams.VLEN

  // instType decoding: bits (1, 0) select the addressing mode, bit 2 flags a segment access.
  def isUnitStride(instType: UInt): Bool = instType(1, 0) === "b00".U
  def isStrided(instType: UInt): Bool = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt): Bool = instType(0) === "b1".U
  def isNotIndexed(instType: UInt): Bool = instType(0) === "b0".U
  def isSegment(instType: UInt): Bool = instType(2) === "b1".U

  /**
    * Byte-wise merge: byte i of the result comes from `newData` when mask bit i is set,
    * otherwise from `oldData`. `mask` must carry exactly one bit per data byte.
    */
  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    val mergedBytes = mask.asBools.zipWithIndex.map { case (takeNew, byteIdx) =>
      Mux(takeNew, getByte(newData, byteIdx), getByte(oldData, byteIdx))
    }
    VecInit(mergedBytes)
  }

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }

  /**
    * Writes several elements into `oldData` at the slots named by their element indices.
    *
    * @param oldData     current register contents
    * @param newData     candidate elements; the lowest slice of each entry is the element value
    * @param alignedType element size selector: b00 byte, b01 half word, b10 word, b11 double word
    * @param elemIdx     target slot of each element (only the low bits needed for the size are used)
    * @param valids      per-element write enable
    * @return            `oldData` with every valid element written into its slot
    */
  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)

    // Builds the merged register for one element size.
    // `slotOf` picks the slot-index bits of an element index, `slice` extracts one slot of data.
    def mergeSlots(slotOf: UInt => UInt, slice: (UInt, Int) => UInt): UInt = {
      // hitsPerSlot(slot)(k) is set when element k targets `slot`.
      val hitsPerSlot = elemIdx.map(idx => UIntToOH(slotOf(idx)).asBools).transpose
      val mergedSlots = hitsPerSlot.zipWithIndex.map { case (hits, slot) =>
        val writes = hits.zip(valids).map { case (hit, valid) => hit && valid }
        // The always-true first entry keeps the old slot value as the fallback.
        // NOTE(review): presumably the mux picks the last asserted entry - confirm against its definition.
        ParallelPosteriorityMux(
          true.B +: writes,
          slice(oldData, slot) +: newData.map(slice(_, 0))
        )
      }
      VecInit(mergedSlots).asUInt
    }

    LookupTree(alignedType, List(
      "b00".U -> mergeSlots(idx => idx(3, 0), getByte(_, _)),
      "b01".U -> mergeSlots(idx => idx(2, 0), getHalfWord(_, _)),
      "b10".U -> mergeSlots(idx => idx(1, 0), getWord(_, _)),
      "b11".U -> mergeSlots(idx => idx(0), getDoubleWord(_, _))
    ))
  }

  // Single-element form of the merge above; the element is always treated as valid.
  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }
}
// Base class for VLSU modules: an XSModule with the VLSU parameters and circular-queue pointer helpers mixed in.
abstract class VLSUModule(implicit p: Parameters)
  extends XSModule with HasVLSUParameters with HasCircularQueuePtrHelper
// Base class for VLSU bundles: an XSBundle with the VLSU parameters mixed in.
abstract class VLSUBundle(implicit p: Parameters)
  extends XSBundle with HasVLSUParameters
130
// VLSU bundle that carries the micro-op (a DynInst) it belongs to.
class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}
134
// Vector-specific part of an execution-unit output.
class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  // One full vector register of data, with one mask bit per byte.
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  // Byte offset inside the vector register.
  val reg_offset = UInt(vOffsetBits.W)
  // 1: the element is an active vector element, 0: it is not active.
  val vecActive = Bool()
  val is_first_ele = Bool()
  // Element index.
  val elemIdx = UInt(elemIdxBits.W)
  // Element index in the scope of vd.
  val elemIdxInsideVd = UInt(elemIdxBits.W)
  val uopQueuePtr = new VluopPtr
  val flowPtr = new VlflowPtr
}
151
// Memory execution-unit output extended with the vector-specific fields in `vec`.
class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
}
155
// Memory execution-unit output extended with the bookkeeping fields of a vector store.
class VecStoreExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  // Element index.
  val elemIdx = UInt(elemIdxBits.W)
  val uopQueuePtr = new VsUopPtr
  // Field and segment indices (presumably only meaningful for segment stores - confirm with users).
  val fieldIdx = UInt(fieldBits.W)
  val segmentIdx = UInt(elemIdxBits.W)
  // Virtual address.
  val vaddr = UInt(VAddrBits.W)
}
163
// State kept for one vector memory uop: masks, data, addressing info and the decoded instruction fields.
class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  // One bit per flow.
  val flowMask = UInt(VLENB.W)
  // One bit per byte.
  val byteMask = UInt(VLENB.W)
  val data = UInt(VLEN.W)
  // val fof            = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec   = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W)

  // ---- instruction decode result ----
  // Number of flows in this uop.
  val flowNum = UInt(flowIdxBits.W)
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  // NFIELDS
  val nfields = UInt(fieldBits.W)
  // Whether vector masking is enabled.
  val vm = Bool()
  // Unit-stride, whole register load.
  val usWholeReg = Bool()
  // Unit-stride, masked store/load.
  val usMaskReg = Bool()
  // Size of memory elements.
  val eew = UInt(ewBits.W)
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W)
  val vd_last_uop = Bool()
  val vd_first_uop = Bool()
}
191
// State kept for one flow of a vector memory uop.
class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  // Virtual address.
  val vaddr = UInt(VAddrBits.W)
  // One bit per byte of a vector register.
  val mask = UInt(VLENB.W)
  // Element alignment selector, alignTypeBits wide.
  val alignedType = UInt(alignTypeBits.W)
  // NOTE(review): meaning of `exp` is not visible in this file - confirm with its users.
  val exp = Bool()
  // Element index.
  val elemIdx = UInt(elemIdxBits.W)
  val is_first_ele = Bool()
}
200
object MulNum {
  /**
    * Decodes an emul/lmul encoding into its multiplier value.
    * Fractional multipliers (1/8, 1/4, 1/2) are mapped to 1, like a multiplier of 1.
    *
    * @param mul encoded emul or lmul
    */
  def apply(mul: UInt): UInt = {
    // 1/8, 1/4, 1/2 and 1 all give 1.
    val atMostOne = List("b101", "b110", "b111", "b000").map(enc => enc.U -> 1.U)
    // 2, 4 and 8 give their own value.
    val aboveOne = List(
      "b001".U -> 2.U,
      "b010".U -> 4.U,
      "b011".U -> 8.U
    )
    LookupTree(mul, atMostOne ++ aboveOne)
  }
}
/**
  * Number of bytes to be written for a given multiplier:
  * when emul is greater than or equal to 1 the entire register (16 bytes) is written;
  * otherwise only the fraction given by the multiplier is written.
  */
object MulDataSize {
  /** @param mul encoded emul or lmul */
  def apply(mul: UInt): UInt = {
    // Fractional multipliers: 1/8 -> 2 bytes, 1/4 -> 4 bytes, 1/2 -> 8 bytes.
    val partialReg = List(
      "b101".U -> 2.U,
      "b110".U -> 4.U,
      "b111".U -> 8.U
    )
    // Multipliers 1, 2, 4 and 8: a whole register.
    val wholeReg = List("b000", "b001", "b010", "b011").map(enc => enc.U -> 16.U)
    LookupTree(mul, partialReg ++ wholeReg)
  }
}
228
object OneRegNum {
  /**
    * Maps an eew encoding to 16 / 8 / 4 / 2 for element sizes of 1 / 2 / 4 / 8 bytes,
    * i.e. the number of such elements that fit into 16 bytes.
    *
    * @param eew encoded element width
    */
  def apply(eew: UInt): UInt = {
    // Encodings listed in order of element size: 1, 2, 4, 8 bytes.
    val encodings = List("b000", "b101", "b110", "b111")
    val table = encodings.zipWithIndex.map { case (enc, sizeLog2) =>
      enc.U -> (16 >> sizeLog2).U
    }
    LookupTree(eew, table)
  }
}
238
// Number of data bytes accessed per element by an indexed instruction, decoded from sew.
object SewDataSize {
  def apply(sew: UInt): UInt = {
    // sew encodings b000..b011 stand for 1, 2, 4 and 8 bytes.
    val table = List("b000", "b001", "b010", "b011").zipWithIndex.map { case (enc, sizeLog2) =>
      enc.U -> (1 << sizeLog2).U
    }
    LookupTree(sew, table)
  }
}
249
// Number of data bytes accessed per element by a strided instruction, decoded from eew.
object EewDataSize {
  def apply(eew: UInt): UInt = {
    // eew encodings b000, b101, b110, b111 stand for 1, 2, 4 and 8 bytes.
    val table = List("b000", "b101", "b110", "b111").zipWithIndex.map { case (enc, sizeLog2) =>
      enc.U -> (1 << sizeLog2).U
    }
    LookupTree(eew, table)
  }
}
260
object loadDataSize {
  /**
    * Data size in bytes of a vector load, selected by instruction type:
    * unit-stride uses the emul-based size, strided and segment (unit-)strided use the eew size,
    * and every indexed form uses the sew size.
    */
  def apply(instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    val bytesByEmul = MulDataSize(emul)
    val bytesByEew = EewDataSize(eew)
    val bytesBySew = SewDataSize(sew)
    val table = List(
      "b000".U -> bytesByEmul, // unit-stride
      "b010".U -> bytesByEew,  // strided
      "b001".U -> bytesBySew,  // indexed-unordered
      "b011".U -> bytesBySew,  // indexed-ordered
      "b100".U -> bytesByEew,  // segment unit-stride
      "b110".U -> bytesByEew,  // segment strided
      "b101".U -> bytesBySew,  // segment indexed-unordered
      "b111".U -> bytesBySew   // segment indexed-ordered
    )
    LookupTree(instType, table)
  }
}
274
object storeDataSize {
  /**
    * Data size in bytes of a vector store, selected by instruction type:
    * non-indexed forms use the eew size, indexed forms use the sew size.
    */
  def apply(instType: UInt, eew: UInt, sew: UInt): UInt = {
    val bytesByEew = EewDataSize(eew)
    val bytesBySew = SewDataSize(sew)
    val table = List(
      "b000".U -> bytesByEew, // unit-stride, do not use
      "b010".U -> bytesByEew, // strided
      "b001".U -> bytesBySew, // indexed-unordered
      "b011".U -> bytesBySew, // indexed-ordered
      "b100".U -> bytesByEew, // segment unit-stride
      "b110".U -> bytesByEew, // segment strided
      "b101".U -> bytesBySew, // segment indexed-unordered
      "b111".U -> bytesBySew  // segment indexed-ordered
    )
    LookupTree(instType, table)
  }
}
288
object GenVecStoreMask {
  /**
    * Builds a 16-bit byte mask whose low N bits are set, N being the store data size in bytes
    * returned by storeDataSize for the given instruction type, eew and sew.
    */
  def apply(instType: UInt, eew: UInt, sew: UInt): UInt = {
    val byteCount = storeDataSize(instType = instType, eew = eew, sew = sew)
    val lowOnes = Wire(UInt(16.W))
    // (1 << N) - 1 sets exactly the N least significant bits.
    lowOnes := UIntToOH(byteCount) - 1.U
    lowOnes
  }
}
296
/**
  * EewEq8 / EewEq16 / EewEq32 / EewEq64 are used to obtain the immediate address of an
  * indexed instruction: each selects one index element out of a 128-bit index value.
  */
object EewEq8 {
  // Selects byte `flow_inner_idx` (0 to 15) of `index`.
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    val byteLanes = (0 until 16).toList.map { lane =>
      lane.U -> index(8 * lane + 7, 8 * lane)
    }
    LookupTree(flow_inner_idx, byteLanes)
  }
}
320
object EewEq16 {
  // Selects the 16-bit element `flow_inner_idx` (0 to 7) of a 128-bit `index`.
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    val halfWordLanes = (0 until 8).toList.map { lane =>
      lane.U -> index(16 * lane + 15, 16 * lane)
    }
    LookupTree(flow_inner_idx, halfWordLanes)
  }
}
334
object EewEq32 {
  // Selects the 32-bit element `flow_inner_idx` (0 to 3) of a 128-bit `index`.
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    val wordLanes = (0 until 4).toList.map { lane =>
      lane.U -> index(32 * lane + 31, 32 * lane)
    }
    LookupTree(flow_inner_idx, wordLanes)
  }
}
344
object EewEq64 {
  // Selects the 64-bit element `flow_inner_idx` (0 or 1) of a 128-bit `index`.
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    val lowerHalf = index(63, 0)
    val upperHalf = index(127, 64)
    LookupTree(flow_inner_idx, List(0.U -> lowerHalf, 1.U -> upperHalf))
  }
}
352
object IndexAddr {
  /**
    * Extracts the index element of an indexed instruction, dispatching on the eew encoding
    * to the extractor of the matching element width.
    *
    * @param index          index register value
    * @param flow_inner_idx position of the wanted element inside `index`
    * @param eew            encoded width of the index elements
    */
  def apply(index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    // TODO: index maybe cross register
    val imm1Byte = EewEq8(index, flow_inner_idx)
    val imm2Byte = EewEq16(index, flow_inner_idx)
    val imm4Byte = EewEq32(index, flow_inner_idx)
    val imm8Byte = EewEq64(index, flow_inner_idx)
    LookupTree(eew, List(
      "b000".U -> imm1Byte,
      "b101".U -> imm2Byte,
      "b110".U -> imm4Byte,
      "b111".U -> imm8Byte
    ))
  }
}
362
object Log2Num {
  // Base-2 logarithm of `num` for the powers of two 16, 8, 4, 2 and 1.
  def apply(num: UInt): UInt = {
    // Exponents 4 down to 0, i.e. 16 -> 4, 8 -> 3, 4 -> 2, 2 -> 1, 1 -> 0.
    val table = (4 to 0 by -1).toList.map(exp => (1 << exp).U -> exp.U)
    LookupTree(num, table)
  }
}
373
object GenUopIdxInField {
  /**
    * Reduces `uopIdx` to the uop index inside one field by keeping only its low bits.
    * The number of bits kept follows the multiplier in effect: emul for non-indexed
    * instructions, the larger of lmul and emul for indexed ones.
    */
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val indexed = instType(0)
    val largerMul = Mux(lmul.asSInt > emul.asSInt, lmul, emul)
    val mulInField = Mux(indexed, largerMul, emul)

    // Multipliers up to 1 give index 0 in the table.
    val singleUop = List("b101", "b110", "b111", "b000").map(enc => enc.U -> 0.U)
    // Multipliers 2 / 4 / 8 keep 1 / 2 / 3 low bits of uopIdx.
    val multiUop = List[(UInt, UInt)](
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    )
    LookupTree(mulInField, singleUop ++ multiUop)
  }
}
393
// eew decode: turns an eew encoding into log2 of the element size in bytes.
object EewLog2 extends VLSUConstants {
  // Equivalent lookup table, kept for reference:
  //   "b000" -> "b000" (1 byte)
  //   "b101" -> "b001" (2 bytes)
  //   "b110" -> "b010" (4 bytes)
  //   "b111" -> "b011" (8 bytes)
  def apply(eew: UInt): UInt = {
    // The low two bits of the encoding already hold the log2 value.
    val sizeLog2 = eew(1, 0)
    ZeroExt(sizeLog2, ewBits)
  }
}
405
/**
  * Number of flows of a uop.
  * unit-stride loads don't use this method;
  * other instructions generate realFlowNum by EmulDataSize >> eew(1,0), where
  * EmulDataSize is the number of bytes that need to be written to the register and
  * eew(1,0) is log2 of the number of bytes written at once.
  * Indexed instructions use the emul/eew pair when emul > lmul, the lmul/sew pair otherwise.
  */
object GenRealFlowNum {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    val flowsByEew = (MulDataSize(emul) >> eew(1, 0)).asUInt
    val flowsBySew = (MulDataSize(lmul) >> sew(1, 0)).asUInt
    val flowsIndexed = Mux(emul.asSInt > lmul.asSInt, flowsByEew, flowsBySew)
    val table = List(
      "b000".U -> flowsByEew,   // store use, load do not use
      "b010".U -> flowsByEew,   // strided
      "b001".U -> flowsIndexed, // indexed-unordered
      "b011".U -> flowsIndexed, // indexed-ordered
      "b100".U -> flowsByEew,   // segment unit-stride
      "b110".U -> flowsByEew,   // segment strided
      "b101".U -> flowsIndexed, // segment indexed-unordered
      "b111".U -> flowsIndexed  // segment indexed-ordered
    )
    LookupTree(instType, table)
  }
}
424
/**
  * GenRealFlowLog2 = Log2(GenRealFlowNum)
  */
object GenRealFlowLog2 extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    val vlenbLog2 = log2Up(VLENB).U
    // Multipliers >= 1 contribute 0; fractional (negative) multipliers contribute their encoding.
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + vlenbLog2 - eew(1, 0)
    val sewRealFlowLog2 = lmulLog2 + vlenbLog2 - sew(1, 0)
    // Indexed instructions follow whichever of emul / lmul is larger.
    val indexedRealFlowLog2 = Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)
    val table = List(
      "b000".U -> eewRealFlowLog2,     // unit-stride
      "b010".U -> eewRealFlowLog2,     // strided
      "b001".U -> indexedRealFlowLog2, // indexed-unordered
      "b011".U -> indexedRealFlowLog2, // indexed-ordered
      "b100".U -> eewRealFlowLog2,     // segment unit-stride
      "b110".U -> eewRealFlowLog2,     // segment strided
      "b101".U -> indexedRealFlowLog2, // segment indexed-unordered
      "b111".U -> indexedRealFlowLog2  // segment indexed-ordered
    )
    LookupTree(instType, table)
  }
}
446
/**
  * GenElemIdx generates an element index within an instruction, given a certain uopIdx and a
  * known flowIdx inside the uop: uopIdx is concatenated with the low log2(flows per uop) bits
  * of flowIdx.
  */
object GenElemIdx extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt,
    uopIdx: UInt, flowIdx: UInt): UInt = {
    val indexed = instType(0).asBool
    val vlenbLog2 = log2Up(VLENB).U
    // Multipliers > 1 contribute 0; all others contribute their encoding.
    val emulPart = Mux(emul.asSInt > 0.S, 0.U, emul)
    val lmulPart = Mux(lmul.asSInt > 0.S, 0.U, lmul)
    val eewUopFlowsLog2 = emulPart + vlenbLog2 - eew(1, 0)
    val sewUopFlowsLog2 = lmulPart + vlenbLog2 - sew(1, 0)
    val indexedUopFlowsLog2 = Mux(emul.asSInt > lmul.asSInt, eewUopFlowsLog2, sewUopFlowsLog2)
    val uopFlowsLog2 = Mux(indexed, indexedUopFlowsLog2, eewUopFlowsLog2)

    // n flow-index bits are appended below uopIdx when a uop holds 2^n flows (n = 1 to 4).
    val withFlowBits = (1 to 4).toList.map { n =>
      n.U -> (uopIdx ## flowIdx(n - 1, 0))
    }
    LookupTree(uopFlowsLog2, (0.U -> uopIdx) :: withFlowBits)
  }
}
471
/**
  * GenVLMAXLog2 calculates log2(VLMAX) as lmul + log2(VLENB) - sew,
  * with lmul and sew given in their encoded form.
  */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = {
    val vlenbLog2 = log2Up(VLENB).U
    lmul + vlenbLog2 - sew
  }
}
// VLMAX itself: 2 raised to the value produced by GenVLMAXLog2.
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = {
    val vlmaxLog2 = GenVLMAXLog2(lmul, sew)
    1.U << vlmaxLog2
  }
}
481
// Vector length of a unit-stride whole-register access: nfields * (VLENB / element size in bytes).
object GenUSWholeRegVL extends VLSUConstants {
  def apply(nfields: UInt, eew: UInt): UInt = {
    // eew(1, 0) is used as log2 of the element size in bytes.
    val table = List("b00", "b01", "b10", "b11").zipWithIndex.map { case (enc, sizeLog2) =>
      enc.U -> (nfields << (log2Up(VLENB) - sizeLog2))
    }
    LookupTree(eew(1, 0), table)
  }
}
// Maps the nf value of a unit-stride whole-register access (0, 1, 3, 7) to an emul encoding (0, 1, 2, 3).
object GenUSWholeEmul extends VLSUConstants {
  def apply(nf: UInt): UInt = {
    val nfToEmul = List(
      "b000" -> "b000",
      "b001" -> "b001",
      "b011" -> "b010",
      "b111" -> "b011"
    )
    val table = nfToEmul.map { case (nfEnc, emulEnc) => nfEnc.U -> emulEnc.U(mulBits.W) }
    LookupTree(nf, table)
  }
}
502
503
object GenUSMaskRegVL extends VLSUConstants {
  /**
    * Effective vector length, in bytes, of a unit-stride mask load/store.
    *
    * Mask bits are packed eight per byte, so `vl` mask bits occupy ceil(vl / 8) bytes.
    * A plain `vl >> 3` rounds down and would drop the last, partially used byte whenever
    * `vl` is not a multiple of 8.
    *
    * @param vl vector length in elements (one mask bit each)
    * @return   ceil(vl / 8), with the same width as `vl >> 3`
    */
  def apply(vl: UInt): UInt = {
    val fullBytes = vl >> 3.U
    // Any of the low three bits set means one more, partially filled, byte is needed.
    val hasPartialByte = (vl & 7.U).orR
    Mux(hasPartialByte, fullBytes + 1.U, fullBytes)
  }
}
509
object GenUopByteMask {
  /**
    * Expands a per-flow mask into a per-byte mask by repeating every flow bit once per byte
    * of the element: 1, 2, 4 or 8 times for alignedType b00, b01, b10, b11.
    */
  def apply(flowMask: UInt, alignedType: UInt): UInt = {
    // Byte-sized elements: the flow mask already is the byte mask.
    val byteSized = "b00".U -> flowMask
    val widerSized = List("b01" -> 2, "b10" -> 4, "b11" -> 8).map { case (enc, bytes) =>
      enc.U -> FillInterleaved(bytes, flowMask)
    }
    LookupTree(alignedType, byteSized :: widerSized)
  }
}
520
object GenVdIdxInField extends VLSUConstants {
  /**
    * Index of the destination register inside one field for the uop `uopIdx`.
    *
    * For unit-stride and strided instructions, and for indexed instructions with lmul > emul,
    * this is `uopIdx` itself. For indexed instructions with lmul <= emul, `uopIdx` is shifted
    * right by (emul - lmul). The result is log2Up(maxMUL) bits wide.
    */
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val vdIdx = Wire(UInt(log2Up(maxMUL).W))
    val notIndexed = instType(1, 0) === "b00".U || instType(1, 0) === "b10".U
    when (notIndexed || lmul.asSInt > emul.asSInt) {
      // Unit-stride or strided, or indexed with lmul > emul
      vdIdx := uopIdx
    }.otherwise {
      // Indexed with lmul <= emul (equal multipliers give a shift of 0)
      val multiple = emul - lmul
      vdIdx := LookupTree(multiple, List(
        0.U -> uopIdx,
        1.U -> (uopIdx >> 1),
        2.U -> (uopIdx >> 2),
        3.U -> (uopIdx >> 3)
      ))
    }
    vdIdx
  }
}
541