/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles._

/**
  * Commonly used parameters and functions in the VLSU
  */
trait VLSUConstants {
  val VLEN = 128
  def VLENB = VLEN/8
  def vOffsetBits = log2Up(VLENB) // bit width needed to index a byte offset inside a vector reg

  def alignTypes = 4 // eew/sew = 1/2/4/8
  def alignTypeBits = log2Up(alignTypes)
  def maxMUL = 8
  def maxFields = 8
  /**
    * In the most extreme case, e.g. a segment indexed instruction with eew=64, emul=8, sew=8,
    * lmul=1 and nf=8, each data reg is mapped to 8 index regs and there are 8 data regs in
    * total, one for each field. Therefore an instruction can be split into at most 64 uops.
    */
  def maxUopNum = maxMUL * maxFields // 64
  def maxFlowNum = 16
  def maxElemNum = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index a uop inside a robIdx
  def elemIdxBits = log2Up(maxElemNum) + 1 // to index an element within an instruction
  def flowIdxBits = log2Up(maxFlowNum) + 1 // to index a flow within a uop
  def fieldBits = log2Up(maxFields) + 1 // 4 bits to indicate 1~8

  def ewBits = 3 // bit width of EEW/SEW
  def mulBits = 3 // bit width of emul/lmul

  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    require(data.getWidth >= (i+1) * alignBits)
    data((i+1) * alignBits - 1, i * alignBits)
  }

  def getByte(data: UInt, i: Int = 0) = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0) = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0) = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 64)
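  // Illustrative: getSlice(data, i, w) extracts the i-th w-bit slice of data,
  // e.g. getWord(data, 1) returns data(63, 32) and getByte(data, 3) returns data(31, 24).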
}

trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN
  def isUnitStride(instType: UInt) = instType(1, 0) === "b00".U
  def isStrided(instType: UInt) = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt) = instType(0) === "b1".U
  def isNotIndexed(instType: UInt) = instType(0) === "b0".U
  def isSegment(instType: UInt) = instType(2) === "b1".U

  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    VecInit(mask.asBools.zipWithIndex.map { case (en, i) =>
      Mux(en, getByte(newData, i), getByte(oldData, i))
    })
  }
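  // Worked example (illustrative): mergeDataWithMask selects byte-by-byte, so with
  // mask = "b0011".U bytes 0 and 1 of the result come from newData and the rest from oldData.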

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }

  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)
    LookupTree(alignedType, List(
      "b00".U -> VecInit(elemIdx.map(e => UIntToOH(e(3, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getByte(oldData, i) +: newData.map(getByte(_))
        )}).asUInt,
      "b01".U -> VecInit(elemIdx.map(e => UIntToOH(e(2, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getHalfWord(oldData, i) +: newData.map(getHalfWord(_))
        )}).asUInt,
      "b10".U -> VecInit(elemIdx.map(e => UIntToOH(e(1, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getWord(oldData, i) +: newData.map(getWord(_))
        )}).asUInt,
      "b11".U -> VecInit(elemIdx.map(e => UIntToOH(e(0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getDoubleWord(oldData, i) +: newData.map(getDoubleWord(_))
        )}).asUInt
    ))
  }

  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }
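  // Worked example (illustrative): the single-element overload writes one aligned element
  // into oldData, e.g. with alignedType = "b10".U (32-bit) and elemIdx = 2.U the low word of
  // newData replaces word slot 2 (bits 95:64) of oldData while the other bytes are kept.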
}
abstract class VLSUModule(implicit p: Parameters) extends XSModule
  with HasVLSUParameters
  with HasCircularQueuePtrHelper
abstract class VLSUBundle(implicit p: Parameters) extends XSBundle
  with HasVLSUParameters

class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}

// Where is VecOperand used?
class VecOperand(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vmask = UInt(VLEN.W) // the mask of the inst, read from the mask reg
  val vecData = UInt(VLEN.W)
  val baseAddr = UInt(VAddrBits.W) // base address from rs1
  val stride = UInt(XLEN.W) // stride from rs2
  val index = UInt(VLEN.W) // index from vs2
  val pvd = UInt(5.W) // physical vector register destination
  val lmul = UInt(3.W)
  val sew = UInt(2.W)
  val vma = Bool()
  val vta = Bool()
  val inner_idx = UInt(3.W) // index of this uop among the (up to) 8 uops of the inst
  val vl = UInt(8.W)
  // TODO: How will OOO calculate the number of vector registers?
  //  (EEW / SEW) * LMUL or (vl * EEW) / VLEN ?
  //  So OOO will always use eew ?
  // val eew = UInt(3.W)
  val total_num = UInt(3.W) // how many uops this inst is split into
}

class VecDecode(implicit p: Parameters) extends VLSUBundle {
  val uop_segment_num = UInt(3.W)
  val uop_type = UInt(2.W)
  val mask_en = Bool()
  val uop_unit_stride_whole_reg = Bool()
  val uop_unit_stride_mask = Bool()
  val uop_unit_stride_fof = Bool()
  val uop_eew = UInt(ewBits.W) // this is also the index width when the inst is an indexed load

  def apply(inst: UInt) = {
    this.uop_segment_num := inst(31, 29)
    this.uop_type := inst(27, 26)
    this.mask_en := inst(25)
    this.uop_unit_stride_whole_reg := (inst(24,20) === "b01000".U)
    this.uop_unit_stride_mask := (inst(24,20) === "b01011".U)
    this.uop_unit_stride_fof := (inst(24,20) === "b10000".U)
    this.uop_eew := inst(12 + ewBits - 1, 12)
    this
  }

  def isUnitStride = uop_type === "b00".U
  def isStrided = uop_type === "b10".U
  def isIndexed = uop_type(0) === "b1".U
}

class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset = UInt(vOffsetBits.W)
  val exp = Bool()
  val is_first_ele = Bool()
  val elemIdx = UInt(elemIdxBits.W) // element index
  val uopQueuePtr = new VluopPtr
  val flowPtr = new VlflowPtr
}

class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
}

class VecStoreExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val elemIdx = UInt(elemIdxBits.W)
  val uopQueuePtr = new VsUopPtr
}

class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask       = UInt(VLENB.W) // each bit for a flow
  val byteMask       = UInt(VLENB.W) // each bit for a byte
  val data           = UInt(VLEN.W)
  // val fof            = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W)
  // val exceptionVec   = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W)

  // instruction decode result
  val flowNum = UInt(flowIdxBits.W) // # of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields = UInt(fieldBits.W) // NFIELDS
  val vm = Bool() // whether vector masking is enabled
  val usWholeReg = Bool() // unit-stride, whole register load
  val usMaskReg = Bool() // unit-stride, masked store/load
  val eew = UInt(ewBits.W) // size of memory elements
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W)
  val vd_last_uop = Bool()
  val vd_first_uop = Bool()
}

class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr             = UInt(VAddrBits.W)
  val mask              = UInt(VLENB.W)
  val alignedType       = UInt(alignTypeBits.W)
  val exp               = Bool()
  val elemIdx           = UInt(elemIdxBits.W)
  val is_first_ele      = Bool()
}

object MulNum {
  def apply (mul: UInt): UInt = { //mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 1.U , // 1/8
      "b110".U -> 1.U , // 1/4
      "b111".U -> 1.U , // 1/2
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}
/**
  * When emul is greater than or equal to 1, the entire register needs to be written;
  * otherwise, only the specified number of bytes is written. */
object MulDataSize {
  def apply (mul: UInt): UInt = { //mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 2.U  , // 1/8
      "b110".U -> 4.U  , // 1/4
      "b111".U -> 8.U  , // 1/2
      "b000".U -> 16.U , // 1
      "b001".U -> 16.U , // 2
      "b010".U -> 16.U , // 4
      "b011".U -> 16.U   // 8
    )))}
}
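// MulDataSize worked example (illustrative, assuming VLEN = 128, i.e. 16-byte registers):
// for emul = 1/2 ("b111") only 8 bytes of the register are written, while for any
// emul >= 1 the full 16-byte register is written.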

object OneRegNum {
  def apply (eew: UInt): UInt = { // number of elements that fit in one register, given eew
    (LookupTree(eew,List(
      "b000".U -> 16.U , // 1
      "b101".U -> 8.U , // 2
      "b110".U -> 4.U , // 4
      "b111".U -> 2.U   // 8
    )))}
}

// indexed inst: data bytes read per element, determined by sew
object SewDataSize {
  def apply (sew: UInt): UInt = {
    (LookupTree(sew,List(
      "b000".U -> 1.U , // 1
      "b001".U -> 2.U , // 2
      "b010".U -> 4.U , // 4
      "b011".U -> 8.U   // 8
    )))}
}

// strided inst: data bytes read per element, determined by eew
object EewDataSize {
  def apply (eew: UInt): UInt = {
    (LookupTree(eew,List(
      "b000".U -> 1.U , // 1
      "b101".U -> 2.U , // 2
      "b110".U -> 4.U , // 4
      "b111".U -> 8.U   // 8
    )))}
}

object loadDataSize {
  def apply (instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U ->  MulDataSize(emul), // unit-stride
      "b010".U ->  EewDataSize(eew)  , // strided
      "b001".U ->  SewDataSize(sew)  , // indexed-unordered
      "b011".U ->  SewDataSize(sew)  , // indexed-ordered
      "b100".U ->  EewDataSize(eew)  , // segment unit-stride
      "b110".U ->  EewDataSize(eew)  , // segment strided
      "b101".U ->  SewDataSize(sew)  , // segment indexed-unordered
      "b111".U ->  SewDataSize(sew)    // segment indexed-ordered
    )))}
}

// object GenVecLoadMask extends VLSUConstants {
//   def apply(alignedType: UInt, vaddr: UInt): UInt = {
//     LookupTree(alignedType, List(
//       "b00".U -> 0x1.U, // b1
//       "b01".U -> 0x3.U, // b11
//       "b10".U -> 0xf.U, // b1111
//       "b11".U -> 0xff.U // b11111111
//     )) << vaddr(vOffsetBits - 1, 0)
//   }
// }

object storeDataSize {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U ->  EewDataSize(eew)  , // unit-stride, do not use
      "b010".U ->  EewDataSize(eew)  , // strided
      "b001".U ->  SewDataSize(sew)  , // indexed-unordered
      "b011".U ->  SewDataSize(sew)  , // indexed-ordered
      "b100".U ->  EewDataSize(eew)  , // segment unit-stride
      "b110".U ->  EewDataSize(eew)  , // segment strided
      "b101".U ->  SewDataSize(sew)  , // segment indexed-unordered
      "b111".U ->  SewDataSize(sew)    // segment indexed-ordered
    )))}
}

object GenVecStoreMask {
  def apply (instType: UInt, eew: UInt, sew: UInt): UInt = {
    val mask = Wire(UInt(16.W))
    mask := UIntToOH(storeDataSize(instType = instType, eew = eew, sew = sew)) - 1.U
    mask
  }
}
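// GenVecStoreMask worked example (illustrative): if storeDataSize returns 4 (a 32-bit element),
// the mask becomes UIntToOH(4) - 1 = "b0000000000001111".U, i.e. the 4 low byte lanes are enabled.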

/**
  * These are used to extract the index element used for address generation in indexed instructions */
object EewEq8 {
  def apply(index:UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx,List(
      0.U  -> index(7 ,0   ),
      1.U  -> index(15,8   ),
      2.U  -> index(23,16  ),
      3.U  -> index(31,24  ),
      4.U  -> index(39,32  ),
      5.U  -> index(47,40  ),
      6.U  -> index(55,48  ),
      7.U  -> index(63,56  ),
      8.U  -> index(71,64  ),
      9.U  -> index(79,72  ),
      10.U -> index(87,80  ),
      11.U -> index(95,88  ),
      12.U -> index(103,96 ),
      13.U -> index(111,104),
      14.U -> index(119,112),
      15.U -> index(127,120)
    )))}
}

object EewEq16 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(15, 0),
      1.U -> index(31, 16),
      2.U -> index(47, 32),
      3.U -> index(63, 48),
      4.U -> index(79, 64),
      5.U -> index(95, 80),
      6.U -> index(111, 96),
      7.U -> index(127, 112)
    )))}
}

object EewEq32 {
  def apply(index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(31, 0),
      1.U -> index(63, 32),
      2.U -> index(95, 64),
      3.U -> index(127, 96)
    )))}
}

object EewEq64 {
  def apply (index: UInt, flow_inner_idx: UInt): UInt = {
    (LookupTree(flow_inner_idx, List(
      0.U -> index(63, 0),
      1.U -> index(127, 64)
    )))}
}

object IndexAddr {
  def apply (index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = {
    (LookupTree(eew,List(
      "b000".U -> EewEq8 (index = index, flow_inner_idx = flow_inner_idx ), // index element is 1 byte // TODO: the index may cross registers
      "b101".U -> EewEq16(index = index, flow_inner_idx = flow_inner_idx ), // index element is 2 bytes
      "b110".U -> EewEq32(index = index, flow_inner_idx = flow_inner_idx ), // index element is 4 bytes
      "b111".U -> EewEq64(index = index, flow_inner_idx = flow_inner_idx )  // index element is 8 bytes
    )))}
}
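// IndexAddr worked example (illustrative): with eew = "b101".U (16-bit indices) and
// flow_inner_idx = 3.U it returns index(63, 48), i.e. the fourth 16-bit index element of vs2.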
/*
object RegFLowCnt {
  def apply (emul: UInt, lmul:UInt, eew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {

    (LookupTree(Cat(emul,lmul),List(
      "b001000".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 2,lmul = 1
      "b010000".U -> ((uopIdx(1,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 4,lmul = 1
      "b011000".U -> ((uopIdx(2,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 8,lmul = 1
      "b010001".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 4,lmul = 2
      "b011001".U -> ((uopIdx(1,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),//emul = 8,lmul = 2
      "b011010".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx) //emul = 8,lmul = 4
    )))}
}

object AddrFLowCnt {
  def apply (emul: UInt, lmul:UInt, sew:UInt, uopIdx: UInt, flowIdx: UInt):UInt = {
    (LookupTree(Cat(lmul,emul),List(
      "b001000".U -> ((uopIdx(0  ) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 2, emul = 1
      "b010000".U -> ((uopIdx(1,0) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 4, emul = 1
      "b011000".U -> ((uopIdx(2,0) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 8, emul = 1
      "b010001".U -> ((uopIdx(0  ) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 4, emul = 2
      "b011001".U -> ((uopIdx(1,0) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx),//lmul = 8, emul = 2
      "b011011".U -> ((uopIdx(0  ) << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx) //lmul = 8, emul = 4
    )))}
}
*/

object RegFLowCnt {
  def apply (emulNum: UInt, lmulNum:UInt, eew: UInt, uopIdx: UInt, flowIdx: UInt):UInt = {
    (LookupTree(emulNum/lmulNum,List(
      //"d1".U -> flowIdx,
      "d2".U -> ((uopIdx(0  ) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),
      "d4".U -> ((uopIdx(1,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx),
      "d8".U -> ((uopIdx(2,0) << Log2Num((16.U >> eew(1,0)).asUInt)).asUInt + flowIdx)
    )))}
}

object AddrFLowCnt {
  def apply (emulNum: UInt, lmulNum:UInt, sew:UInt, uopIdx: UInt, flowIdx: UInt):UInt = {
    (LookupTree(lmulNum/emulNum,List(
      "d1".U -> flowIdx,
      "d2".U -> ((uopIdx(0  ) << Log2Num((16.U >> sew(1,0)).asUInt)).asUInt + flowIdx),
      "d4".U -> ((uopIdx(1,0) << Log2Num((16.U >> sew(1,0)).asUInt)).asUInt + flowIdx),
      "d8".U -> ((uopIdx(2,0) << Log2Num((16.U >> sew(1,0)).asUInt)).asUInt + flowIdx)
    )))}
}


object Log2Num {
  def apply (num: UInt): UInt = {
    (LookupTree(num,List(
      16.U -> 4.U,
      8.U  -> 3.U,
      4.U  -> 2.U,
      2.U  -> 1.U,
      1.U  -> 0.U
    )))}
}

/**
  * When emul is less than or equal to 1, the field index (nf) equals uopIdx;
  * when emul equals 2, it equals uopIdx >> 1, and so on. */
object GenSegNfIdx {
  def apply (mul: UInt, uopIdx: UInt):UInt = { // mul means lmul or emul
    (LookupTree(mul,List(
      "b101".U -> uopIdx,           // 1/8
      "b110".U -> uopIdx,           // 1/4
      "b111".U -> uopIdx,           // 1/2
      "b000".U -> uopIdx,           // 1
      "b001".U -> (uopIdx >> 1.U),  // 2
      "b010".U -> (uopIdx >> 2.U),  // 4
      "b011".U -> (uopIdx >> 3.U)   // 8
    )))}
}
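// GenSegNfIdx worked example (illustrative): for a segment access with emul = 2 ("b001")
// and uopIdx = 5, the field index is 5 >> 1 = 2, since each field occupies two uops.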

object GenUopIdxInField {
  def apply (instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val isIndexed = instType(0)
    val mulInField = Mux(
      isIndexed,
      Mux(lmul.asSInt > emul.asSInt, lmul, emul),
      emul
    )
    LookupTree(mulInField, List(
      "b101".U -> 0.U,
      "b110".U -> 0.U,
      "b111".U -> 0.U,
      "b000".U -> 0.U,
      "b001".U -> uopIdx(0),
      "b010".U -> uopIdx(1, 0),
      "b011".U -> uopIdx(2, 0)
    ))
  }
}

object GenSegNfIdxMul {
  def apply (emul: UInt, lmul: UInt, uopIdx: UInt):UInt = {
    (LookupTree(Cat(emul,lmul),List(
      "b001000".U -> uopIdx(5,1), //emul = 2,lmul = 1
      "b010000".U -> uopIdx(5,2), //emul = 4,lmul = 1
      "b011000".U -> uopIdx(5,3), //emul = 8,lmul = 1
      "b010001".U -> uopIdx(5,3), //emul = 4,lmul = 2
      "b011001".U -> uopIdx(5,4), //emul = 8,lmul = 2
      "b011010".U -> uopIdx(5,5)  //emul = 8,lmul = 4
    )))}
}

/**
  * When emul is less than or equal to 1, there is only one segEmulIdx, so segEmulIdx is 0.U;
  * when emul equals 2, segEmulIdx equals uopIdx(0), and so on. */
object GenSegMulIdx {
  def apply (mul: UInt, uopIdx: UInt): UInt = { //mul means emul or lmul
    (LookupTree(mul,List(
      "b101".U -> 0.U        , // 1/8
      "b110".U -> 0.U        , // 1/4
      "b111".U -> 0.U        , // 1/2
      "b000".U -> 0.U        , // 1
      "b001".U -> uopIdx(0)  , // 2
      "b010".U -> uopIdx(1,0), // 4
      "b011".U -> uopIdx(2,0)  // 8
    )))}
}

// eew decode
object EewLog2 extends VLSUConstants {
  // def apply (eew: UInt): UInt = {
  //   (LookupTree(eew,List(
  //     "b000".U -> "b000".U , // 1
  //     "b101".U -> "b001".U , // 2
  //     "b110".U -> "b010".U , // 4
  //     "b111".U -> "b011".U   // 8
  //   )))}
  def apply(eew: UInt): UInt = ZeroExt(eew(1, 0), ewBits)
}

/**
  * Unit-stride instructions don't use this method;
  * other instructions generate realFlowNum as EmulDataSize >> eew(1,0), where
  * EmulDataSize is the number of bytes that need to be written to the register group
  * and eew(1,0) is the log2 of the number of bytes written per element. */
object GenRealFlowNum {
  def apply (instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    (LookupTree(instType,List(
      "b000".U ->  (MulDataSize(emul) >> eew(1,0)).asUInt, // store use, load do not use
      "b010".U ->  (MulDataSize(emul) >> eew(1,0)).asUInt, // strided
      "b001".U ->  Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-unordered
      "b011".U ->  Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // indexed-ordered
      "b100".U ->  (MulDataSize(emul) >> eew(1,0)).asUInt, // segment unit-stride
      "b110".U ->  (MulDataSize(emul) >> eew(1,0)).asUInt, // segment strided
      "b101".U ->  Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt), // segment indexed-unordered
      "b111".U ->  Mux(emul.asSInt > lmul.asSInt, (MulDataSize(emul) >> eew(1,0)).asUInt, (MulDataSize(lmul) >> sew(1,0)).asUInt)  // segment indexed-ordered
    )))}
}
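// GenRealFlowNum worked example (illustrative, assuming VLEN = 128): a strided access with
// emul = 1 ("b000", 16 bytes per register) and eew = 32-bit (eew(1,0) = 2) yields
// 16 >> 2 = 4 flows (memory accesses) per uop.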

/**
  * GenRealFlowLog2 = Log2(GenRealFlowNum)
  */
object GenRealFlowLog2 extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew(1, 0)
    val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0)
    (LookupTree(instType, List(
      "b000".U -> eewRealFlowLog2, // unit-stride
      "b010".U -> eewRealFlowLog2, // strided
      "b001".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-unordered
      "b011".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // indexed-ordered
      "b100".U -> eewRealFlowLog2, // segment unit-stride
      "b110".U -> eewRealFlowLog2, // segment strided
      "b101".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2), // segment indexed-unordered
      "b111".U -> Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)  // segment indexed-ordered
    )))
  }
}

/**
  * GenElemIdx generates an element index within an instruction, given a certain uopIdx and a known flowIdx
  * inside the uop.
  *
  * eew = 0, elemIdx = uopIdx ## flowIdx(3, 0)
  * eew = 1, elemIdx = uopIdx ## flowIdx(2, 0)
  * eew = 2, elemIdx = uopIdx ## flowIdx(1, 0)
  * eew = 3, elemIdx = uopIdx ## flowIdx(0)
  */
object GenElemIdx extends VLSUConstants {
  def apply(alignedType: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    LookupTree(
      alignedType,
      (0 until alignTypes).map(i =>
        i.U -> ((uopIdx ## flowIdx(log2Up(VLENB) - i - 1, 0))(log2Up(maxElemNum) - 1, 0))
      )
    )
  }
}
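// GenElemIdx worked example (illustrative, assuming VLENB = 16): with alignedType = 1.U
// (16-bit elements), uopIdx = 2.U and flowIdx = 5.U, elemIdx = 2 ## 5 over 3 flow bits
// = 2 * 8 + 5 = 21.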

/**
  * GenVLMAX calculates VLMAX, i.e. LMUL * (VLEN / SEW)
  */
object GenVLMAXLog2 extends VLSUConstants {
  def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew
}
object GenVLMAX {
  def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew)
}
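// GenVLMAX worked example (illustrative, assuming VLEN = 128 and log2-encoded inputs):
// with lmul = "b001".U (LMUL = 2) and sew = "b010".U (SEW = 32 bits, i.e. 4 bytes),
// GenVLMAXLog2 = 1 + 4 - 2 = 3, so GenVLMAX = 8 = 2 * 128 / 32.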

object GenUSWholeRegVL extends VLSUConstants {
  def apply(nfields: UInt, eew: UInt): UInt = {
    LookupTree(eew(1, 0), List(
      "b00".U -> (nfields << (log2Up(VLENB) - 0)),
      "b01".U -> (nfields << (log2Up(VLENB) - 1)),
      "b10".U -> (nfields << (log2Up(VLENB) - 2)),
      "b11".U -> (nfields << (log2Up(VLENB) - 3))
    ))
  }
}
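// GenUSWholeRegVL worked example (illustrative, assuming VLEN = 128): a whole-register
// access with nfields = 2 and eew = 32-bit ("b10") gives vl = 2 << (4 - 2) = 8 elements.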
object GenUSWholeEmul extends VLSUConstants {
  def apply(nf: UInt): UInt = {
    LookupTree(nf,List(
      "b000".U -> "b000".U(mulBits.W),
      "b001".U -> "b001".U(mulBits.W),
      "b011".U -> "b010".U(mulBits.W),
      "b111".U -> "b011".U(mulBits.W)
    ))
  }
}


object GenUSMaskRegVL extends VLSUConstants {
  def apply(vl: UInt): UInt = {
    (vl >> 3.U)
  }
}

object GenUopByteMask {
  def apply(flowMask: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> flowMask,
      "b01".U -> FillInterleaved(2, flowMask),
      "b10".U -> FillInterleaved(4, flowMask),
      "b11".U -> FillInterleaved(8, flowMask)
    ))
  }
}
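// GenUopByteMask worked example (illustrative): with alignedType = "b01".U (16-bit elements)
// and flowMask = "b0101".U, each flow bit widens to 2 byte bits, giving byteMask = "b00110011".U.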

object GenFlowMaskInsideReg extends VLSUConstants {
  def apply(alignedType: UInt, elemIdx: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> UIntToOH(elemIdx(3, 0)),
      "b01".U -> FillInterleaved(2, UIntToOH(elemIdx(2, 0))),
      "b10".U -> FillInterleaved(4, UIntToOH(elemIdx(1, 0))),
      "b11".U -> FillInterleaved(8, UIntToOH(elemIdx(0)))
    ))
  }
}

// TODO: delete this in vs flow queue
object GenEleIdx {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt, uopIdx: UInt, flowIdx: UInt): UInt = {
    val eleIdx = Wire(UInt(7.W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || emul.asSInt > lmul.asSInt) {
      eleIdx := (uopIdx << Log2Num((MulDataSize(emul) >> eew(1,0)).asUInt)).asUInt + flowIdx
    }.otherwise {
      eleIdx := (uopIdx << Log2Num((MulDataSize(lmul) >> sew(1,0)).asUInt)).asUInt + flowIdx
    }
    eleIdx
  }
}

object GenVdIdxInField extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val vdIdx = Wire(UInt(log2Up(maxMUL).W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || lmul.asSInt > emul.asSInt) {
      // Unit-stride or Strided, or indexed with lmul >= emul
      vdIdx := uopIdx
    }.otherwise {
      // Indexed with lmul <= emul
      val multiple = emul - lmul
      val uopIdxWidth = uopIdx.getWidth
      vdIdx := LookupTree(multiple, List(
        0.U -> uopIdx,
        1.U -> (uopIdx >> 1),
        2.U -> (uopIdx >> 2),
        3.U -> (uopIdx >> 3)
      ))
    }
    vdIdx
  }
}
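// GenVdIdxInField worked example (illustrative): for an indexed access with emul = 4 ("b010")
// and lmul = 1 ("b000"), multiple = 2, so uopIdx = 5 maps to vd index 5 >> 2 = 1 within the field.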

object GenFieldMask {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    val isSegment = instType(2)
    val isIndexed = instType(0)
    val alignedType = Mux(isIndexed, sew(1, 0), eew(1, 0))
    val mul = Mux(isIndexed, lmul, emul)
    val vlmaxMask = GenVLMAX(lmul, sew) - 1.U
    val mulMask = LookupTree(alignedType, List(
      "b00".U -> "b01111".U,
      "b01".U -> "b00111".U,
      "b10".U -> "b00011".U,
      "b11".U -> "b00001".U
    ))
    Mux(
      !isSegment || mul.asSInt >= 0.S,
      vlmaxMask,
      mulMask
    )
  }
}