xref: /XiangShan/src/main/scala/xiangshan/mem/vector/VecCommon.scala (revision f3a9fb053ef5b99b1977960119e3ee440397383e)
1/***************************************************************************************
2  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4  *
5  * XiangShan is licensed under Mulan PSL v2.
6  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7  * You may obtain a copy of Mulan PSL v2 at:
8  *          http://license.coscl.org.cn/MulanPSL2
9  *
10  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13  *
14  * See the Mulan PSL v2 for more details.
15  ***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import utils._
23import utility._
24import xiangshan._
25import xiangshan.backend.rob.RobPtr
26import xiangshan.backend.Bundles._
27
28/**
29  * Common used parameters or functions in vlsu
30  */
/** Constants and slice helpers shared across the vector load/store unit. */
trait VLSUConstants {
  val VLEN = 128 // default vector register width in bits (overridden from coreParams)
  //for pack unit-stride flow
  val AlignedNum = 4 // 1/2/4/8
  def VLENB = VLEN/8 // vector register width in bytes
  def vOffsetBits = log2Up(VLENB) // bits-width to index offset inside a vector reg

  def alignTypes = 5 // eew/sew = 1/2/4/8 bytes, last indicates a 128-bit element
  def alignTypeBits = log2Up(alignTypes)
  def maxMUL = 8
  def maxFields = 8
  /**
    * In the most extreme cases like a segment indexed instruction, eew=64, emul=8, sew=8, lmul=1,
    * and nf=8, each data reg is mapped with 8 index regs and there are 8 data regs in total,
    * each for a field. Therefore an instruction can be divided into 64 uops at most.
    */
  def maxUopNum = maxMUL * maxFields // 64
  def maxFlowNum = 16
  def maxElemNum = maxMUL * maxFlowNum // 128
  // def uopIdxBits = log2Up(maxUopNum) // to index uop inside an robIdx
  def elemIdxBits = log2Up(maxElemNum) + 1 // to index which element in an instruction
  def flowIdxBits = log2Up(maxFlowNum) + 1 // to index which flow in a uop
  def fieldBits = log2Up(maxFields) + 1 // 4-bits to indicate 1~8

  def ewBits = 3 // bits-width of EEW/SEW
  def mulBits = 3 // bits-width of emul/lmul

  /** Extract the i-th `alignBits`-wide slice of `data` (slice 0 holds the LSBs). */
  def getSlice(data: UInt, i: Int, alignBits: Int): UInt = {
    require(data.getWidth >= (i+1) * alignBits)
    data((i+1) * alignBits - 1, i * alignBits)
  }

  def getByte(data: UInt, i: Int = 0) = getSlice(data, i, 8)
  def getHalfWord(data: UInt, i: Int = 0) = getSlice(data, i, 16)
  def getWord(data: UInt, i: Int = 0) = getSlice(data, i, 32)
  def getDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 64)
  def getDoubleDoubleWord(data: UInt, i: Int = 0) = getSlice(data, i, 128)
}
69
trait HasVLSUParameters extends HasXSParameter with VLSUConstants {
  override val VLEN = coreParams.VLEN
  // instType encoding (3 bits): bit0 = indexed, bit1 = strided (vs unit-stride), bit2 = segment
  def isUnitStride(instType: UInt) = instType(1, 0) === "b00".U
  def isStrided(instType: UInt) = instType(1, 0) === "b10".U
  def isIndexed(instType: UInt) = instType(0) === "b1".U
  def isNotIndexed(instType: UInt) = instType(0) === "b0".U
  def isSegment(instType: UInt) = instType(2) === "b1".U
  def is128Bit(alignedType: UInt) = alignedType(2) === "b1".U

  /**
    * Byte-granular merge: for each byte position, take the byte from `newData`
    * when the corresponding mask bit is set, otherwise keep the byte of `oldData`.
    * Returns the merged value as a Vec of bytes (element 0 = least-significant byte).
    */
  def mergeDataWithMask(oldData: UInt, newData: UInt, mask: UInt): Vec[UInt] = {
    require(oldData.getWidth == newData.getWidth)
    require(oldData.getWidth == mask.getWidth * 8)
    VecInit(mask.asBools.zipWithIndex.map { case (en, i) =>
      Mux(en, getByte(newData, i), getByte(oldData, i))
    })
  }

  // def asBytes(data: UInt) = {
  //   require(data.getWidth % 8 == 0)
  //   (0 until data.getWidth/8).map(i => getByte(data, i))
  // }

  /**
    * Merge the element data of several flows into `oldData`.
    * `alignedType` selects the element width: b00 = byte, b01 = half-word,
    * b10 = word, b11 = double-word. For each destination element slot i, the slot
    * is replaced by the data of a flow whose (valid && elemIdx == i); slots hit by
    * no valid flow keep the `oldData` value (guaranteed by the leading true.B entry).
    * NOTE(review): ParallelPosteriorityMux is assumed to give priority to the LAST
    * asserted select, so later flows in `newData` override earlier ones — confirm
    * against its definition in the utility package.
    */
  def mergeDataWithElemIdx(
    oldData: UInt,
    newData: Seq[UInt],
    alignedType: UInt,
    elemIdx: Seq[UInt],
    valids: Seq[Bool]
  ): UInt = {
    require(newData.length == elemIdx.length)
    require(newData.length == valids.length)
    LookupTree(alignedType, List(
      "b00".U -> VecInit(elemIdx.map(e => UIntToOH(e(3, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getByte(oldData, i) +: newData.map(getByte(_))
        )}).asUInt,
      "b01".U -> VecInit(elemIdx.map(e => UIntToOH(e(2, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getHalfWord(oldData, i) +: newData.map(getHalfWord(_))
        )}).asUInt,
      "b10".U -> VecInit(elemIdx.map(e => UIntToOH(e(1, 0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getWord(oldData, i) +: newData.map(getWord(_))
        )}).asUInt,
      "b11".U -> VecInit(elemIdx.map(e => UIntToOH(e(0)).asBools).transpose.zipWithIndex.map { case (selVec, i) =>
        ParallelPosteriorityMux(
          true.B +: selVec.zip(valids).map(x => x._1 && x._2),
          getDoubleWord(oldData, i) +: newData.map(getDoubleWord(_))
        )}).asUInt
    ))
  }

  /** Single-flow convenience wrapper of the Seq-based merge above. */
  def mergeDataWithElemIdx(oldData: UInt, newData: UInt, alignedType: UInt, elemIdx: UInt): UInt = {
    mergeDataWithElemIdx(oldData, Seq(newData), alignedType, Seq(elemIdx), Seq(true.B))
  }
}
/** Base class for modules of the vector load/store unit. */
abstract class VLSUModule(implicit p: Parameters) extends XSModule
  with HasVLSUParameters
  with HasCircularQueuePtrHelper
/** Base class for bundles of the vector load/store unit. */
abstract class VLSUBundle(implicit p: Parameters) extends XSBundle
  with HasVLSUParameters

/** A VLSU bundle that additionally carries the micro-op it belongs to. */
class VLSUBundleWithMicroOp(implicit p: Parameters) extends VLSUBundle {
  val uop = new DynInst
}
138
/** Vector-specific payload attached to a memory writeback. */
class OnlyVecExuOutput(implicit p: Parameters) extends VLSUBundle {
  val isvec = Bool()
  val vecdata = UInt(VLEN.W)
  val mask = UInt(VLENB.W)
  // val rob_idx_valid = Vec(2, Bool())
  // val inner_idx = Vec(2, UInt(3.W))
  // val rob_idx = Vec(2, new RobPtr)
  // val offset = Vec(2, UInt(4.W))
  val reg_offset = UInt(vOffsetBits.W) // byte offset inside the vector register
  val vecActive = Bool() // 1: vector active element, 0: vector not active element
  val is_first_ele = Bool() // first element of the instruction
  val elemIdx = UInt(elemIdxBits.W) // element index
  val elemIdxInsideVd = UInt(elemIdxBits.W) // element index in scope of vd
  // val uopQueuePtr = new VluopPtr
  // val flowPtr = new VlflowPtr
}
155
/** Memory writeback bundle extended with vector data and flow-packing status. */
class VecExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
  val vec = new OnlyVecExuOutput
  // pack: several narrow flows may have been merged into one wider access
  val isPackage         = Bool()
  val packageNum        = UInt((log2Up(VLENB) + 1).W) // number of flows merged in this package
  val originAlignedType = UInt(alignTypeBits.W) // element alignment before packing
  val alignedType       = UInt(alignTypeBits.W) // alignment of the (possibly packed) access
}
164
165// class VecStoreExuOutput(implicit p: Parameters) extends MemExuOutput with HasVLSUParameters {
166//   val elemIdx = UInt(elemIdxBits.W)
167//   val uopQueuePtr = new VsUopPtr
168//   val fieldIdx = UInt(fieldBits.W)
169//   val segmentIdx = UInt(elemIdxBits.W)
170//   val vaddr = UInt(VAddrBits.W)
171//   // pack
172//   val isPackage         = Bool()
173//   val packageNum        = UInt((log2Up(VLENB) + 1).W)
174//   val originAlignedType = UInt(alignTypeBits.W)
175//   val alignedType       = UInt(alignTypeBits.W)
176// }
177
/** Per-uop state for the vector load/store uop queues. */
class VecUopBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val flowMask       = UInt(VLENB.W) // each bit for a flow
  val byteMask       = UInt(VLENB.W) // each bit for a byte
  val data           = UInt(VLEN.W)
  // val fof            = Bool() // fof is only used for vector loads
  val excp_eew_index = UInt(elemIdxBits.W) // presumably the element index associated with an exception — confirm at use site
  // val exceptionVec   = ExceptionVec() // uop has exceptionVec
  val baseAddr = UInt(VAddrBits.W)
  val stride = UInt(VLEN.W)
  val flow_counter = UInt(flowIdxBits.W) // flow counter (assumed: flows still outstanding — TODO confirm)

  // instruction decode result
  val flowNum = UInt(flowIdxBits.W) // # of flows in a uop
  // val flowNumLog2 = UInt(log2Up(flowIdxBits).W) // log2(flowNum), for better timing of multiplication
  val nfields = UInt(fieldBits.W) // NFIELDS
  val vm = Bool() // whether vector masking is enabled
  val usWholeReg = Bool() // unit-stride, whole register load
  val usMaskReg = Bool() // unit-stride, masked store/load
  val eew = UInt(ewBits.W) // size of memory elements
  val sew = UInt(ewBits.W)
  val emul = UInt(mulBits.W)
  val lmul = UInt(mulBits.W)
  val vlmax = UInt(elemIdxBits.W)
  val instType = UInt(3.W) // bit0: indexed, bit1: strided, bit2: segment
  val vd_last_uop = Bool() // last uop writing to this vd
  val vd_first_uop = Bool() // first uop writing to this vd
}
205
/** One memory flow (a single access) generated from a vector uop. */
class VecFlowBundle(implicit p: Parameters) extends VLSUBundleWithMicroOp {
  val vaddr             = UInt(VAddrBits.W)
  val mask              = UInt(VLENB.W)
  val alignedType       = UInt(alignTypeBits.W)
  val vecActive         = Bool() // whether this element is active under the vector mask
  val elemIdx           = UInt(elemIdxBits.W)
  val is_first_ele      = Bool()

  // pack: this flow may be a merge of several narrower flows
  val isPackage         = Bool()
  val packageNum        = UInt((log2Up(VLENB) + 1).W)
  val originAlignedType = UInt(alignTypeBits.W)
}
219
object MulNum {
  /** Number of vector registers occupied for an emul/lmul encoding.
    * Fractional multipliers (1/8, 1/4, 1/2) still occupy one register.
    */
  def apply(mul: UInt): UInt = LookupTree(mul, List(
    "b000".U -> 1.U, // 1
    "b001".U -> 2.U, // 2
    "b010".U -> 4.U, // 4
    "b011".U -> 8.U, // 8
    "b101".U -> 1.U, // 1/8
    "b110".U -> 1.U, // 1/4
    "b111".U -> 1.U  // 1/2
  ))
}
232/**
233  * when emul is greater than or equal to 1, this means the entire register needs to be written;
234  * otherwise, only write the specified number of bytes */
/**
  * Bytes of register data covered by one uop for an emul/lmul encoding:
  * a full 16-byte register when mul >= 1, a fraction of it otherwise.
  */
object MulDataSize {
  def apply(mul: UInt): UInt = LookupTree(mul, List(
    "b000".U -> 16.U, // 1
    "b001".U -> 16.U, // 2
    "b010".U -> 16.U, // 4
    "b011".U -> 16.U, // 8
    "b101".U -> 2.U,  // 1/8
    "b110".U -> 4.U,  // 1/4
    "b111".U -> 8.U   // 1/2
  ))
}
247
object OneRegNum {
  /** Number of elements held by one 128-bit register for a given eew encoding. */
  def apply (eew: UInt): UInt = { // eew: element width encoding (b000=1B, b101=2B, b110=4B, b111=8B)
    (LookupTree(eew,List(
      "b000".U -> 16.U , // 1-byte elements
      "b101".U -> 8.U , // 2
      "b110".U -> 4.U , // 4
      "b111".U -> 2.U   // 8
    )))}
}
257
258//index inst read data byte
// index inst read data byte
object SewDataSize {
  /** Element size in bytes for a log2-encoded sew (0~3 -> 1/2/4/8 bytes). */
  def apply(sew: UInt): UInt = LookupTree(sew, List(
    "b000".U -> 1.U, // 1 byte
    "b001".U -> 2.U, // 2 bytes
    "b010".U -> 4.U, // 4 bytes
    "b011".U -> 8.U  // 8 bytes
  ))
}
268
269// strided inst read data byte
// strided inst read data byte
object EewDataSize {
  /** Element size in bytes for the instruction eew encoding. */
  def apply(eew: UInt): UInt = LookupTree(eew, List(
    "b000".U -> 1.U, // 1 byte
    "b101".U -> 2.U, // 2 bytes
    "b110".U -> 4.U, // 4 bytes
    "b111".U -> 8.U  // 8 bytes
  ))
}
279
object loadDataSize {
  /** Bytes accessed per flow for a vector load, selected by instruction type:
    * indexed variants follow sew, strided variants follow eew,
    * unit-stride uses the emul-scaled size.
    */
  def apply(instType: UInt, emul: UInt, eew: UInt, sew: UInt): UInt = {
    val bySew = SewDataSize(sew)
    val byEew = EewDataSize(eew)
    LookupTree(instType, List(
      "b000".U -> MulDataSize(emul), // unit-stride
      "b010".U -> byEew,             // strided
      "b001".U -> bySew,             // indexed-unordered
      "b011".U -> bySew,             // indexed-ordered
      "b100".U -> byEew,             // segment unit-stride
      "b110".U -> byEew,             // segment strided
      "b101".U -> bySew,             // segment indexed-unordered
      "b111".U -> bySew              // segment indexed-ordered
    ))
  }
}
293
object storeDataSize {
  /** Bytes accessed per flow for a vector store: indexed variants follow sew,
    * all others follow eew (the unit-stride entry is marked "do not use").
    */
  def apply(instType: UInt, eew: UInt, sew: UInt): UInt = {
    val bySew = SewDataSize(sew)
    val byEew = EewDataSize(eew)
    LookupTree(instType, List(
      "b000".U -> byEew, // unit-stride, do not use
      "b010".U -> byEew, // strided
      "b001".U -> bySew, // indexed-unordered
      "b011".U -> bySew, // indexed-ordered
      "b100".U -> byEew, // segment unit-stride
      "b110".U -> byEew, // segment strided
      "b101".U -> bySew, // segment indexed-unordered
      "b111".U -> bySew  // segment indexed-ordered
    ))
  }
}
307
object GenVecStoreMask {
  /** Byte-enable mask for a store flow: the low storeDataSize bits are set. */
  def apply(instType: UInt, eew: UInt, sew: UInt): UInt = {
    val numBytes = storeDataSize(instType = instType, eew = eew, sew = sew)
    val mask = Wire(UInt(16.W))
    mask := UIntToOH(numBytes) - 1.U // 2^n - 1 => n low bits set
    mask
  }
}
315
316/**
317  * these are used to obtain immediate addresses for  index instruction */
object EewEq8 {
  /** Select the flow_inner_idx-th 8-bit index field from the 128-bit index register. */
  def apply(index: UInt, flow_inner_idx: UInt): UInt =
    LookupTree(flow_inner_idx, List.tabulate(16)(i => i.U -> index(8 * i + 7, 8 * i)))
}
339
object EewEq16 {
  /** Select the flow_inner_idx-th 16-bit index field from the 128-bit index register. */
  def apply(index: UInt, flow_inner_idx: UInt): UInt =
    LookupTree(flow_inner_idx, List.tabulate(8)(i => i.U -> index(16 * i + 15, 16 * i)))
}
353
object EewEq32 {
  /** Select the flow_inner_idx-th 32-bit index field from the 128-bit index register. */
  def apply(index: UInt, flow_inner_idx: UInt): UInt =
    LookupTree(flow_inner_idx, List.tabulate(4)(i => i.U -> index(32 * i + 31, 32 * i)))
}
363
object EewEq64 {
  /** Select the flow_inner_idx-th 64-bit index field from the 128-bit index register. */
  def apply(index: UInt, flow_inner_idx: UInt): UInt =
    LookupTree(flow_inner_idx, List.tabulate(2)(i => i.U -> index(64 * i + 63, 64 * i)))
}
371
object IndexAddr {
  /** Extract the address offset of one indexed element from the index register,
    * with the field width selected by eew.
    */
  def apply(index: UInt, flow_inner_idx: UInt, eew: UInt): UInt = LookupTree(eew, List(
    "b000".U -> EewEq8(index, flow_inner_idx),  // 1-byte offset // TODO: index may cross registers
    "b101".U -> EewEq16(index, flow_inner_idx), // 2-byte offset
    "b110".U -> EewEq32(index, flow_inner_idx), // 4-byte offset
    "b111".U -> EewEq64(index, flow_inner_idx)  // 8-byte offset
  ))
}
381
object Log2Num {
  /** log2 of a power-of-two value in {1, 2, 4, 8, 16}. */
  def apply(num: UInt): UInt =
    LookupTree(num, List.tabulate(5)(i => (1 << i).U -> i.U))
}
392
object GenUopIdxInField {
  /**
    * Truncate a global uopIdx to the uop index inside one field (segment).
    * Uops per field is max(lmul, emul) for indexed instructions (both the data
    * and index register groups are walked), emul otherwise; fractional
    * multipliers map to a single uop per field.
    */
  def apply (instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val isIndexed = instType(0)
    // signed compare on the log2 encodings (fractional encodings are negative)
    val mulInField = Mux(
      isIndexed,
      Mux(lmul.asSInt > emul.asSInt, lmul, emul),
      emul
    )
    LookupTree(mulInField, List(
      "b101".U -> 0.U,        // 1/8 -> one uop per field
      "b110".U -> 0.U,        // 1/4
      "b111".U -> 0.U,        // 1/2
      "b000".U -> 0.U,        // 1
      "b001".U -> uopIdx(0),    // 2 uops per field
      "b010".U -> uopIdx(1, 0), // 4
      "b011".U -> uopIdx(2, 0)  // 8
    ))
  }
}
412
413//eew decode
object EewLog2 extends VLSUConstants {
  // def apply (eew: UInt): UInt = {
  //   (LookupTree(eew,List(
  //     "b000".U -> "b000".U , // 1
  //     "b101".U -> "b001".U , // 2
  //     "b110".U -> "b010".U , // 4
  //     "b111".U -> "b011".U   // 8
  //   )))}
  /** The low 2 bits of the eew encoding already equal log2(bytes); zero-extend to ewBits. */
  def apply(eew: UInt): UInt = ZeroExt(eew(1, 0), ewBits)
}
424
425/**
426  * unit-stride instructions don't use this method;
427  * other instructions generate realFlowNum by EmulDataSize >> eew(1,0),
428  * EmulDataSize means the number of bytes that need to be written to the register,
429  * eew(1,0) means the number of bytes written at once*/
object GenRealFlowNum {
  /**
    * Number of flows (memory accesses) in one uop.
    * Non-indexed types: bytes covered by emul divided by the eew element size.
    * Indexed types: the larger side wins — index side (emul/eew) vs data side (lmul/sew).
    */
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    val eewFlowNum = (MulDataSize(emul) >> eew(1, 0)).asUInt
    val sewFlowNum = (MulDataSize(lmul) >> sew(1, 0)).asUInt
    val idxFlowNum = Mux(emul.asSInt > lmul.asSInt, eewFlowNum, sewFlowNum)
    LookupTree(instType, List(
      "b000".U -> eewFlowNum, // unit-stride (store use, load do not use)
      "b010".U -> eewFlowNum, // strided
      "b001".U -> idxFlowNum, // indexed-unordered
      "b011".U -> idxFlowNum, // indexed-ordered
      "b100".U -> eewFlowNum, // segment unit-stride
      "b110".U -> eewFlowNum, // segment strided
      "b101".U -> idxFlowNum, // segment indexed-unordered
      "b111".U -> idxFlowNum  // segment indexed-ordered
    ))
  }
}
443
444/**
445  * GenRealFlowLog2 = Log2(GenRealFlowNum)
446  */
/**
  * GenRealFlowLog2 = Log2(GenRealFlowNum)
  */
object GenRealFlowLog2 extends VLSUConstants {
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt): UInt = {
    // Fractional emul/lmul encodings are negative log2 values; adding them
    // (modulo the result width) subtracts from log2(VLENB).
    val emulLog2 = Mux(emul.asSInt >= 0.S, 0.U, emul)
    val lmulLog2 = Mux(lmul.asSInt >= 0.S, 0.U, lmul)
    val eewRealFlowLog2 = emulLog2 + log2Up(VLENB).U - eew(1, 0)
    val sewRealFlowLog2 = lmulLog2 + log2Up(VLENB).U - sew(1, 0)
    val idxRealFlowLog2 = Mux(emul.asSInt > lmul.asSInt, eewRealFlowLog2, sewRealFlowLog2)
    LookupTree(instType, List(
      "b000".U -> eewRealFlowLog2, // unit-stride
      "b010".U -> eewRealFlowLog2, // strided
      "b001".U -> idxRealFlowLog2, // indexed-unordered
      "b011".U -> idxRealFlowLog2, // indexed-ordered
      "b100".U -> eewRealFlowLog2, // segment unit-stride
      "b110".U -> eewRealFlowLog2, // segment strided
      "b101".U -> idxRealFlowLog2, // segment indexed-unordered
      "b111".U -> idxRealFlowLog2  // segment indexed-ordered
    ))
  }
}
465
466/**
467  * GenElemIdx generals an element index within an instruction, given a certain uopIdx and a known flowIdx
468  * inside the uop.
469  */
object GenElemIdx extends VLSUConstants {
  /**
    * Element index within the whole instruction: uopIdx concatenated with the
    * low log2(flows-per-uop) bits of flowIdx.
    */
  def apply(instType: UInt, emul: UInt, lmul: UInt, eew: UInt, sew: UInt,
    uopIdx: UInt, flowIdx: UInt): UInt = {
    val isIndexed = instType(0).asBool
    // log2(flows per uop); fractional emul/lmul encodings are negative log2 values
    // added modulo the width, so they subtract from log2(VLENB)
    val eewUopFlowsLog2 = Mux(emul.asSInt > 0.S, 0.U, emul) + log2Up(VLENB).U - eew(1, 0)
    val sewUopFlowsLog2 = Mux(lmul.asSInt > 0.S, 0.U, lmul) + log2Up(VLENB).U - sew(1, 0)
    val uopFlowsLog2 = Mux(
      isIndexed,
      Mux(emul.asSInt > lmul.asSInt, eewUopFlowsLog2, sewUopFlowsLog2),
      eewUopFlowsLog2
    )
    LookupTree(uopFlowsLog2, List(
      0.U -> uopIdx,
      1.U -> uopIdx ## flowIdx(0),
      2.U -> uopIdx ## flowIdx(1, 0),
      3.U -> uopIdx ## flowIdx(2, 0),
      4.U -> uopIdx ## flowIdx(3, 0)
    ))
  }
}
490
491/**
492  * GenVLMAX calculates VLMAX, which equals MUL * ew
493  */
object GenVLMAXLog2 extends VLSUConstants {
  /** log2(VLMAX) = lmul(log2) + log2(VLENB) - sew(log2 bytes). */
  def apply(lmul: UInt, sew: UInt): UInt = lmul + log2Up(VLENB).U - sew
}
object GenVLMAX {
  /** VLMAX = 2^GenVLMAXLog2. */
  def apply(lmul: UInt, sew: UInt): UInt = 1.U << GenVLMAXLog2(lmul, sew)
}
500
object GenUSWholeRegVL extends VLSUConstants {
  /** Effective vl of a unit-stride whole-register access:
    * nfields * (VLENB >> log2(element bytes)), with eew(1,0) = log2(bytes).
    */
  def apply(nfields: UInt, eew: UInt): UInt =
    LookupTree(eew(1, 0), List.tabulate(4)(i => i.U -> (nfields << (log2Up(VLENB) - i))))
}
object GenUSWholeEmul extends VLSUConstants{
  /** emul for unit-stride whole-register moves: map the nf encoding to log2(register count). */
  def apply(nf: UInt): UInt={
    LookupTree(nf,List(
      "b000".U -> "b000".U(mulBits.W), // nf=1 -> emul=1
      "b001".U -> "b001".U(mulBits.W), // nf=2 -> emul=2
      "b011".U -> "b010".U(mulBits.W), // nf=4 -> emul=4
      "b111".U -> "b011".U(mulBits.W)  // nf=8 -> emul=8
    ))
  }
}
521
522
object GenUSMaskRegVL extends VLSUConstants {
  /** Effective vl for unit-stride mask load/store: ceil(vl / 8), one mask bit per element. */
  def apply(vl: UInt): UInt = {
    Mux(vl(2,0) === 0.U , (vl >> 3.U), ((vl >> 3.U) + 1.U))
  }
}
528
object GenUopByteMask {
  /** Expand a per-flow mask to a per-byte mask: each flow covers 2^alignedType bytes,
    * so every flow bit is replicated that many times (alignedType 0 is the identity).
    */
  def apply(flowMask: UInt, alignedType: UInt): UInt =
    LookupTree(alignedType, List.tabulate(5)(i => i.U -> FillInterleaved(1 << i, flowMask)))
}
540
object GenVdIdxInField extends VLSUConstants {
  /**
    * Index of the destination vector register inside one field for a given uopIdx.
    * Unit-stride, strided, or indexed with lmul > emul: each uop has its own vd,
    * so the index is uopIdx itself. Indexed with emul >= lmul: 2^(emul - lmul)
    * uops (one per index register) share a data register, so uopIdx is scaled down.
    */
  def apply(instType: UInt, emul: UInt, lmul: UInt, uopIdx: UInt): UInt = {
    val vdIdx = Wire(UInt(log2Up(maxMUL).W))
    when (instType(1,0) === "b00".U || instType(1,0) === "b10".U || lmul.asSInt > emul.asSInt) {
      // Unit-stride or strided, or indexed with lmul > emul (when lmul == emul the
      // otherwise-branch with multiple = 0 yields the same uopIdx)
      vdIdx := uopIdx
    }.otherwise {
      // Indexed with emul >= lmul: vd shared by 2^(emul - lmul) uops
      val multiple = emul - lmul // log2 of the sharing factor
      vdIdx := LookupTree(multiple, List(
        0.U -> uopIdx,
        1.U -> (uopIdx >> 1),
        2.U -> (uopIdx >> 2),
        3.U -> (uopIdx >> 3)
      ))
    }
    vdIdx
  }
}
561/**
562* Use start and vl to generate flow activative mask
563* mod = true fill 0
564* mod = false fill 1
565*/
object GenFlowMask extends VLSUConstants {
  /**
    * Mask of flows restricted to the window [start, vl).
    * mod = true:  active elements inside the window (elementMask & window)
    * mod = false: inactive elements inside the window (~elementMask & window)
    */
  def apply(elementMask: UInt, start: UInt, vl: UInt, mod: Boolean): UInt = {
    val window = UIntToMask(vl, VLEN) & (~UIntToMask(start, VLEN))
    if (mod) elementMask & window
    else (~elementMask).asUInt & window
  }
}
579
object CheckAligned extends VLSUConstants {
  /**
    * 5-bit alignment vector for an address. Cat places its first argument at the
    * MSB, so: bit4 = 8-bit aligned (always true), bit3 = 16-bit, bit2 = 32-bit,
    * bit1 = 64-bit, bit0 = 128-bit aligned. This bit order matches
    * GenPackMask.leadOneVec and GenPackAlignedType.
    */
  def apply(addr: UInt): UInt = {
    val aligned_16 = (addr(0) === 0.U) // 16-bit
    val aligned_32 = (addr(1,0) === 0.U) // 32-bit
    val aligned_64 = (addr(2,0) === 0.U) // 64-bit
    val aligned_128 = (addr(3,0) === 0.U) // 128-bit
    Cat(true.B, aligned_16, aligned_32, aligned_64, aligned_128)
  }
}
589
590/**
591  search if mask have continue 'len' bit '1'
592  mask: source mask
593  len: search length
594*/
object GenPackMask{
  /** True iff the first `len` bits of `mask` are all '1' (requires len >= 1). */
  def leadX(mask: Seq[Bool], len: Int): Bool = {
    if(len == 1){
      mask.head
    }
    else{
      leadX(mask.drop(1),len-1) & mask.head
    }
  }
  /** 5-bit vector of "at least N consecutive leading ones": bit4 = 1 bit,
    * bit3 = 2 bits, bit2 = 4, bit1 = 8, bit0 = 16 — same bit order as CheckAligned.
    */
  def leadOneVec(shiftMask: Seq[Bool]): UInt = {
    // max is 64-bit, so the max num of flow to pack is 8
    // NOTE(review): lead16 below also checks 16 consecutive bits, so the comment
    // above may be stale — confirm the maximum pack width.

    val lead1 = leadX(shiftMask, 1) // continue 1 bit
    val lead2 = leadX(shiftMask, 2) // continue 2 bit
    val lead4 = leadX(shiftMask, 4) // continue 4 bit
    val lead8 = leadX(shiftMask, 8) // continue 8 bit
    val lead16 = leadX(shiftMask, 16) // continue 16 bit
    Cat(lead1, lead2, lead4, lead8, lead16)
  }

  def apply(shiftMask: UInt) = {
    // pack mask
    val packMask = leadOneVec(shiftMask.asBools)
    packMask
  }
}
621/**
622PackEnable = (LeadXVec >> eew) & alignedVec, where the 0th bit represents the ability to merge into a 64 bit flow, the second bit represents the ability to merge into a 32 bit flow, and so on.
623
624example:
625  addr = 0x0, activeMask = b00011100101111, flowIdx = 0, eew = 0(8-bit)
626
627  step 0 : addrAlignedVec = (1, 1, 1, 1) elemIdxAligned = (1, 1, 1, 1)
628  step 1 : activePackVec = (1, 1, 1, 0), inactivePackVec = (0, 0, 0, 0)
629  step 2 : activePackEnable = (1, 1, 1, 0), inactivePackVec = (0, 0, 0, 0)
630
631  we can package 4 8-bit activative flows into a 32-bit flow.
632*/
object GenPackVec extends VLSUConstants{
  /** Per-width pack-enable vector: a width is packable when both the address and
    * the element index are aligned to it AND enough consecutive mask bits
    * (scaled by eew) are set. Bit order follows CheckAligned (bit0 = widest).
    */
  def apply(addr: UInt, shiftMask: UInt, eew: UInt, elemIdx: UInt): UInt = {
    val addrAlignedVec = CheckAligned(addr)
    val elemIdxAligned = CheckAligned(elemIdx)
    val packMask = GenPackMask(shiftMask)
    // generate packVec
    val packVec = addrAlignedVec & elemIdxAligned & (packMask.asUInt >> eew)

    packVec
  }
}
644
object GenPackAlignedType extends VLSUConstants{
  /** Widest aligned type allowed by a pack-enable vector: priority from bit0
    * (128-bit, "b100") down to bit4 (8-bit, "b000").
    */
  def apply(packVec: UInt): UInt = {
    val packAlignedType = PriorityMux(Seq(
      packVec(0) -> "b100".U, // 128-bit
      packVec(1) -> "b011".U, // 64-bit
      packVec(2) -> "b010".U, // 32-bit
      packVec(3) -> "b001".U, // 16-bit
      packVec(4) -> "b000".U  // 8-bit
    ))
    packAlignedType
  }
}
657
object GenPackNum extends VLSUConstants{
  /** Number of original flows merged into one packed flow: 2^(packed - original). */
  def apply(alignedType: UInt, packAlignedType: UInt): UInt = {
    (1.U << (packAlignedType - alignedType)).asUInt
  }
}
663
object genVWmask128 {
  /** 16-byte byte-enable mask for an access of 2^sizeEncode bytes starting at
    * the in-line offset addr(3,0): (2^bytes - 1) shifted to the offset.
    */
  def apply(addr: UInt, sizeEncode: UInt): UInt = {
    val baseMask = LookupTree(sizeEncode,
      List.tabulate(5)(i => i.U -> ((1L << (1 << i)) - 1).U)) // 0x1/0x3/0xf/0xff/0xffff
    (baseMask << addr(3, 0)).asUInt
  }
}
675/*
676* only use in max length is 128
677*/
/*
* only use in max length is 128
*/
object genVWdata {
  /** Replicate the low 2^sizeEncode bytes of `data` across 128 bits
    * (size 4 keeps the full 128-bit value once).
    */
  def apply(data: UInt, sizeEncode: UInt): UInt =
    LookupTree(sizeEncode,
      List.tabulate(5)(i => i.U -> Fill(16 >> i, data((8 << i) - 1, 0))))
}
689
object genUSSplitAddr{
  /**
    * 16-byte-aligned addresses for a unit-stride access split across two lines:
    * index 0 -> current 16-byte line, index 1 -> the next one.
    * NOTE(review): addr(38, 4) hard-codes a 39-bit virtual address — confirm
    * this matches VAddrBits for the current configuration.
    */
  def apply(addr: UInt, index: UInt): UInt = {
    val tmpAddr = Cat(addr(38, 4), 0.U(4.W)) // clear the in-line byte offset
    val nextCacheline = tmpAddr + 16.U
    LookupTree(index, List(
      0.U -> tmpAddr,
      1.U -> nextCacheline
    ))
  }
}
700
object genUSSplitMask{
  /** Shift a 16-bit byte mask by the in-line offset inside a 32-bit window and
    * return the half for the index-th part of the split access
    * (0 = current line, 1 = next line).
    */
  def apply(mask: UInt, index: UInt, addrOffset: UInt): UInt = {
    val shiftedMask = Cat(0.U(16.W), mask) << addrOffset // widen to 32 bits, then shift
    LookupTree(index, List(
      0.U -> shiftedMask(15, 0),
      1.U -> shiftedMask(31, 16)
    ))
  }
}
710
object genUSSplitData{
  /**
    * Shift 128-bit store data left by addrOffset bytes inside a 256-bit window
    * and return the half selected by index (0 = low 128 bits for the current
    * line, 1 = high 128 bits for the next line).
    */
  def apply(data: UInt, index: UInt, addrOffset: UInt): UInt = {
    val tmpData = WireInit(0.U(256.W))
    // byte-shift table: entry i places `data` at byte offset i of the window
    val lookupTable = (0 until 16).map{case i =>
      if(i == 0){
        i.U -> Cat(0.U(128.W), data)
      }else{
        i.U -> Cat(0.U(((16-i)*8).W), data, 0.U((i*8).W))
      }
    }
    tmpData := LookupTree(addrOffset, lookupTable).asUInt

    LookupTree(index, List(
      0.U -> tmpData(127, 0),
      1.U -> tmpData(255, 128)
    ))
  }
}
729
object genVdOffset{
  /** index 0: the offset unchanged; index 1: its two's-complement negation.
    * NOTE(review): the negation is presumably the complementary offset for the
    * second part of a split access — confirm at the call site.
    */
  def apply(offset: UInt, index: UInt): UInt = {
    LookupTree(index, List(
      0.U -> offset,
      1.U -> ((~offset).asUInt + 1.U)
    ))
  }
}
738
739/**
740  * for merge 128-bits data of unit-stride
741  */
742// object mergeDataByoffset{
743//   def apply(oldData: Seq[UInt], newData: UInt, mask: Seq[Bools], offset: Seq[Uint], valid: Seq[Bool]): UInt = {
744
745//   }
746// }
747
object GenVSData extends VLSUConstants {
  /** Extract the elemIdx-th element of `data` (element width chosen by
    * alignedType) and zero-extend it to VLEN bits.
    */
  def apply(data: UInt, elemIdx: UInt, alignedType: UInt): UInt = {
    LookupTree(alignedType, List(
      "b000".U -> ZeroExt(LookupTree(elemIdx(3, 0), List.tabulate(VLEN/8)(i => i.U -> getByte(data, i))), VLEN),
      "b001".U -> ZeroExt(LookupTree(elemIdx(2, 0), List.tabulate(VLEN/16)(i => i.U -> getHalfWord(data, i))), VLEN),
      "b010".U -> ZeroExt(LookupTree(elemIdx(1, 0), List.tabulate(VLEN/32)(i => i.U -> getWord(data, i))), VLEN),
      "b011".U -> ZeroExt(LookupTree(elemIdx(0), List.tabulate(VLEN/64)(i => i.U -> getDoubleWord(data, i))), VLEN),
      "b100".U -> data // 128-bit element: the whole register; wider elements would break this
    ))
  }
}
758}