xref: /XiangShan/src/main/scala/xiangshan/backend/decode/UopInfoGen.scala (revision e4d4d30585412eb8ac83b5c75599a348356342a2)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.fu.vector.Bundles.{VType, VLmul, VSew}
import yunsuan.VpermType
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

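// Lookup table for the number of uops needed by a unit-stride or strided vector load/store.
// Input `src` packs {emul[1:0], nf[2:0]}; the output is emul * (nf + 1) when that product does
// not exceed 8 (the RVV limit on EMUL * NFIELDS), and 0 for the illegal combinations.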
class strdiedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(5.W)))
  val out = IO(Output(UInt(4.W)))
  // strided load/store
  var combVemulNf : Seq[(Int, Int, Int)] = Seq()
  for (emul <- 0 until 4) {
    for (nf <- 0 until 8) {
      if ((1 << emul) * (nf + 1) <= 8) {
        combVemulNf :+= (emul, nf, (1 << emul) * (nf + 1))
      } else {
        combVemulNf :+= (emul, nf, 0)
      }
    }
  }
  out := decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, nf, uopNum) => (BitPat((emul << 3 | nf).U(5.W)), BitPat(uopNum.U(4.W)))
  }, BitPat.N(4)))
}

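// Lookup table for the number of uops needed by an indexed vector load/store.
// Input `src` packs {emul[1:0], lmul[1:0], nf[2:0]}; the output is max(emul, lmul) * (nf + 1),
// covering both the index and the data register groups, and 0 when lmul * (nf + 1) exceeds 8.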
class indexedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(7.W)))
  val out = IO(Output(UInt(7.W)))
  // indexed load/store
  var combVemulNf : Seq[(Int, Int, Int, Int)] = Seq()
  for (emul <- 0 until 4) {
    for (lmul <- 0 until 4) {
      val max_mul = if (lmul > emul) lmul else emul
      for (nf <- 0 until 8) {
        if ((1 << lmul) * (nf + 1) <= 8) {    // indexed load/store must ensure that lmul * (nf + 1) is less than or equal to 8
          combVemulNf :+= (emul, lmul, nf, (1 << max_mul) * (nf + 1))
        } else {
          combVemulNf :+= (emul, lmul, nf, 0)
        }
      }
    }
  }
  out := decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, uopNum) => (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat(uopNum.U(7.W)))
  }, BitPat.N(7)))
}

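// UopInfoGen derives, from the pre-decoded fields of an instruction, how many uops it will be
// split into (numOfUop), how many of them write back a result (numOfWB), and the effective lmul,
// and flags the instruction as complex when its split type is anything other than a simple
// scalar uop.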
class UopInfoGen (implicit p: Parameters) extends XSModule {
  val io = IO(new UopInfoGenIO)

  val stridedLSTable = Module(new strdiedLSNumOfUopTable)     // decoder for strided load/store
  val indexedLSTable = Module(new indexedLSNumOfUopTable)     // decoder for indexed load/store
  val indexedLSWBTable = Module(new indexedLSNumOfUopTable)   // decoder for indexed load/store WB

  val typeOfSplit = io.in.preInfo.typeOfSplit
  val vsew = Cat(0.U(1.W), io.in.preInfo.vsew)
  val veew = Cat(0.U(1.W), io.in.preInfo.vwidth(1, 0))
  val vmvn = io.in.preInfo.vmvn
  val isVlsr = io.in.preInfo.isVlsr
  val vlmul = io.in.preInfo.vlmul
  val nf = io.in.preInfo.nf
  val isComplex = io.out.isComplex

  val lmul = MuxLookup(vlmul, 1.U(4.W), Array(
    "b001".U -> 2.U,
    "b010".U -> 4.U,
    "b011".U -> 8.U
  ))
  val simple_lmul = MuxLookup(vlmul, 0.U(2.W), Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

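  // emul is derived in the log2-encoded domain: log2(emul) = eew - sew + log2(lmul).
  // The expression below computes veew + vlmul - vsew, with -vsew built as ~vsew + 1 (two's complement).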
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt

  val emul = MuxLookup(vemul, 1.U(4.W), Array(
    "b001".U -> 2.U,
    "b010".U -> 4.U,
    "b011".U -> 8.U
  ))                                                              // TODO: illegal eew/emul combinations need to raise an exception
  val simple_emul = MuxLookup(vemul, 0.U(2.W), Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

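  // Constant uop counts for the register-group permutations below:
  // vslide needs 1/3/10/36 uops for lmul = 1/2/4/8 (lmul * (lmul + 1) / 2), vrgather needs
  // lmul * lmul uops (1/4/16/64), doubled for vrgatherei16 when sew == 8 and lmul != 8,
  // and vcompress needs 1/4/13/43 uops.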
  val numOfUopVslide = MuxLookup(vlmul, 1.U(log2Up(MaxUopSize + 1).W), Array(
    "b001".U -> 3.U,
    "b010".U -> 10.U,
    "b011".U -> 36.U
  ))
  val numOfUopVrgather = MuxLookup(vlmul, 1.U(log2Up(MaxUopSize + 1).W), Array(
    "b001".U -> 4.U,
    "b010".U -> 16.U,
    "b011".U -> 64.U
  ))
  val numOfUopVrgatherei16 = Mux((!vsew.orR) && (vlmul =/= "b011".U),
    Cat(numOfUopVrgather, 0.U(1.W)),
    numOfUopVrgather
  )
  val numOfUopVcompress = MuxLookup(vlmul, 1.U(4.W), Array(
    "b001".U -> 4.U,
    "b010".U -> 13.U,
    "b011".U -> 43.U
  ))
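  // Unordered float reduction (vfred*): addTime grows with lmul and, per the comment below,
  // already includes the add with frs1; foldTime is the number of in-register folding steps
  // (a function of sew, and of vlmul when lmul < 1) needed to reduce down to a single element.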
  val numOfUopVFRED = {
    // addTime includes the add of frs1
    val addTime = MuxLookup(vlmul, 1.U(4.W), Array(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(vsew, "b000".U, Array(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul < 1: foldTime = vlmul - foldLastVlmul
    // lmul >= 1: foldTime = 0.U - foldLastVlmul
    val foldTime = Mux(vlmul(2), vlmul, 0.U) - foldLastVlmul
    addTime + foldTime
  }
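  // Ordered float reduction (vfredosum) is split element by element, so the uop count is VLMAX:
  // the per-register element count (8/4/2 for e16/e32/e64, i.e. a 128-bit register) shifted
  // up or down by vlmul.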
  val numOfUopVFREDOSUM = {
    val uvlMax = MuxLookup(vsew, 0.U, Array(
      VSew.e16 -> 8.U,
      VSew.e32 -> 4.U,
      VSew.e64 -> 2.U,
    ))
    val vlMax = Wire(UInt(7.W))
    vlMax := Mux(vlmul(2), uvlMax >> (-vlmul)(1,0), uvlMax << vlmul(1,0)).asUInt
    vlMax
  }

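  // Drive the lookup tables. The writeback table reuses the indexed-table generator but is keyed
  // only on {lmul, nf}: the 5-bit Cat is zero-extended to the 7-bit src, so its emul field is 0
  // and the result is effectively lmul * (nf + 1).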
  stridedLSTable.src := Cat(simple_emul, nf)
  val numOfUopVLoadStoreStrided = stridedLSTable.out
  indexedLSTable.src := Cat(simple_emul, simple_lmul, nf)
  val numOfUopVLoadStoreIndexed = indexedLSTable.out
  indexedLSWBTable.src := Cat(simple_lmul, nf)
  val numOfWBVLoadStoreIndexed = indexedLSWBTable.out

  // number of uops
  val numOfUop = MuxLookup(typeOfSplit, 1.U(log2Up(MaxUopSize + 1).W), Array(
    UopSplitType.VSET -> 2.U,
    UopSplitType.VEC_0XV -> 2.U,
    UopSplitType.VEC_VVV -> lmul,
    UopSplitType.VEC_VFV -> lmul,
    UopSplitType.VEC_EXT2 -> lmul,
    UopSplitType.VEC_EXT4 -> lmul,
    UopSplitType.VEC_EXT8 -> lmul,
    UopSplitType.VEC_VVM -> lmul,
    UopSplitType.VEC_VFM -> lmul,
    UopSplitType.VEC_VFRED -> numOfUopVFRED,
    UopSplitType.VEC_VFREDOSUM -> numOfUopVFREDOSUM,
    UopSplitType.VEC_VXM -> (lmul +& 1.U),
    UopSplitType.VEC_VXV -> (lmul +& 1.U),
    UopSplitType.VEC_VFW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WFW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_VVW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WVW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_VXW -> Cat(lmul, 1.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WXW -> Cat(lmul, 1.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WVV -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WXV -> Cat(lmul, 1.U(1.W)), // lmul <= 4
    UopSplitType.VEC_SLIDE1UP -> (lmul +& 1.U),
    UopSplitType.VEC_FSLIDE1UP -> lmul,
    UopSplitType.VEC_SLIDE1DOWN -> Cat(lmul, 0.U(1.W)),
    UopSplitType.VEC_FSLIDE1DOWN -> (Cat(lmul, 0.U(1.W)) - 1.U),
    UopSplitType.VEC_VRED -> lmul,
    UopSplitType.VEC_SLIDEUP -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_SLIDEDOWN -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_M0X -> lmul,
    UopSplitType.VEC_MVV -> (Cat(lmul, 0.U(1.W)) - 1.U),
    UopSplitType.VEC_M0X_VFIRST -> 1.U,
    UopSplitType.VEC_VWW -> Cat(lmul, 0.U(1.W)),
    UopSplitType.VEC_RGATHER -> numOfUopVrgather,
    UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
    UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
    UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
    UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, (numOfUopVLoadStoreStrided +& 1.U)),   // with one move instruction
    UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U),    // with two move instructions
    UopSplitType.VEC_I_LDST -> (numOfUopVLoadStoreIndexed +& 1.U),
  ))

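  // The writeback count usually matches numOfUop; it differs for the mask ops VEC_M0X and
  // VEC_M0X_VFIRST and for indexed loads/stores, where writebacks follow lmul
  // (numOfWBVLoadStoreIndexed) rather than max(emul, lmul).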
  // number of writebacks
  val numOfWB = MuxLookup(typeOfSplit, 1.U(log2Up(MaxUopSize + 1).W), Array(
    UopSplitType.VSET -> 2.U,
    UopSplitType.VEC_0XV -> 2.U,
    UopSplitType.VEC_VVV -> lmul,
    UopSplitType.VEC_VFV -> lmul,
    UopSplitType.VEC_EXT2 -> lmul,
    UopSplitType.VEC_EXT4 -> lmul,
    UopSplitType.VEC_EXT8 -> lmul,
    UopSplitType.VEC_VVM -> lmul,
    UopSplitType.VEC_VFM -> lmul,
    UopSplitType.VEC_VFRED -> numOfUopVFRED,
    UopSplitType.VEC_VFREDOSUM -> numOfUopVFREDOSUM,
    UopSplitType.VEC_VXM -> (lmul +& 1.U),
    UopSplitType.VEC_VXV -> (lmul +& 1.U),
    UopSplitType.VEC_VFW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WFW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_VVW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WVW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_VXW -> Cat(lmul, 1.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WXW -> Cat(lmul, 1.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WVV -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_WXV -> Cat(lmul, 1.U(1.W)), // lmul <= 4
    UopSplitType.VEC_SLIDE1UP -> (lmul +& 1.U),
    UopSplitType.VEC_FSLIDE1UP -> lmul,
    UopSplitType.VEC_SLIDE1DOWN -> Cat(lmul, 0.U(1.W)),
    UopSplitType.VEC_FSLIDE1DOWN -> (Cat(lmul, 0.U(1.W)) - 1.U),
    UopSplitType.VEC_VRED -> lmul,
    UopSplitType.VEC_SLIDEUP -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_SLIDEDOWN -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_M0X -> (lmul +& 1.U),
    UopSplitType.VEC_MVV -> (Cat(lmul, 0.U(1.W)) - 1.U),
    UopSplitType.VEC_M0X_VFIRST -> 2.U,
    UopSplitType.VEC_VWW -> Cat(lmul, 0.U(1.W)),
    UopSplitType.VEC_RGATHER -> numOfUopVrgather,
    UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
    UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
    UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, (numOfUopVLoadStoreStrided +& 1.U)),   // with one move instruction
    UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U),    // with two move instructions
    UopSplitType.VEC_I_LDST -> (numOfWBVLoadStoreIndexed +& 1.U),
    UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
  ))

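  // An instruction is flagged complex whenever its split type is not a simple scalar uop (SCA_SIM).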
  isComplex := typeOfSplit =/= UopSplitType.SCA_SIM
  io.out.uopInfo.numOfUop := numOfUop
  io.out.uopInfo.numOfWB := numOfWB
  io.out.uopInfo.lmul := lmul

}

class UopInfoGenIO(implicit p: Parameters) extends XSBundle {
  val in = new Bundle {
    val preInfo = Input(new PreInfo)
  }
  val out = new Bundle {
    val isComplex = Output(Bool())
    val uopInfo = Output(new UopInfo)
  }
}

class PreInfo(implicit p: Parameters) extends XSBundle {
  val typeOfSplit = UopSplitType()
  val vsew = VSew()          // 2 bits
  val vlmul = VLmul()
  val vwidth = UInt(3.W)     // eew
  val nf = UInt(3.W)
  val vmvn = UInt(3.W)       // vmvnr
  val isVlsr = Bool()        // is vector whole register load/store
}

class UopInfo(implicit p: Parameters) extends XSBundle {
  val numOfUop = UInt(log2Up(MaxUopSize + 1).W)
  val numOfWB = UInt(log2Up(MaxUopSize + 1).W)
  val lmul = UInt(4.W)
}