xref: /XiangShan/src/main/scala/xiangshan/backend/decode/UopInfoGen.scala (revision 99f369f9af5e6460594190dced390d936b62b309)
1/***************************************************************************************
2  * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
3  * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
4  * Copyright (c) 2020-2021 Peng Cheng Laboratory
5  *
6  * XiangShan is licensed under Mulan PSL v2.
7  * You can use this software according to the terms and conditions of the Mulan PSL v2.
8  * You may obtain a copy of Mulan PSL v2 at:
9  *          http://license.coscl.org.cn/MulanPSL2
10  *
11  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
12  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
13  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
14  *
15  * See the Mulan PSL v2 for more details.
16  ***************************************************************************************/
17
18package xiangshan.backend.decode
19
20import org.chipsalliance.cde.config.Parameters
21import chisel3._
22import chisel3.util._
23import freechips.rocketchip.rocket.Instructions
24import freechips.rocketchip.util.uintToBitPat
25import utils._
26import utility._
27import xiangshan.ExceptionNO.illegalInstr
28import xiangshan._
29import xiangshan.backend.fu.fpu.FPU
30import xiangshan.backend.fu.FuType
31import freechips.rocketchip.rocket.Instructions._
32import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
33import xiangshan.backend.fu.vector.Bundles.{VType, VLmul, VSew}
34import yunsuan.VpermType
35import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}
36
/** Combinational lookup table for the uop count of a strided vector load/store.
  *
  * `src` is laid out as `{emul[1:0], nf[2:0]}`: `emul` is used as a power-of-two exponent
  * (register-group multiplier `1 << emul`) and `nf` as "number of fields minus one".
  * `out` is `(1 << emul) * (nf + 1)` when that product does not exceed 8, and 0 otherwise.
  */
class strdiedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(5.W)))
  val out = IO(Output(UInt(4.W)))

  // One (emul, nf, uopNum) row for each of the 4 * 8 input encodings.
  // Combinations whose register count would exceed 8 are given a uop count of 0.
  val combVemulNf: Seq[(Int, Int, Int)] =
    for {
      emulExp <- 0 until 4
      nfEnc   <- 0 until 8
    } yield {
      val regCount = (1 << emulExp) * (nfEnc + 1)
      (emulExp, nfEnc, if (regCount <= 8) regCount else 0)
    }

  // Turn every row into an (input pattern -> output pattern) pair and let the
  // minimizer build the decode logic; unspecified outputs default to 0.
  private val truthTable = TruthTable(
    combVemulNf.map { case (emulExp, nfEnc, uopNum) =>
      BitPat(((emulExp << 3) | nfEnc).U(5.W)) -> BitPat(uopNum.U(4.W))
    },
    BitPat.N(4)
  )
  out := decoder(QMCMinimizer, src, truthTable)
}
55
/** Combinational lookup table for the uop count of an indexed vector load/store.
  *
  * `src` is laid out as `{emul[1:0], lmul[1:0], nf[2:0]}`. `emul` and `lmul` are used as
  * power-of-two exponents (`1 << emul`, `1 << lmul`) and `nf` as "number of fields minus one".
  *
  * With `dataRegs = (1 << lmul) * (nf + 1)`:
  *  - if `dataRegs <= 8`, `out = max(dataRegs, 1 << emul)`;
  *  - otherwise `out = 0`.
  *
  * The truth table itself is 4 bits wide (the largest entry is 8); `out` is kept at 7 bits
  * for interface compatibility, so the connection simply widens the 4-bit decode result.
  */
class indexedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(7.W)))
  val out = IO(Output(UInt(7.W)))

  // indexed load/store: one (emul, lmul, nf, uopNum) row per input encoding.
  // nf = 0 needs no special case: max(lmul * 1, emul) is exactly what the general
  // formula yields, and lmul alone (at most 8) can never violate the limit.
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf   <- 0 until 8
    } yield {
      val emulVal  = 1 << emul
      val dataRegs = (1 << lmul) * (nf + 1)
      // indexed load/store must ensure that lmul * (nf + 1) is less than or equal to 8
      val uopNum = if (dataRegs <= 8) math.max(dataRegs, emulVal) else 0
      (emul, lmul, nf, uopNum)
    }

  out := decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, uopNum) =>
      (BitPat(((emul << 5) | (lmul << 3) | nf).U(7.W)), BitPat(uopNum.U(4.W)))
  }, BitPat.N(4)))
}
87
/** Computes, for one pre-decoded instruction, how many uops it is split into, how many
  * write-backs it produces and its decoded LMUL value.
  *
  * Everything here is combinational: the result is selected by `typeOfSplit` and derived
  * from `vlmul`, `vsew`, the memory element width (`vwidth`), `nf` and `vmvn` found in
  * `io.in.preInfo`. Results are driven onto `io.out.uopInfo` and `io.out.isComplex`.
  */
class UopInfoGen (implicit p: Parameters) extends XSModule {
  val io = IO(new UopInfoGenIO)

  /** Three-way lookup on the encodings "b001" / "b010" / "b011" of `key`;
    * every other encoding selects `otherwise`.
    */
  private def lookup123(key: UInt, otherwise: UInt)(on1: UInt, on2: UInt, on3: UInt): UInt =
    MuxLookup(key, otherwise)(Seq(
      "b001".U -> on1,
      "b010".U -> on2,
      "b011".U -> on3
    ))

  // Decoders providing the base uop count of strided and indexed vector load/store.
  val stridedLSTable = Module(new strdiedLSNumOfUopTable)
  val indexedLSTable = Module(new indexedLSNumOfUopTable)

  // Shorthands for the pre-decoded fields; vsew and the low two bits of vwidth (used as
  // the memory EEW) are each prefixed with one zero bit.
  val typeOfSplit = io.in.preInfo.typeOfSplit
  val vsew = Cat(0.U(1.W), io.in.preInfo.vsew)
  val veew = Cat(0.U(1.W), io.in.preInfo.vwidth(1, 0))
  val vmvn = io.in.preInfo.vmvn
  val isVlsr = io.in.preInfo.isVlsr
  val isVlsm = io.in.preInfo.isVlsm
  val vlmul = io.in.preInfo.vlmul
  val nf = io.in.preInfo.nf
  val isComplex = io.out.isComplex

  // LMUL as a register count (1 / 2 / 4 / 8) and as an exponent (0..3).
  // Every vlmul encoding other than "b001".."b011" is treated as 1 (exponent 0).
  val lmul = lookup123(vlmul, 1.U(4.W))(2.U, 4.U, 8.U)
  val simple_lmul = lookup123(vlmul, 0.U(2.W))(1.U, 2.U, 3.U)

  // Encoded EMUL: veew - vsew + vlmul, with the subtraction written as "+ 1 + ~vsew".
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt

  // EMUL as a register count and as an exponent, decoded the same way as LMUL.
  // TODO : eew and emul illegal exception need to be handled
  val emul = lookup123(vemul, 1.U(4.W))(2.U, 4.U, 8.U)
  val simple_emul = lookup123(vemul, 0.U(2.W))(1.U, 2.U, 3.U)

  // Per-LMUL uop counts of the permutation-style instructions.
  val numOfUopVslide = lookup123(vlmul, 1.U(log2Up(MaxUopSize + 1).W))(3.U, 10.U, 36.U)
  val numOfUopVrgather = lookup123(vlmul, 1.U(log2Up(MaxUopSize + 1).W))(4.U, 16.U, 64.U)
  // vrgatherei16 takes twice the vrgather count when vsew is zero and vlmul is not "b011".
  val numOfUopVrgatherei16 = {
    val doubled = (!vsew.orR) && (vlmul =/= "b011".U)
    Mux(doubled, Cat(numOfUopVrgather, 0.U(1.W)), numOfUopVrgather)
  }
  val numOfUopVcompress = lookup123(vlmul, 1.U(4.W))(4.U, 13.U, 43.U)

  // Floating-point (unordered) reduction: add steps plus fold steps, at least one uop.
  val numOfUopVFRED = {
    // addTime includes the add with frs1
    val addTime = MuxLookup(vlmul, 1.U(4.W))(Seq(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(vsew, "b000".U)(Seq(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul <  1: foldTime = vlmul - foldLastVlmul
    // lmul >= 1: foldTime = 0     - foldLastVlmul
    val foldTime = Mux(vlmul(2), vlmul, 0.U) - foldLastVlmul
    val steps = addTime + foldTime
    Mux(steps.orR, steps, 1.U)
  }

  // Ordered floating-point reduction: the per-vsew base value shifted by vlmul, at least 1.
  val numOfUopVFREDOSUM = {
    val uvlMax = MuxLookup(vsew, 1.U)(Seq(
      VSew.e16 -> 8.U,
      VSew.e32 -> 4.U,
      VSew.e64 -> 2.U,
    ))
    val vlMax = Wire(UInt(7.W))
    // vlmul(2) set: shift right by the low two bits of -vlmul; otherwise shift left
    // by the low two bits of vlmul.
    vlMax := Mux(vlmul(2), uvlMax >> (-vlmul)(1,0), uvlMax << vlmul(1,0)).asUInt
    Mux(vlMax.orR, vlMax, 1.U)
  }

  /*
   * Widening ops: when 1 <= lmul <= 4, numOfUopWV = 2 * lmul, otherwise numOfUopWV = 1
   */
  val numOfUopWV = MuxLookup(vlmul, 1.U(4.W))(Seq(
    "b000".U -> 2.U,
    "b001".U -> 4.U,
    "b010".U -> 8.U,
  ))
  /*
   * Widening ops that need an extra move instruction:
   * when 1 <= lmul <= 4, numOfUopWX = 2 * lmul + 1, otherwise numOfUopWX = 2
   */
  val numOfUopWX = MuxLookup(vlmul, 2.U(4.W))(Seq(
    "b000".U -> 3.U,
    "b001".U -> 5.U,
    "b010".U -> 9.U,
  ))

  // Base uop counts of vector load/store, taken from the two decode tables.
  stridedLSTable.src := Cat(simple_emul, nf)
  val numOfUopVLoadStoreStrided = stridedLSTable.out
  indexedLSTable.src := Cat(simple_emul, simple_lmul, nf)
  val numOfUopVLoadStoreIndexed = indexedLSTable.out

  // Values shared by several split types below.
  private val lmulPlusOne = lmul +& 1.U
  private val lmulTimesTwo = Cat(lmul, 0.U(1.W))
  private val slidePlusOne = numOfUopVslide + 1.U
  private val stridedPlusTwo = numOfUopVLoadStoreStrided +& 2.U
  // Unit-stride: whole-register takes nf + 2, mask takes 2, everything else the table
  // value plus one move instruction.
  private val unitStrideUops =
    Mux(isVlsr, nf +& 2.U, Mux(isVlsm, 2.U, (numOfUopVLoadStoreStrided +& 1.U)))

  // Number of uops, selected by split type; unlisted types are a single uop.
  val numOfUop = MuxLookup(typeOfSplit, 1.U(log2Up(MaxUopSize + 1).W))(Seq(
    UopSplitType.VSET -> 2.U,
    UopSplitType.VEC_0XV -> 2.U,
    UopSplitType.VEC_VVV -> lmul,
    UopSplitType.VEC_VFV -> lmulPlusOne,
    UopSplitType.VEC_EXT2 -> lmul,
    UopSplitType.VEC_EXT4 -> lmul,
    UopSplitType.VEC_EXT8 -> lmul,
    UopSplitType.VEC_VVM -> lmul,
    UopSplitType.VEC_VFM -> lmulPlusOne,
    UopSplitType.VEC_VFRED -> numOfUopVFRED,
    UopSplitType.VEC_VFREDOSUM -> numOfUopVFREDOSUM,
    UopSplitType.VEC_VXM -> lmulPlusOne,
    UopSplitType.VEC_VXV -> lmulPlusOne,
    UopSplitType.VEC_VFW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WFW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_VVW -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_WVW -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_VXW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WXW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WVV -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_WXV -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_SLIDE1UP -> lmulPlusOne,
    UopSplitType.VEC_FSLIDE1UP -> lmulPlusOne,
    UopSplitType.VEC_SLIDE1DOWN -> lmulTimesTwo,
    UopSplitType.VEC_FSLIDE1DOWN -> lmulTimesTwo,
    UopSplitType.VEC_VRED -> lmul,
    UopSplitType.VEC_SLIDEUP -> slidePlusOne,
    UopSplitType.VEC_SLIDEDOWN -> slidePlusOne,
    UopSplitType.VEC_M0X -> lmul,
    UopSplitType.VEC_MVV -> (lmulTimesTwo - 1.U),
    UopSplitType.VEC_VWW -> lmulTimesTwo, // lmul <= 4
    UopSplitType.VEC_RGATHER -> numOfUopVrgather,
    UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
    UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
    UopSplitType.VEC_COMPRESS -> numOfUopVcompress,
    UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
    UopSplitType.VEC_US_LDST -> unitStrideUops,     // with one move instruction
    UopSplitType.VEC_US_FF_LD -> stridedPlusTwo,
    UopSplitType.VEC_S_LDST -> stridedPlusTwo,      // with two move instructions
    UopSplitType.VEC_I_LDST -> (numOfUopVLoadStoreIndexed +& 1.U),
    UopSplitType.AMO_CAS_W -> 2.U,
    UopSplitType.AMO_CAS_D -> 2.U,
    UopSplitType.AMO_CAS_Q -> 4.U,
  ))

  // Number of write-backs: half the uop count for AMOCAS, the uop count otherwise.
  val numOfWB = Mux(UopSplitType.isAMOCAS(typeOfSplit), numOfUop >> 1, numOfUop)

  // An instruction is flagged complex when it is a vector arithmetic/config instruction,
  // a vector memory instruction or an AMOCAS.
  isComplex := io.in.preInfo.isVecArith || io.in.preInfo.isVecMem || io.in.preInfo.isAmoCAS
  io.out.uopInfo.numOfUop := numOfUop
  io.out.uopInfo.numOfWB := numOfWB
  io.out.uopInfo.lmul := lmul

}
254
/** Port bundle of [[UopInfoGen]]: pre-decoded instruction fields in, uop-split info out. */
class UopInfoGenIO(implicit p: Parameters) extends XSBundle {
  // Pre-decoded fields of the instruction being examined.
  val in = new Bundle { val preInfo = Input(new PreInfo) }
  // isComplex: asserted for vector arith/config, vector memory and AMOCAS instructions.
  // uopInfo:   uop count, write-back count and decoded LMUL.
  val out = new Bundle {
    val isComplex = Output(Bool())
    val uopInfo = Output(new UopInfo)
  }
}
264
/** Pre-decoded instruction fields consumed by [[UopInfoGen]]. */
class PreInfo(implicit p: Parameters) extends XSBundle {
  // Instruction class flags.
  val isVecArith = Bool()           // vector arithmetic or configuration instruction
  val isVecMem = Bool()             // vector memory instruction
  val isAmoCAS = Bool()             // AMOCAS instruction

  // How the instruction is split into uops.
  val typeOfSplit = UopSplitType()

  // Vector configuration and encoding fields.
  val vsew = VSew()                 // 2 bits
  val vlmul = VLmul()
  val vwidth = UInt(3.W)            // eew; UopInfoGen uses bits (1, 0)
  val nf = UInt(3.W)                // nf field of vector load/store
  val vmvn = UInt(3.W)              // vmvnr: UopInfoGen issues vmvn + 1 uops

  // Special forms of unit-stride vector load/store.
  val isVlsr = Bool()               // whole-register load/store
  val isVlsm = Bool()               // mask load/store
}
278
/** Result of [[UopInfoGen]]: uop count, write-back count and decoded LMUL. */
class UopInfo(implicit p: Parameters) extends XSBundle {
  // Width able to represent every count from 0 up to and including MaxUopSize.
  private def countWidth = log2Up(MaxUopSize + 1).W

  val numOfUop = UInt(countWidth)   // number of uops the instruction is split into
  val numOfWB = UInt(countWidth)    // number of write-backs
  val lmul = UInt(4.W)              // LMUL as a register count (1, 2, 4 or 8)
}