/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.fu.vector.Bundles.{VType, VLmul, VSew}
import yunsuan.VpermType
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
 * Combinational lookup table giving the number of uops needed by a vector
 * strided (and unit-stride) load/store.
 *
 * Input encoding: src = {simple_emul (2b), nf (3b)}; simple_emul is log2(EMUL)
 * for EMUL >= 1, and the nf field is the instruction's nf (actual segments =
 * nf + 1). The uop count is EMUL * (nf + 1); combinations exceeding 8 are
 * illegal per the RVV spec (EMUL * NFIELDS <= 8) and map to 0.
 *
 * NOTE(review): class name has a typo ("strdied" -> "strided"); kept
 * unchanged because instantiation sites reference this exact name.
 */
class strdiedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(5.W)))
  val out = IO(Output(UInt(4.W)))
  // Enumerate all (emul, nf) pairs; entries violating EMUL * (nf + 1) <= 8 yield 0.
  var combVemulNf : Seq[(Int, Int, Int)] = Seq()
  for (emul <- 0 until 4) {
    for (nf <- 0 until 8) {
      if ((1 << emul) * (nf + 1) <= 8) {
        combVemulNf :+= (emul, nf, (1 << emul) * (nf + 1))
      } else {
        combVemulNf :+= (emul, nf, 0)
      }
    }
  }
  out := decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, nf, uopNum) => (BitPat((emul << 3 | nf).U(5.W)), BitPat(uopNum.U(4.W)))
  }, BitPat.N(4)))
}

/**
 * Combinational lookup table giving the number of uops needed by a vector
 * indexed load/store.
 *
 * Input encoding: src = {simple_emul (2b), simple_lmul (2b), nf (3b)}, where
 * simple_emul = log2(EMUL of the index vector) and simple_lmul = log2(LMUL of
 * the data vector), both for multipliers >= 1.
 * nf == 0: uop count = max(LMUL, EMUL).
 * nf  > 0: uop count = max(LMUL * (nf + 1), EMUL), provided
 * LMUL * (nf + 1) <= 8 (RVV constraint); illegal combinations map to 0.
 * The 4-bit table result is zero-extended into the 7-bit output.
 */
class indexedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(7.W)))
  val out = IO(Output(UInt(7.W)))
  var combVemulNf : Seq[(Int, Int, Int, Int)] = Seq()
  for (emul <- 0 until 4) {
    for (lmul <- 0 until 4) {
      var emul_val = 1 << emul
      var lmul_val = 1 << lmul
      var mul_max = if (emul_val > lmul_val) emul_val else lmul_val
      // nf = 0, number of uop = Max(lmul, emul)
      // (for lmul in 0..3 this check always holds; kept for symmetry with the nf > 0 case)
      if ((1 << lmul) <= 8) { // indexed load/store must ensure that the lmul * nf is less or equal to 8
        combVemulNf :+= (emul, lmul, 0, mul_max)
      } else {
        combVemulNf :+= (emul, lmul, 0, 0)
      }
      // nf > 0, number of uop = Max(lmul * nf, emul)
      for (nf <- 1 until 8) {
        var uop_num = if (lmul_val * (nf + 1) > emul_val) lmul_val * (nf + 1) else emul_val
        if (lmul_val * (nf + 1) <= 8) { // indexed load/store must ensure that the lmul * nf is less or equal to 8
          combVemulNf :+= (emul, lmul, nf, uop_num)
        } else {
          combVemulNf :+= (emul, lmul, nf, 0)
        }
      }
    }
  }
  out := decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, uopNum) => (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat(uopNum.U(4.W)))
  }, BitPat.N(4)))
}

/**
 * Computes, for a pre-decoded vector instruction, how many uops it will be
 * split into (numOfUop), how many of them write back (numOfWB), the effective
 * LMUL, and whether the instruction needs the complex decoder path
 * (isComplex: any non-scalar split type producing more than one uop).
 */
class UopInfoGen (implicit p: Parameters) extends XSModule {
  val io = IO(new UopInfoGenIO)

  val stridedLSTable = Module(new strdiedLSNumOfUopTable) // decoder for strided load/store
  val indexedLSTable = Module(new indexedLSNumOfUopTable) // decoder for indexed load/store

  val typeOfSplit = io.in.preInfo.typeOfSplit
  val vsew = Cat(0.U(1.W), io.in.preInfo.vsew)
  val veew = Cat(0.U(1.W), io.in.preInfo.vwidth(1, 0))
  val vmvn = io.in.preInfo.vmvn
  val isVlsr = io.in.preInfo.isVlsr
  val isVlsm = io.in.preInfo.isVlsm
  val vlmul = io.in.preInfo.vlmul
  val nf = io.in.preInfo.nf
  val isComplex = io.out.isComplex

  // LMUL as an integer register-group count (fractional LMULs count as 1).
  val lmul = MuxLookup(vlmul, 1.U(4.W))(Array(
    "b001".U -> 2.U,
    "b010".U -> 4.U,
    "b011".U -> 8.U
  ))
  // log2(LMUL) for LMUL >= 1 (0 for fractional LMULs); feeds the LS uop tables.
  val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  // log2(EMUL) = log2(EEW) - log2(SEW) + log2(LMUL), using ~vsew + 1.U == -vsew.
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt

  val emul = MuxLookup(vemul, 1.U(4.W))(Array(
    "b001".U -> 2.U,
    "b010".U -> 4.U,
    "b011".U -> 8.U
  ))  //TODO : eew and emul illegal exception need to be handled
  // log2(EMUL) for EMUL >= 1; feeds the LS uop tables.
  val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  // vslideup/vslidedown: lmul * (lmul + 1) / 2 + lmul pattern per LMUL.
  val numOfUopVslide = MuxLookup(vlmul, 1.U(log2Up(MaxUopSize + 1).W))(Array(
    "b001".U -> 3.U,
    "b010".U -> 10.U,
    "b011".U -> 36.U
  ))
  // vrgather: lmul * lmul uops (every destination group reads every source group).
  val numOfUopVrgather = MuxLookup(vlmul, 1.U(log2Up(MaxUopSize + 1).W))(Array(
    "b001".U -> 4.U,
    "b010".U -> 16.U,
    "b011".U -> 64.U
  ))
  // vrgatherei16 with SEW=8: the 16-bit index vector occupies twice the groups,
  // doubling the uop count (except LMUL=8, where EMUL would exceed 8).
  val numOfUopVrgatherei16 = Mux((!vsew.orR) && (vlmul =/= "b011".U),
    Cat(numOfUopVrgather, 0.U(1.W)),
    numOfUopVrgather
  )
  val numOfUopVcompress = MuxLookup(vlmul, 1.U(4.W))(Array(
    "b001".U -> 4.U,
    "b010".U -> 13.U,
    "b011".U -> 43.U
  ))
  // Ordered-agnostic float reductions: tree-fold within a group plus cross-group adds.
  val numOfUopVFRED = {
    // addTime include add frs1
    val addTime = MuxLookup(vlmul, 1.U(4.W))(Array(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(vsew, "b000".U)(Array(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul < 1, foldTime = vlmul - foldLastVlmul
    // lmul >= 1, foldTime = 0.U - foldLastVlmul
    val foldTime = Mux(vlmul(2), vlmul, 0.U) - foldLastVlmul
    addTime + foldTime
  }
  // Ordered float reduction: one uop per element, i.e. VLMAX = VLEN/SEW scaled by LMUL.
  val numOfUopVFREDOSUM = {
    val uvlMax = MuxLookup(vsew, 0.U)(Array(
      VSew.e16 -> 8.U,
      VSew.e32 -> 4.U,
      VSew.e64 -> 2.U,
    ))
    val vlMax = Wire(UInt(7.W))
    vlMax := Mux(vlmul(2), uvlMax >> (-vlmul)(1,0), uvlMax << vlmul(1,0)).asUInt
    vlMax
  }
  /*
   * when 1 <= lmul <= 4, numOfUopWV = 2 * lmul, otherwise numOfUopWV = 1
   */
  val numOfUopWV = MuxLookup(vlmul, 1.U(4.W))(Array(
    "b000".U -> 2.U,
    "b001".U -> 4.U,
    "b010".U -> 8.U,
  ))
  /*
   * need an extra move instruction
   * when 1 <= lmul <= 4, numOfUopWX = 2 * lmul + 1, otherwise numOfUopWX = 2
   */
  val numOfUopWX = MuxLookup(vlmul, 2.U(4.W))(Array(
    "b000".U -> 3.U,
    "b001".U -> 5.U,
    "b010".U -> 9.U,
  ))

  stridedLSTable.src := Cat(simple_emul, nf)
  val numOfUopVLoadStoreStrided = stridedLSTable.out
  indexedLSTable.src := Cat(simple_emul, simple_lmul, nf)
  val numOfUopVLoadStoreIndexed = indexedLSTable.out

  //number of uop
  val numOfUop = MuxLookup(typeOfSplit, 1.U(log2Up(MaxUopSize + 1).W))(Array(
    UopSplitType.VSET -> 2.U,
    UopSplitType.VEC_0XV -> 2.U,
    UopSplitType.VEC_VVV -> lmul,
    UopSplitType.VEC_VFV -> (lmul +& 1.U),
    UopSplitType.VEC_EXT2 -> lmul,
    UopSplitType.VEC_EXT4 -> lmul,
    UopSplitType.VEC_EXT8 -> lmul,
    UopSplitType.VEC_VVM -> lmul,
    UopSplitType.VEC_VFM -> (lmul +& 1.U),
    UopSplitType.VEC_VFRED -> numOfUopVFRED,
    UopSplitType.VEC_VFREDOSUM -> numOfUopVFREDOSUM,
    UopSplitType.VEC_VXM -> (lmul +& 1.U),
    UopSplitType.VEC_VXV -> (lmul +& 1.U),
    UopSplitType.VEC_VFW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WFW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_VVW -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_WVW -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_VXW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WXW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WVV -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_WXV -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_SLIDE1UP -> (lmul +& 1.U),
    UopSplitType.VEC_FSLIDE1UP -> (lmul +& 1.U),
    UopSplitType.VEC_SLIDE1DOWN -> Cat(lmul, 0.U(1.W)), // 2 * lmul
    UopSplitType.VEC_FSLIDE1DOWN -> Cat(lmul, 0.U(1.W)), // 2 * lmul
    UopSplitType.VEC_VRED -> lmul,
    UopSplitType.VEC_SLIDEUP -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_SLIDEDOWN -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_M0X -> lmul,
    UopSplitType.VEC_MVV -> (Cat(lmul, 0.U(1.W)) - 1.U), // 2 * lmul - 1
    UopSplitType.VEC_VWW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_RGATHER -> numOfUopVrgather,
    UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
    UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
    UopSplitType.VEC_COMPRESS -> numOfUopVcompress,
    UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
    UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, Mux(isVlsm, 2.U, (numOfUopVLoadStoreStrided +& 1.U))), // with one move instruction
    UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U), // with two move instructions
    UopSplitType.VEC_I_LDST -> (numOfUopVLoadStoreIndexed +& 1.U),
  ))

  // number of writeback num
  val numOfWB = numOfUop

  // vector instruction's uop UopSplitType are not SCA_SIM, and when the number of uop is 1, we can regard it as a simple instruction
  isComplex := typeOfSplit =/= UopSplitType.SCA_SIM && numOfUop =/= 1.U
  io.out.uopInfo.numOfUop := numOfUop
  io.out.uopInfo.numOfWB := numOfWB
  io.out.uopInfo.lmul := lmul

}

/** IO bundle for [[UopInfoGen]]: pre-decode info in, split info out. */
class UopInfoGenIO(implicit p: Parameters) extends XSBundle {
  val in = new Bundle {
    val preInfo = Input(new PreInfo)
  }
  val out = new Bundle {
    val isComplex = Output(Bool())
    val uopInfo = Output(new UopInfo)
  }
}

/** Pre-decoded fields of a vector instruction needed to size its uop split. */
class PreInfo(implicit p: Parameters) extends XSBundle {
  val typeOfSplit = UopSplitType()
  val vsew = VSew()         // 2 bit
  val vlmul = VLmul()
  val vwidth = UInt(3.W)    // eew
  val nf = UInt(3.W)
  val vmvn = UInt(3.W)      // vmvnr
  val isVlsr = Bool()       // is vector whole register load/store
  val isVlsm = Bool()       // is vector mask load/store
}

/** Split result: uop count, writeback count, and effective LMUL. */
class UopInfo(implicit p: Parameters) extends XSBundle {
  val numOfUop = UInt(log2Up(MaxUopSize + 1).W)
  val numOfWB = UInt(log2Up(MaxUopSize + 1).W)
  val lmul = UInt(4.W)
}