/***************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.fu.vector.Bundles.{VType, VLmul, VSew}
import yunsuan.VpermType
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

class stridedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(5.W)))
  val out = IO(Output(UInt(4.W)))
  // strided load/store
  var combVemulNf: Seq[(Int, Int, Int)] = Seq()
  for (emul <- 0 until 4) {
    for (nf <- 0 until 8) {
      if ((1 << emul) * (nf + 1) <= 8) {
        combVemulNf :+= (emul, nf, (1 << emul) * (nf + 1))
      } else {
        combVemulNf :+= (emul, nf, 0)
      }
    }
  }
  out := decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, nf, uopNum) => (BitPat((emul << 3 | nf).U(5.W)), BitPat(uopNum.U(4.W)))
  }, BitPat.N(4)))
}

class indexedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(7.W)))
  val out = IO(Output(UInt(7.W)))
  // indexed load/store
  var combVemulNf: Seq[(Int, Int, Int, Int)] = Seq()
  for (emul <- 0 until 4) {
    for (lmul <- 0 until 4) {
      val max_mul = if (lmul > emul) lmul else emul
      for (nf <- 0 until 8) {
        if ((1 << lmul) * (nf + 1) <= 8) { // indexed load/store requires LMUL * NFIELDS to be less than or equal to 8
          combVemulNf :+= (emul, lmul, nf, (1 << max_mul) * (nf + 1))
        } else {
          combVemulNf :+= (emul, lmul, nf, 0)
        }
      }
    }
  }
  out := decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, uopNum) => (BitPat((emul << 5 | lmul << 3 | nf).U(7.W)), BitPat(uopNum.U(7.W)))
  }, BitPat.N(7)))
}
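/*
 * Worked example for the two tables above (illustrative values read off the generator code,
 * not generated output): in the strided table, src = {emul[1:0], nf[2:0]} with emul = log2(EMUL).
 * For EMUL = 2 (emul = 1) and NFIELDS = 4 (nf = 3), 2 * 4 = 8 <= 8, so the table returns 8 uops.
 * For EMUL = 4 and NFIELDS = 4, the product 16 exceeds 8, so the entry is 0 and the combination
 * is expected to be rejected as illegal elsewhere. In the indexed table,
 * src = {emul[1:0], lmul[1:0], nf[2:0]}; the legality check uses LMUL * NFIELDS <= 8 and the
 * uop count is max(EMUL, LMUL) * NFIELDS.
 */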
class UopInfoGen (implicit p: Parameters) extends XSModule {
  val io = IO(new UopInfoGenIO)

  val stridedLSTable = Module(new stridedLSNumOfUopTable) // decoder for strided load/store
  val indexedLSTable = Module(new indexedLSNumOfUopTable) // decoder for indexed load/store
  val indexedLSWBTable = Module(new indexedLSNumOfUopTable) // decoder for indexed load/store writeback

  val typeOfSplit = io.in.preInfo.typeOfSplit
  val vsew = Cat(0.U(1.W), io.in.preInfo.vsew)
  val veew = Cat(0.U(1.W), io.in.preInfo.vwidth(1, 0))
  val vmvn = io.in.preInfo.vmvn
  val isVlsr = io.in.preInfo.isVlsr
  val isVlsm = io.in.preInfo.isVlsm
  val vlmul = io.in.preInfo.vlmul
  val nf = io.in.preInfo.nf
  val isComplex = io.out.isComplex

  val lmul = MuxLookup(vlmul, 1.U(4.W))(Array(
    "b001".U -> 2.U,
    "b010".U -> 4.U,
    "b011".U -> 8.U
  ))
  val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  // log2(EMUL) in the same encoding as vlmul: veew - vsew + vlmul,
  // where "+ 1.U + ~vsew" is the two's-complement subtraction of vsew
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt

  val emul = MuxLookup(vemul, 1.U(4.W))(Array(
    "b001".U -> 2.U,
    "b010".U -> 4.U,
    "b011".U -> 8.U
  )) // TODO: illegal eew/emul combinations still need to raise an exception
  val simple_emul = MuxLookup(vemul, 0.U(2.W))(Array(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  val numOfUopVslide = MuxLookup(vlmul, 1.U(log2Up(MaxUopSize + 1).W))(Array(
    "b001".U -> 3.U,
    "b010".U -> 10.U,
    "b011".U -> 36.U
  ))
  val numOfUopVrgather = MuxLookup(vlmul, 1.U(log2Up(MaxUopSize + 1).W))(Array(
    "b001".U -> 4.U,
    "b010".U -> 16.U,
    "b011".U -> 64.U
  ))
  val numOfUopVrgatherei16 = Mux((!vsew.orR) && (vlmul =/= "b011".U),
    Cat(numOfUopVrgather, 0.U(1.W)),
    numOfUopVrgather
  )
  val numOfUopVcompress = MuxLookup(vlmul, 1.U(4.W))(Array(
    "b001".U -> 4.U,
    "b010".U -> 13.U,
    "b011".U -> 43.U
  ))
  val numOfUopVFRED = {
    // addTime includes the extra add of frs1
    val addTime = MuxLookup(vlmul, 1.U(4.W))(Array(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(vsew, "b000".U)(Array(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul < 1:  foldTime = vlmul - foldLastVlmul
    // lmul >= 1: foldTime = 0.U - foldLastVlmul
    val foldTime = Mux(vlmul(2), vlmul, 0.U) - foldLastVlmul
    addTime + foldTime
  }
  val numOfUopVFREDOSUM = {
    val uvlMax = MuxLookup(vsew, 0.U)(Array(
      VSew.e16 -> 8.U,
      VSew.e32 -> 4.U,
      VSew.e64 -> 2.U,
    ))
    val vlMax = Wire(UInt(7.W))
    vlMax := Mux(vlmul(2), uvlMax >> (-vlmul)(1, 0), uvlMax << vlmul(1, 0)).asUInt
    vlMax
  }
  /*
   * when 1 <= lmul <= 4, numOfUopWV = 2 * lmul, otherwise numOfUopWV = 1
   */
  val numOfUopWV = MuxLookup(vlmul, 1.U(4.W))(Array(
    "b000".U -> 2.U,
    "b001".U -> 4.U,
    "b010".U -> 8.U,
  ))
  /*
   * an extra move instruction is needed, so
   * when 1 <= lmul <= 4, numOfUopWX = 2 * lmul + 1, otherwise numOfUopWX = 2
   */
  val numOfUopWX = MuxLookup(vlmul, 2.U(4.W))(Array(
    "b000".U -> 3.U,
    "b001".U -> 5.U,
    "b010".U -> 9.U,
  ))

  stridedLSTable.src := Cat(simple_emul, nf)
  val numOfUopVLoadStoreStrided = stridedLSTable.out
  indexedLSTable.src := Cat(simple_emul, simple_lmul, nf)
  val numOfUopVLoadStoreIndexed = indexedLSTable.out
  indexedLSWBTable.src := Cat(simple_lmul, nf) // the emul field is zero-extended here, so the table yields LMUL * NFIELDS
  val numOfWBVLoadStoreIndexed = indexedLSWBTable.out
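  /*
   * The lookup below chooses how many uops the instruction is split into, keyed by
   * UopSplitType. `+&` is Chisel's width-expanding add, so sums near MaxUopSize do not
   * silently wrap. Illustrative case (derived from the table, not from simulation):
   * VEC_VFV with LMUL = 4 gives 4 + 1 = 5 uops; judging from the "move instruction"
   * comments on the load/store entries, the extra uop presumably moves the scalar
   * operand into a vector register.
   */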
  // number of uops
  val numOfUop = MuxLookup(typeOfSplit, 1.U(log2Up(MaxUopSize + 1).W))(Array(
    UopSplitType.VSET -> 2.U,
    UopSplitType.VEC_0XV -> 2.U,
    UopSplitType.VEC_VVV -> lmul,
    UopSplitType.VEC_VFV -> (lmul +& 1.U),
    UopSplitType.VEC_EXT2 -> lmul,
    UopSplitType.VEC_EXT4 -> lmul,
    UopSplitType.VEC_EXT8 -> lmul,
    UopSplitType.VEC_VVM -> lmul,
    UopSplitType.VEC_VFM -> (lmul +& 1.U),
    UopSplitType.VEC_VFRED -> numOfUopVFRED,
    UopSplitType.VEC_VFREDOSUM -> numOfUopVFREDOSUM,
    UopSplitType.VEC_VXM -> (lmul +& 1.U),
    UopSplitType.VEC_VXV -> (lmul +& 1.U),
    UopSplitType.VEC_VFW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WFW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_VVW -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_WVW -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_VXW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WXW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WVV -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_WXV -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_SLIDE1UP -> (lmul +& 1.U),
    UopSplitType.VEC_FSLIDE1UP -> (lmul +& 1.U),
    UopSplitType.VEC_SLIDE1DOWN -> Cat(lmul, 0.U(1.W)),
    UopSplitType.VEC_FSLIDE1DOWN -> Cat(lmul, 0.U(1.W)),
    UopSplitType.VEC_VRED -> lmul,
    UopSplitType.VEC_SLIDEUP -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_SLIDEDOWN -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_M0X -> lmul,
    UopSplitType.VEC_MVV -> (Cat(lmul, 0.U(1.W)) - 1.U),
    UopSplitType.VEC_M0X_VFIRST -> 1.U,
    UopSplitType.VEC_VWW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_RGATHER -> numOfUopVrgather,
    UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
    UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
    UopSplitType.VEC_COMPRESS -> numOfUopVcompress,
    UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
    UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, Mux(isVlsm, 2.U, (numOfUopVLoadStoreStrided +& 1.U))), // with one move instruction
    UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U), // with two move instructions
    UopSplitType.VEC_I_LDST -> (numOfUopVLoadStoreIndexed +& 1.U),
  ))

  // number of writebacks
  val numOfWB = Mux(typeOfSplit === UopSplitType.VEC_I_LDST, (numOfWBVLoadStoreIndexed +& 1.U), numOfUop)

  // A vector instruction's UopSplitType is never SCA_SIM, and an instruction that splits
  // into a single uop can still be treated as a simple one.
  isComplex := typeOfSplit =/= UopSplitType.SCA_SIM && numOfUop =/= 1.U
  io.out.uopInfo.numOfUop := numOfUop
  io.out.uopInfo.numOfWB := numOfWB
  io.out.uopInfo.lmul := lmul

}

class UopInfoGenIO(implicit p: Parameters) extends XSBundle {
  val in = new Bundle {
    val preInfo = Input(new PreInfo)
  }
  val out = new Bundle {
    val isComplex = Output(Bool())
    val uopInfo = Output(new UopInfo)
  }
}

class PreInfo(implicit p: Parameters) extends XSBundle {
  val typeOfSplit = UopSplitType()
  val vsew = VSew()       // 2 bits
  val vlmul = VLmul()
  val vwidth = UInt(3.W)  // eew
  val nf = UInt(3.W)
  val vmvn = UInt(3.W)    // vmvnr
  val isVlsr = Bool()     // vector whole-register load/store
  val isVlsm = Bool()     // vector mask load/store
}

class UopInfo(implicit p: Parameters) extends XSBundle {
  val numOfUop = UInt(log2Up(MaxUopSize + 1).W)
  val numOfWB = UInt(log2Up(MaxUopSize + 1).W)
  val lmul = UInt(4.W)
}
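/*
 * A minimal usage sketch (illustration only): it assumes an implicit XiangShan Parameters
 * instance is in scope, which this file does not construct, and omits the remaining
 * preInfo fields.
 *
 *   val uopInfoGen = Module(new UopInfoGen)
 *   uopInfoGen.io.in.preInfo.typeOfSplit := UopSplitType.VEC_VVV
 *   uopInfoGen.io.in.preInfo.vlmul       := VLmul.m4   // LMUL = 4
 *   // ... drive vsew, vwidth, nf, vmvn, isVlsr and isVlsm as required ...
 *   // uopInfoGen.io.out.uopInfo.numOfUop then reads 4 (one uop per register in the group),
 *   // and uopInfoGen.io.out.isComplex is asserted because more than one uop is produced.
 */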