/***************************************************************************************
 * Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
 * Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ***************************************************************************************/

package xiangshan.backend.decode

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.rocket.Instructions
import freechips.rocketchip.util.uintToBitPat
import utils._
import utility._
import xiangshan.ExceptionNO.illegalInstr
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.fu.FuType
import freechips.rocketchip.rocket.Instructions._
import xiangshan.backend.Bundles.{DecodedInst, StaticInst}
import xiangshan.backend.fu.vector.Bundles.{VType, VLmul, VSew}
import yunsuan.VpermType
import chisel3.util.experimental.decode.{QMCMinimizer, TruthTable, decoder}

/**
  * Combinational lookup table giving the uop count of a strided vector load/store.
  *
  * Input  `src` (5 bits): `{emul[1:0], nf[2:0]}`, where `emul` is the log2-encoded
  *                        multiplier (0..3 -> 1, 2, 4, 8) and `nf` is the raw 3-bit field.
  * Output `out` (4 bits): `(1 << emul) * (nf + 1)` when that product is at most 8,
  *                        otherwise 0.
  *
  * NOTE: the class name keeps its historical spelling ("strdied") because other
  * code instantiates it under that name.
  */
class strdiedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(5.W)))
  val out = IO(Output(UInt(4.W)))

  // strided load/store
  // One (emul, nf, uopNum) entry per input combination: 4 * 8 = 32 entries, i.e.
  // every value of the 5-bit `src`, emitted in ascending (emul, nf) order.
  val combVemulNf: Seq[(Int, Int, Int)] =
    for {
      emul <- 0 until 4
      nf   <- 0 until 8
    } yield {
      val regNum = (1 << emul) * (nf + 1)
      // combinations needing more than 8 registers map to 0
      (emul, nf, if (regNum <= 8) regNum else 0)
    }

  out := decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, nf, uopNum) => (BitPat(((emul << 3) | nf).U(5.W)), BitPat(uopNum.U(4.W)))
  }, BitPat.N(4)))
}

/**
  * Combinational lookup table giving the uop count of an indexed vector load/store.
  *
  * Input  `src` (7 bits): `{emul[1:0], lmul[1:0], nf[2:0]}`, with `emul` and `lmul`
  *                        log2-encoded (0..3 -> 1, 2, 4, 8) and `nf` the raw 3-bit field.
  * Output `out`         : `max(lmul * (nf + 1), emul)` when `lmul * (nf + 1) <= 8`,
  *                        otherwise 0. The port is declared 7 bits wide although every
  *                        table value fits in 4 bits; the width is kept unchanged for
  *                        interface compatibility.
  */
class indexedLSNumOfUopTable() extends Module {
  val src = IO(Input(UInt(7.W)))
  val out = IO(Output(UInt(7.W)))

  // indexed load/store
  // One (emul, lmul, nf, uopNum) entry per input combination: 4 * 4 * 8 = 128 entries,
  // i.e. every value of the 7-bit `src`, in ascending (emul, lmul, nf) order.
  // nf = 0 needs no special case: the formula reduces to max(lmul, emul), and
  // lmul <= 8 always holds for lmul in {1, 2, 4, 8}.
  val combVemulNf: Seq[(Int, Int, Int, Int)] =
    for {
      emul <- 0 until 4
      lmul <- 0 until 4
      nf   <- 0 until 8
    } yield {
      val emulVal    = 1 << emul
      val dataRegNum = (1 << lmul) * (nf + 1)
      // indexed load/store must ensure that lmul * (nf + 1) is less than or equal to 8
      val uopNum = if (dataRegNum <= 8) dataRegNum.max(emulVal) else 0
      (emul, lmul, nf, uopNum)
    }

  out := decoder(QMCMinimizer, src, TruthTable(combVemulNf.map {
    case (emul, lmul, nf, uopNum) =>
      (BitPat(((emul << 5) | (lmul << 3) | nf).U(7.W)), BitPat(uopNum.U(4.W)))
  }, BitPat.N(4)))
}

/**
  * Computes, from pre-decoded instruction information, how many uops an instruction is
  * split into (`numOfUop`), how many of them write back (`numOfWB`), the decoded LMUL
  * value (`lmul`), and whether the instruction is flagged as complex (`isComplex`).
  *
  * All logic is combinational; inputs and outputs are described by [[UopInfoGenIO]].
  */
class UopInfoGen (implicit p: Parameters) extends XSModule {
  val io = IO(new UopInfoGenIO)

  val stridedLSTable = Module(new strdiedLSNumOfUopTable)     // decoder for strided load/store
  val indexedLSTable = Module(new indexedLSNumOfUopTable)     // decoder for indexed load/store

  val typeOfSplit = io.in.preInfo.typeOfSplit
  // vsew and veew are zero-extended by one bit so they have the same width in the vemul sum below
  val vsew = Cat(0.U(1.W), io.in.preInfo.vsew)
  val veew = Cat(0.U(1.W), io.in.preInfo.vwidth(1, 0))
  val vmvn = io.in.preInfo.vmvn
  val isVlsr = io.in.preInfo.isVlsr
  val isVlsm = io.in.preInfo.isVlsm
  val vlmul = io.in.preInfo.vlmul
  val nf = io.in.preInfo.nf
  val isComplex = io.out.isComplex

  // lmul: register-group size selected by vlmul; every encoding not listed yields 1
  val lmul = MuxLookup(vlmul, 1.U(4.W))(Seq(
    "b001".U -> 2.U,
    "b010".U -> 4.U,
    "b011".U -> 8.U
  ))
  // simple_lmul: 2-bit log2 form of lmul, used as an index into the load/store tables
  val simple_lmul = MuxLookup(vlmul, 0.U(2.W))(Seq(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  // veew + vlmul + (~vsew + 1), i.e. veew - vsew + vlmul in two's-complement arithmetic
  val vemul: UInt = veew.asUInt + 1.U + vlmul.asUInt + ~vsew.asUInt

  val emul = MuxLookup(vemul, 1.U(4.W))(Seq(
    "b001".U -> 2.U,
    "b010".U -> 4.U,
    "b011".U -> 8.U
  ))                                                              //TODO : eew and emul illegal exception need to be handled
  // simple_emul: 2-bit log2 form of emul, used as an index into the load/store tables
  val simple_emul = MuxLookup(vemul, 0.U(2.W))(Seq(
    "b001".U -> 1.U,
    "b010".U -> 2.U,
    "b011".U -> 3.U
  ))

  val numOfUopVslide = MuxLookup(vlmul, 1.U(log2Up(MaxUopSize + 1).W))(Seq(
    "b001".U -> 3.U,
    "b010".U -> 10.U,
    "b011".U -> 36.U
  ))
  val numOfUopVrgather = MuxLookup(vlmul, 1.U(log2Up(MaxUopSize + 1).W))(Seq(
    "b001".U -> 4.U,
    "b010".U -> 16.U,
    "b011".U -> 64.U
  ))
  // doubled (shifted left by one) when vsew is zero and vlmul is not "b011"
  val numOfUopVrgatherei16 = Mux((!vsew.orR) && (vlmul =/= "b011".U),
    Cat(numOfUopVrgather, 0.U(1.W)),
    numOfUopVrgather
  )
  val numOfUopVcompress = MuxLookup(vlmul, 1.U(4.W))(Seq(
    "b001".U -> 4.U,
    "b010".U -> 13.U,
    "b011".U -> 43.U
  ))
  val numOfUopVFRED = {
    // addTime include add frs1
    val addTime = MuxLookup(vlmul, 1.U(4.W))(Seq(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(vsew, "b000".U)(Seq(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul < 1, foldTime = vlmul - foldLastVlmul
    // lmul >= 1, foldTime = 0.U - foldLastVlmul
    val foldTime = Mux(vlmul(2), vlmul, 0.U) - foldLastVlmul
    // a sum of zero is replaced by 1 so that at least one uop is generated
    Mux((addTime + foldTime).orR, addTime + foldTime, 1.U)
  }
  val numOfUopVFREDOSUM = {
    val uvlMax = MuxLookup(vsew, 1.U)(Seq(
      VSew.e16 -> 8.U,
      VSew.e32 -> 4.U,
      VSew.e64 -> 2.U,
    ))
    val vlMax = Wire(UInt(7.W))
    // vlmul(2) set: shift right by the low two bits of -vlmul; otherwise shift left by vlmul(1,0)
    vlMax := Mux(vlmul(2), uvlMax >> (-vlmul)(1,0), uvlMax << vlmul(1,0)).asUInt
    // a result of zero is replaced by 1 so that at least one uop is generated
    Mux(vlMax.orR, vlMax, 1.U)
  }
  /*
   * when 1 <= lmul <= 4, numOfUopWV = 2 * lmul, otherwise numOfUopWV = 1
   */
  val numOfUopWV = MuxLookup(vlmul, 1.U(4.W))(Seq(
    "b000".U -> 2.U,
    "b001".U -> 4.U,
    "b010".U -> 8.U,
  ))
  /*
   * need an extra move instruction
   * when 1 <= lmul <= 4, numOfUopWX = 2 * lmul + 1, otherwise numOfUopWX = 2
   */
  val numOfUopWX = MuxLookup(vlmul, 2.U(4.W))(Seq(
    "b000".U -> 3.U,
    "b001".U -> 5.U,
    "b010".U -> 9.U,
  ))

  // table inputs are the concatenated log2 multipliers and the raw nf field
  stridedLSTable.src := Cat(simple_emul, nf)
  val numOfUopVLoadStoreStrided = stridedLSTable.out
  indexedLSTable.src := Cat(simple_emul, simple_lmul, nf)
  val numOfUopVLoadStoreIndexed = indexedLSTable.out

  //number of uop
  // split types not listed below default to a single uop
  val numOfUop = MuxLookup(typeOfSplit, 1.U(log2Up(MaxUopSize + 1).W))(Seq(
    UopSplitType.VSET -> 2.U,
    UopSplitType.VEC_0XV -> 2.U,
    UopSplitType.VEC_VVV -> lmul,
    UopSplitType.VEC_VFV -> (lmul +& 1.U),
    UopSplitType.VEC_EXT2 -> lmul,
    UopSplitType.VEC_EXT4 -> lmul,
    UopSplitType.VEC_EXT8 -> lmul,
    UopSplitType.VEC_VVM -> lmul,
    UopSplitType.VEC_VFM -> (lmul +& 1.U),
    UopSplitType.VEC_VFRED -> numOfUopVFRED,
    UopSplitType.VEC_VFREDOSUM -> numOfUopVFREDOSUM,
    UopSplitType.VEC_VXM -> (lmul +& 1.U),
    UopSplitType.VEC_VXV -> (lmul +& 1.U),
    UopSplitType.VEC_VFW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WFW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_VVW -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_WVW -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_VXW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WXW -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_WVV -> numOfUopWV, // lmul <= 4
    UopSplitType.VEC_WXV -> numOfUopWX, // lmul <= 4
    UopSplitType.VEC_SLIDE1UP -> (lmul +& 1.U),
    UopSplitType.VEC_FSLIDE1UP -> (lmul +& 1.U),
    UopSplitType.VEC_SLIDE1DOWN -> Cat(lmul, 0.U(1.W)),
    UopSplitType.VEC_FSLIDE1DOWN -> Cat(lmul, 0.U(1.W)),
    UopSplitType.VEC_VRED -> lmul,
    UopSplitType.VEC_SLIDEUP -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_SLIDEDOWN -> (numOfUopVslide + 1.U),
    UopSplitType.VEC_M0X -> lmul,
    UopSplitType.VEC_MVV -> (Cat(lmul, 0.U(1.W)) - 1.U),
    UopSplitType.VEC_VWW -> Cat(lmul, 0.U(1.W)), // lmul <= 4
    UopSplitType.VEC_RGATHER -> numOfUopVrgather,
    UopSplitType.VEC_RGATHER_VX -> (numOfUopVrgather +& 1.U),
    UopSplitType.VEC_RGATHEREI16 -> numOfUopVrgatherei16,
    UopSplitType.VEC_COMPRESS -> numOfUopVcompress,
    UopSplitType.VEC_MVNR -> (vmvn +& 1.U),
    UopSplitType.VEC_US_LDST -> Mux(isVlsr, nf +& 2.U, Mux(isVlsm, 2.U, (numOfUopVLoadStoreStrided +& 1.U))), // with one move instruction
    UopSplitType.VEC_US_FF_LD -> (numOfUopVLoadStoreStrided +& 2.U),
    UopSplitType.VEC_S_LDST -> (numOfUopVLoadStoreStrided +& 2.U), // with two move instructions
    UopSplitType.VEC_I_LDST -> (numOfUopVLoadStoreIndexed +& 1.U),
    UopSplitType.AMO_CAS_W -> 2.U,
    UopSplitType.AMO_CAS_D -> 2.U,
    UopSplitType.AMO_CAS_Q -> 4.U,
  ))

  // number of writeback num
  // AMOCAS split types write back half as many uops as they issue
  val numOfWB = Mux(UopSplitType.isAMOCAS(typeOfSplit), numOfUop >> 1, numOfUop)

  // An instruction is flagged complex when it is a vector arith/config instruction,
  // a vector memory instruction, or an AMOCAS instruction.
  // NOTE(review): an earlier comment here said a vector instruction with a single uop
  // could be regarded as simple; the expression below does not look at numOfUop.
  isComplex := io.in.preInfo.isVecArith || io.in.preInfo.isVecMem || io.in.preInfo.isAmoCAS
  io.out.uopInfo.numOfUop := numOfUop
  io.out.uopInfo.numOfWB := numOfWB
  io.out.uopInfo.lmul := lmul

}

/** IO bundle of [[UopInfoGen]]: pre-decoded information in, uop split information out. */
class UopInfoGenIO(implicit p: Parameters) extends XSBundle {
  val in = new Bundle {
    val preInfo = Input(new PreInfo)
  }
  val out = new Bundle {
    val isComplex = Output(Bool())
    val uopInfo = Output(new UopInfo)
  }
}

/** Pre-decoded instruction fields consumed by [[UopInfoGen]]. */
class PreInfo(implicit p: Parameters) extends XSBundle {
  val isVecArith = Bool() // is vector arith or config instr
  val isVecMem = Bool()
  val isAmoCAS = Bool()
  val typeOfSplit = UopSplitType()
  val vsew = VSew()          //2 bit
  val vlmul = VLmul()
  val vwidth = UInt(3.W)     //eew
  val nf = UInt(3.W)
  val vmvn = UInt(3.W)       // vmvnr
  val isVlsr = Bool()        // is vector whole register load/store
  val isVlsm = Bool()        // is vector mask load/store
}

/** Result of [[UopInfoGen]]: uop count, writeback count and decoded LMUL value. */
class UopInfo(implicit p: Parameters) extends XSBundle {
  val numOfUop = UInt(log2Up(MaxUopSize + 1).W)
  val numOfWB = UInt(log2Up(MaxUopSize + 1).W)
  val lmul = UInt(4.W)
}