package xiangshan.backend.fu.wrapper

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.XSError
import xiangshan.backend.fu.FuConfig
import xiangshan.backend.fu.vector.Bundles.{VLmul, VSew, ma}
import xiangshan.backend.fu.vector.utils.VecDataSplitModule
import xiangshan.backend.fu.vector.{Mgu, VecInfo, VecPipedFuncUnit}
import yunsuan.{VfaluType, VfpuType}
import yunsuan.vector.VectorFloatAdder

/**
  * Vector floating-point ALU wrapper.
  *
  * Splits the `dataWidth`-bit vector operands into 64-bit lanes, drives one
  * [[VectorFloatAdder]] per lane, and post-processes the lane results
  * (compare-to-mask packing, fflags gating by mask/vl, merge via [[Mgu]]).
  * Also handles the scalar-move ops (vfmv_f_s / vfmv_s_f), widening ops and
  * the floating-point reductions (vfredusum/max/min), which need special
  * mask generation and vl fixing.
  *
  * Signals such as `vs1`, `vs2`, `oldVd`, `fuOpType`, `vuopIdx`, `vsew`,
  * `vlmul`, `vl`, `srcMask`, `frm`, `vecCtrl`, `outCtrl`, `outVecCtrl`,
  * `outVl`, `outOldVd`, `outSrcMask`, `allMaskTrue` and `VLEN` come from
  * [[VecPipedFuncUnit]] / enclosing scope.
  */
class VFAlu(cfg: FuConfig)(implicit p: Parameters) extends VecPipedFuncUnit(cfg) {
  XSError(io.in.valid && io.in.bits.ctrl.fuOpType === VfpuType.dummy, "Vfalu OpType not supported")

  // params alias
  private val dataWidth = cfg.dataBits
  private val dataWidthOfDataModule = 64
  private val numVecModule = dataWidth / dataWidthOfDataModule

  // io alias: opcode/widen flags are packed into fuOpType
  private val opcode   = fuOpType(4, 0)
  private val resWiden = fuOpType(5)   // result is widened (2*SEW)
  private val opbWiden = fuOpType(6)   // operand b is already widened

  // modules
  private val vfalus    = Seq.fill(numVecModule)(Module(new VectorFloatAdder))
  private val vs2Split  = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val vs1Split  = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val oldVdSplit = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val mgu       = Module(new Mgu(dataWidth))

  /**
    * In connection of [[vs2Split]], [[vs1Split]] and [[oldVdSplit]]
    */
  vs2Split.io.inVecData := vs2
  vs1Split.io.inVecData := vs1
  oldVdSplit.io.inVecData := oldVd

  /**
    * [[vfalus]]'s in connection
    */
  // Regroup the 32-bit slices pairwise for the widening datapath:
  // Vec(vs2(31,0), vs2(63,32), vs2(95,64), vs2(127,96)) ==>
  // Vec(
  //   Cat(vs2(95,64),  vs2(31,0)),
  //   Cat(vs2(127,96), vs2(63,32)),
  // )
  // NOTE(review): vs2GroupedVec/vs1GroupedVec are not referenced anywhere in
  // this file (widen_a/widen_b below build the Cat directly); they look like
  // leftovers — confirm before removing.
  private val vs2GroupedVec: Vec[UInt] = VecInit(vs2Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
  private val vs1GroupedVec: Vec[UInt] = VecInit(vs1Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
  private val resultData = Wire(Vec(numVecModule, UInt(dataWidthOfDataModule.W)))
  // 20 bits per lane = four 5-bit fflags groups (see allFFlags below)
  private val fflagsData = Wire(Vec(numVecModule, UInt(20.W)))
  private val srcMaskRShiftForReduction = Wire(UInt((8 * numVecModule).W))

  // for reduction: the first uop of each element group needs the real source
  // mask; later uops accumulate partial sums and use fold masks instead.
  val isFirstGroupUop = vuopIdx === 0.U ||
    (vuopIdx === 1.U && (vlmul === VLmul.m4 || vlmul === VLmul.m8)) ||
    ((vuopIdx === 2.U || vuopIdx === 3.U) && vlmul === VLmul.m8)
  // mask-bit offset of this uop = vuopIdx * (elements per uop), i.e. a shift
  // by 16/8/4 elements for e16/e32/e64 respectively
  val maskRshiftWidthForReduction = Wire(UInt(6.W))
  maskRshiftWidthForReduction := Mux1H(
    Seq(
      (vsew === VSew.e16) -> (vuopIdx(1, 0) << 4),
      (vsew === VSew.e32) -> (vuopIdx(1, 0) << 3),
      (vsew === VSew.e64) -> (vuopIdx(1, 0) << 2),
    )
  )
  // elements at index >= vl never contribute to a reduction
  val vlMaskForReduction = (~(Fill(VLEN, 1.U) << vl)).asUInt
  srcMaskRShiftForReduction := ((srcMask & vlMaskForReduction) >> maskRshiftWidthForReduction)(8 * numVecModule - 1, 0)

  /**
    * Build the per-lane reduction mask (low 4 bits gate vs2/fp_a, high 4
    * bits gate vs1/fp_b — see comment below). Folding uops (isFoldTo1_x)
    * halve/quarter/eighth the active element count per step.
    *
    * @param inmask pre-shifted source mask for this uop
    * @param sew    element width selector (1=e16, 2=e32, 3=e64)
    * @param i      lane index
    */
  def genMaskForReduction(inmask: UInt, sew: UInt, i: Int): UInt = {
    val f64MaskNum = dataWidth / 64 * 2
    val f32MaskNum = dataWidth / 32 * 2
    val f16MaskNum = dataWidth / 16 * 2
    val f64Mask = inmask(f64MaskNum - 1, 0)
    val f32Mask = inmask(f32MaskNum - 1, 0)
    val f16Mask = inmask(f16MaskNum - 1, 0)
    // vs2 reordered, so mask use high bits
    val f64FirstFoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(0), 0.U(3.W), f64Mask(1)),
      )
    )
    val f32FirstFoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(1), f32Mask(0), 0.U(2.W), f32Mask(3), f32Mask(2)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(0), 0.U(3.W), f32Mask(1)),
      )
    )
    val f16FirstFoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(7, 4), f16Mask(3, 0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(1), f16Mask(0), 0.U(2.W), f16Mask(3), f16Mask(2)),
        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(0), 0.U(3.W), f16Mask(1)),
      )
    )
    // after the first group uop the partial results are all valid, so fold
    // masks are constants selecting the surviving element positions
    val f64FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00010001".U,
      )
    )
    val f32FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00010001".U,
      )
    )
    val f16FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b11111111".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_8 -> "b00010001".U,
      )
    )
    // low 4 bits for vs2(fp_a), high 4 bits for vs1(fp_b),
    val isFold = vecCtrl.fpu.isFoldTo1_2 || vecCtrl.fpu.isFoldTo1_4 || vecCtrl.fpu.isFoldTo1_8
    val f64FirstNotFoldMask = Cat(0.U(3.W), f64Mask(i + 2), 0.U(3.W), f64Mask(i))
    val f32FirstNotFoldMask = Cat(0.U(2.W), f32Mask(i + 5, i + 4), 0.U(2.W), Cat(f32Mask(i + 1, i)))
    val f16FirstNotFoldMask = Cat(f16Mask(i + 11, i + 8), f16Mask(i + 3, 0))
    // first-group uops use the (possibly folded) source mask; later uops use
    // the constant fold masks, or all-ones when not folding
    val f64MaskI = Mux(isFirstGroupUop, Mux(isFold, f64FirstFoldMask, f64FirstNotFoldMask), Mux(isFold, f64FoldMask, Fill(8, 1.U)))
    val f32MaskI = Mux(isFirstGroupUop, Mux(isFold, f32FirstFoldMask, f32FirstNotFoldMask), Mux(isFold, f32FoldMask, Fill(8, 1.U)))
    val f16MaskI = Mux(isFirstGroupUop, Mux(isFold, f16FirstFoldMask, f16FirstNotFoldMask), Mux(isFold, f16FoldMask, Fill(8, 1.U)))
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    outMask
  }

  /**
    * Select the 4 mask bits belonging to lane `i` for merge/masked ops.
    * One 64-bit lane holds 1/2/4 elements for e64/e32/e16, hence the
    * different slice widths (zero-padded up to 4 bits).
    */
  def genMaskForMerge(inmask: UInt, sew: UInt, i: Int): UInt = {
    val f64MaskNum = dataWidth / 64
    val f32MaskNum = dataWidth / 32
    val f16MaskNum = dataWidth / 16
    val f64Mask = inmask(f64MaskNum - 1, 0)
    val f32Mask = inmask(f32MaskNum - 1, 0)
    val f16Mask = inmask(f16MaskNum - 1, 0)
    val f64MaskI = Cat(0.U(3.W), f64Mask(i))
    val f32MaskI = Cat(0.U(2.W), f32Mask(2 * i + 1, 2 * i))
    val f16MaskI = f16Mask(4 * i + 3, 4 * i)
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    outMask
  }

  val isScalarMove = (fuOpType === VfaluType.vfmv_f_s) || (fuOpType === VfaluType.vfmv_s_f)
  // non-reduction path: shift the source mask so bit 0 lines up with this
  // uop's first element (8/4/2 elements per uop for e16/e32/e64)
  val srcMaskRShift = Wire(UInt((4 * numVecModule).W))
  val maskRshiftWidth = Wire(UInt(6.W))
  maskRshiftWidth := Mux1H(
    Seq(
      (vsew === VSew.e16) -> (vuopIdx(2, 0) << 3),
      (vsew === VSew.e32) -> (vuopIdx(2, 0) << 2),
      (vsew === VSew.e64) -> (vuopIdx(2, 0) << 1),
    )
  )
  srcMaskRShift := (srcMask >> maskRshiftWidth)(4 * numVecModule - 1, 0)

  // For scalar (FpToVec) ops, a narrower scalar is NaN-boxed in the 64-bit
  // register: any non-all-ones upper bits mean the operand must be treated
  // as the canonical NaN.
  val fp_aIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  val fp_bIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  vfalus.zipWithIndex.foreach {
    case (mod, i) =>
      // operands swapped when b is the widened one
      mod.io.fp_a := Mux(opbWiden, vs1Split.io.outVec64b(i), vs2Split.io.outVec64b(i)) // very dirty TODO
      mod.io.fp_b := Mux(opbWiden, vs2Split.io.outVec64b(i), vs1Split.io.outVec64b(i)) // very dirty TODO
      mod.io.widen_a := Cat(vs2Split.io.outVec32b(i + numVecModule), vs2Split.io.outVec32b(i))
      mod.io.widen_b := Cat(vs1Split.io.outVec32b(i + numVecModule), vs1Split.io.outVec32b(i))
      mod.io.frs1 := 0.U // already vf -> vv
      mod.io.is_frs1 := false.B // already vf -> vv
      // scalar moves only write element 0, i.e. only in uop 0
      mod.io.mask := Mux(isScalarMove, !vuopIdx.orR, genMaskForMerge(inmask = srcMaskRShift, sew = vsew, i = i))
      mod.io.maskForReduction := genMaskForReduction(inmask = srcMaskRShiftForReduction, sew = vsew, i = i)
      mod.io.uop_idx := vuopIdx(0)
      mod.io.is_vec := true.B // Todo
      mod.io.round_mode := frm
      mod.io.fp_format := Mux(resWiden, vsew + 1.U, vsew)
      mod.io.opb_widening := opbWiden
      mod.io.res_widening := resWiden
      mod.io.op_code := opcode
      resultData(i) := mod.io.fp_result
      fflagsData(i) := mod.io.fflags
      fp_aIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
        ((vsew === VSew.e32) & (!vs2Split.io.outVec64b(i).head(32).andR)) |
        ((vsew === VSew.e16) & (!vs2Split.io.outVec64b(i).head(48).andR))
        )
      fp_bIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
        ((vsew === VSew.e32) & (!vs1Split.io.outVec64b(i).head(32).andR)) |
        ((vsew === VSew.e16) & (!vs1Split.io.outVec64b(i).head(48).andR))
        )
      mod.io.fp_aIsFpCanonicalNAN := fp_aIsFpCanonicalNAN(i)
      mod.io.fp_bIsFpCanonicalNAN := fp_bIsFpCanonicalNAN(i)
  }

  // Compare ops produce one bit per element; collect them at 16-bit
  // granularity (the finest SEW) — positions valid only for the current vsew.
  val resultDataUInt = resultData.asUInt
  val cmpResultWidth = dataWidth / 16
  val cmpResult = Wire(Vec(cmpResultWidth, Bool()))
  for (i <- 0 until cmpResultWidth) {
    if (i == 0) {
      cmpResult(i) := resultDataUInt(0)
    }
    else if (i < dataWidth / 64) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i * 16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i * 32),
          (outVecCtrl.vsew === 3.U) -> resultDataUInt(i * 64)
        )
      )
    }
    else if (i < dataWidth / 32) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i * 16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i * 32),
          (outVecCtrl.vsew === 3.U) -> false.B
        )
      )
    }
    else if (i < dataWidth / 16) {
      cmpResult(i) := Mux(outVecCtrl.vsew === 1.U, resultDataUInt(i * 16), false.B)
    }
  }

  // output-stage effective element width (RegNext aligns resWiden with the
  // piped output control)
  val outEew = Mux(RegNext(resWiden), outVecCtrl.vsew + 1.U, outVecCtrl.vsew)
  val outVuopidx = outVecCtrl.vuopIdx(2, 0)
  val vlMax = ((VLEN / 8).U >> outEew).asUInt
  val lmulAbs = Mux(outVecCtrl.vlmul(2), (~outVecCtrl.vlmul(1, 0)).asUInt + 1.U, outVecCtrl.vlmul(1, 0))
  // vfmv_f_s need vl=1, reduction last uop need vl=1, other uop need vl=vlmax
  val numOfUopVFRED = {
    // addTime include add frs1
    val addTime = MuxLookup(outVecCtrl.vlmul, 1.U(4.W), Array(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(outVecCtrl.vsew, "b000".U, Array(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul < 1, foldTime = vlmul - foldFastVlmul
    // lmul >= 1, foldTime = 0.U - foldFastVlmul
    val foldTime = Mux(outVecCtrl.vlmul(2), outVecCtrl.vlmul, 0.U) - foldLastVlmul
    addTime + foldTime
  }
  val reductionVl = Mux(outVecCtrl.vuopIdx === numOfUopVFRED - 1.U, 1.U, vlMax)
  // NOTE(review): "Resuction" is a typo for "Reduction"; kept because the val
  // is a public class member and may be referenced elsewhere.
  val outIsResuction = outCtrl.fuOpType === VfaluType.vfredusum || outCtrl.fuOpType === VfaluType.vfredmax || outCtrl.fuOpType === VfaluType.vfredmin
  val outVlFix = Mux(
    outVecCtrl.fpu.isFpToVecInst || (outCtrl.fuOpType === VfaluType.vfmv_f_s),
    1.U,
    Mux(
      outCtrl.fuOpType === VfaluType.vfmv_s_f,
      outVl.orR, // vfmv_s_f: vl is effectively min(vl, 1)
      Mux(outIsResuction, reductionVl, outVl)
    )
  )
  val vlMaxAllUop = Wire(outVl.cloneType)
  vlMaxAllUop := Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax << lmulAbs).asUInt
  val vlMaxThisUop = Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax).asUInt
  val vlSetThisUop = Mux(outVlFix > outVuopidx * vlMaxThisUop, outVlFix - outVuopidx * vlMaxThisUop, 0.U)
  // BUGFIX: was Wire(UInt(3.W)). vlMaxThisUop can reach 8 (e16, VLEN=128:
  // vlMax = 16 >> 1 = 8), and a 3-bit wire truncates 8 to 0, which would
  // zero vlMaskRShift for a full e16 uop. 4 bits hold the maximum value.
  val vlThisUop = Wire(UInt(4.W))
  vlThisUop := Mux(vlSetThisUop < vlMaxThisUop, vlSetThisUop, vlMaxThisUop)
  // mask of elements below this uop's effective vl
  val vlMaskRShift = Wire(UInt((4 * numVecModule).W))
  vlMaskRShift := Fill(4 * numVecModule, 1.U(1.W)) >> ((4 * numVecModule).U - vlThisUop)

  // ops whose destination write must ignore the source mask
  private val needNoMask = outCtrl.fuOpType === VfaluType.vfmerge ||
    outCtrl.fuOpType === VfaluType.vfmv_s_f ||
    outIsResuction ||
    outVecCtrl.fpu.isFpToVecInst
  val maskToMgu = Mux(needNoMask, allMaskTrue, outSrcMask)

  // fflags accumulation: only active (masked-in, below-vl) elements may
  // raise flags; reductions always accumulate all lanes' flags
  val allFFlagsEn = Wire(Vec(4 * numVecModule, Bool()))
  val outSrcMaskRShift = Wire(UInt((4 * numVecModule).W))
  outSrcMaskRShift := (maskToMgu >> (outVecCtrl.vuopIdx(2, 0) * vlMax))(4 * numVecModule - 1, 0)
  val f16FFlagsEn = outSrcMaskRShift
  val f32FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  for (i <- 0 until numVecModule) {
    f32FFlagsEn(i) := Cat(Fill(2, 0.U), outSrcMaskRShift(2 * i + 1, 2 * i))
  }
  val f64FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  for (i <- 0 until numVecModule) {
    f64FFlagsEn(i) := Cat(Fill(3, 0.U), outSrcMaskRShift(i))
  }
  val fflagsEn = Mux1H(
    Seq(
      (outEew === 1.U) -> f16FFlagsEn.asUInt,
      (outEew === 2.U) -> f32FFlagsEn.asUInt,
      (outEew === 3.U) -> f64FFlagsEn.asUInt
    )
  )
  allFFlagsEn := Mux(outIsResuction, Fill(4 * numVecModule, 1.U), (fflagsEn & vlMaskRShift)).asTypeOf(allFFlagsEn)

  val allFFlags = fflagsData.asTypeOf(Vec(4 * numVecModule, UInt(5.W)))
  val outFFlags = allFFlagsEn.zip(allFFlags).map {
    case (en, fflags) => Mux(en, fflags, 0.U(5.W))
  }.reduce(_ | _)
  io.out.bits.res.fflags.get := outFFlags

  // For masked-off compare elements, the result bit comes from old vd
  // (mask-undisturbed) or is forced to 1 (mask-agnostic, vma)
  val cmpResultOldVd = Wire(UInt(cmpResultWidth.W))
  val cmpResultOldVdRshiftWidth = Wire(UInt(6.W))
  cmpResultOldVdRshiftWidth := Mux1H(
    Seq(
      (outVecCtrl.vsew === VSew.e16) -> (outVecCtrl.vuopIdx(2, 0) << 3),
      (outVecCtrl.vsew === VSew.e32) -> (outVecCtrl.vuopIdx(2, 0) << 2),
      (outVecCtrl.vsew === VSew.e64) -> (outVecCtrl.vuopIdx(2, 0) << 1),
    )
  )
  cmpResultOldVd := (outOldVd >> cmpResultOldVdRshiftWidth)(4 * numVecModule - 1, 0)
  val cmpResultForMgu = Wire(Vec(cmpResultWidth, Bool()))
  for (i <- 0 until cmpResultWidth) {
    cmpResultForMgu(i) := Mux(outSrcMaskRShift(i), cmpResult(i), Mux(outVecCtrl.vma, true.B, cmpResultOldVd(i)))
  }

  /**
    * [[mgu]] connection: merge new result with old vd under mask/vl/vstart.
    */
  mgu.io.in.vd := Mux(outVecCtrl.isDstMask, Cat(0.U((dataWidth / 16 * 15).W), cmpResultForMgu.asUInt), resultDataUInt)
  mgu.io.in.oldVd := outOldVd
  mgu.io.in.mask := maskToMgu
  mgu.io.in.info.ta := Mux(outCtrl.fuOpType === VfaluType.vfmv_f_s, true.B, outVecCtrl.vta)
  mgu.io.in.info.ma := Mux(outCtrl.fuOpType === VfaluType.vfmv_s_f, true.B, outVecCtrl.vma)
  mgu.io.in.info.vl := outVlFix
  mgu.io.in.info.vlmul := outVecCtrl.vlmul
  mgu.io.in.info.valid := io.out.valid
  // BUGFIX(cleanup): vstart was previously driven twice; under Chisel
  // last-connect semantics only this Mux took effect, so the plain
  // `:= outVecCtrl.vstart` assignment was dead code and has been removed.
  mgu.io.in.info.vstart := Mux(outVecCtrl.fpu.isFpToVecInst, 0.U, outVecCtrl.vstart)
  mgu.io.in.info.eew := outEew
  mgu.io.in.info.vsew := outVecCtrl.vsew
  mgu.io.in.info.vdIdx := Mux(outIsResuction, 0.U, outVecCtrl.vuopIdx)
  mgu.io.in.info.narrow := outVecCtrl.isNarrow
  mgu.io.in.info.dstMask := outVecCtrl.isDstMask

  // Scalar fclass/compare results live in the low 16 bits only; mask off the
  // rest so stale upper bits of vd do not leak into the scalar result.
  val resultFpMask = Wire(UInt(VLEN.W))
  val isFclass = outVecCtrl.fpu.isFpToVecInst && (outCtrl.fuOpType === VfaluType.vfclass)
  val fpCmpFuOpType = Seq(VfaluType.vfeq, VfaluType.vflt, VfaluType.vfle)
  val isCmp = outVecCtrl.fpu.isFpToVecInst && (fpCmpFuOpType.map(_ === outCtrl.fuOpType).reduce(_ | _))
  resultFpMask := Mux(isFclass || isCmp, Fill(16, 1.U(1.W)), Fill(VLEN, 1.U(1.W)))
  io.out.bits.res.data := mgu.io.out.vd & resultFpMask
}

/**
  * Skeleton mask/merge unit.
  * NOTE(review): this module only aliases its inputs and never drives
  * `io.out.vd` — it appears incomplete or unused (VFAlu instantiates [[Mgu]],
  * not this). Confirm before wiring it anywhere; an undriven output will fail
  * FIRRTL initialization checks if elaborated.
  */
class VFMgu(vlen: Int)(implicit p: Parameters) extends Module {
  val io = IO(new VFMguIO(vlen))

  val vd = io.in.vd
  val oldvd = io.in.oldVd
  val mask = io.in.mask
  val vsew = io.in.info.eew
  val num16bits = vlen / 16
}

/** IO bundle for [[VFMgu]]: vd/oldVd/mask plus the shared [[VecInfo]] control. */
class VFMguIO(vlen: Int)(implicit p: Parameters) extends Bundle {
  val in = new Bundle {
    val vd = Input(UInt(vlen.W))
    val oldVd = Input(UInt(vlen.W))
    val mask = Input(UInt(vlen.W))
    val info = Input(new VecInfo)
  }
  val out = new Bundle {
    val vd = Output(UInt(vlen.W))
  }
}