package xiangshan.backend.fu.wrapper

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utility.XSError
import xiangshan.backend.fu.FuConfig
import xiangshan.backend.fu.vector.Bundles.{VConfig, VLmul, VSew}
import xiangshan.backend.fu.vector.utils.VecDataSplitModule
import xiangshan.backend.fu.vector.{Mgu, Mgtu, VecInfo, VecPipedFuncUnit}
import xiangshan.ExceptionNO
import yunsuan.{VfaluType, VfpuType}
import yunsuan.vector.VectorFloatAdder

class VFAlu(cfg: FuConfig)(implicit p: Parameters) extends VecPipedFuncUnit(cfg) {
  XSError(io.in.valid && io.in.bits.ctrl.fuOpType === VfpuType.dummy, "Vfalu OpType not supported")

  // params alias
  private val dataWidth = cfg.destDataBits
  private val dataWidthOfDataModule = 64
  private val numVecModule = dataWidth / dataWidthOfDataModule

  // io alias
  private val opcode   = fuOpType(4, 0)
  private val resWiden = fuOpType(5)
  private val opbWiden = fuOpType(6)

  // modules
  private val vfalus = Seq.fill(numVecModule)(Module(new VectorFloatAdder))
  private val vs2Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val vs1Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val oldVdSplit = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val mgu = Module(new Mgu(dataWidth))
  private val mgtu = Module(new Mgtu(dataWidth))

  /**
    * Input connections of [[vs2Split]], [[vs1Split]] and [[oldVdSplit]]
    */
  vs2Split.io.inVecData := vs2
  vs1Split.io.inVecData := vs1
  oldVdSplit.io.inVecData := oldVd

  /**
    * Input connections of [[vfalus]]
    */
  // Vec(vs2(31,0), vs2(63,32), vs2(95,64), vs2(127,96)) ==>
  // Vec(
  //   Cat(vs2(95,64),  vs2(31,0)),
  //   Cat(vs2(127,96), vs2(63,32)),
  // )
  private val vs2GroupedVec: Vec[UInt] = VecInit(vs2Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
  private val vs1GroupedVec: Vec[UInt] = VecInit(vs1Split.io.outVec32b.zipWithIndex.groupBy(_._2 % 2).map(x => x._1 -> x._2.map(_._1)).values.map(x => Cat(x.reverse)).toSeq)
  private val resultData = Wire(Vec(numVecModule, UInt(dataWidthOfDataModule.W)))
  private val fflagsData = Wire(Vec(numVecModule, UInt(20.W)))
  private val srcMaskRShiftForReduction = Wire(UInt((8 * numVecModule).W))
  // for reduction
  val isFirstGroupUop = vuopIdx === 0.U ||
    (vuopIdx === 1.U && (vlmul === VLmul.m4 || vlmul === VLmul.m8)) ||
    ((vuopIdx === 2.U || vuopIdx === 3.U) && vlmul === VLmul.m8)
  val maskRshiftWidthForReduction = Wire(UInt(6.W))
  maskRshiftWidthForReduction := Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
    vuopIdx,
    Mux1H(Seq(
      (vsew === VSew.e16) -> (vuopIdx(1, 0) << 4),
      (vsew === VSew.e32) -> (vuopIdx(1, 0) << 3),
      (vsew === VSew.e64) -> (vuopIdx(1, 0) << 2),
    ))
  )
  val vlMaskForReduction = (~(Fill(VLEN, 1.U) << vl)).asUInt
  srcMaskRShiftForReduction := ((srcMask & vlMaskForReduction) >> maskRshiftWidthForReduction)(8 * numVecModule - 1, 0)
  val existMask = (srcMask & vlMaskForReduction).orR
  val existMaskReg = RegEnable(existMask, io.in.fire)
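  /**
    * Builds the 8-bit source mask seen by each [[VectorFloatAdder]] lane during a reduction.
    * The low 4 bits mask vs2 (fp_a) and the high 4 bits mask vs1 (fp_b); the fold flags
    * select which element pairs stay live as the reduction tree narrows. Ordered reductions
    * (vfredosum/vfwredosum) only consume element 0, so for them just bit 0 is returned.
    */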
  def genMaskForReduction(inmask: UInt, sew: UInt, i: Int): UInt = {
    val f64MaskNum = dataWidth / 64 * 2
    val f32MaskNum = dataWidth / 32 * 2
    val f16MaskNum = dataWidth / 16 * 2
    val f64Mask = inmask(f64MaskNum - 1, 0)
    val f32Mask = inmask(f32MaskNum - 1, 0)
    val f16Mask = inmask(f16MaskNum - 1, 0)
    // vs2 is reordered, so the mask uses the high bits
    val f64FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(0), 0.U(3.W), f64Mask(1)),
      )
    )
    val f64FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(1), 0.U(3.W), f64Mask(0))
      )
    )
    val f32FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(1), f32Mask(0), 0.U(2.W), f32Mask(3), f32Mask(2)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(0), 0.U(3.W), f32Mask(1)),
      )
    )
    val f32FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(3), f32Mask(2), 0.U(2.W), f32Mask(1), f32Mask(0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(1), 0.U(3.W), f32Mask(0)),
      )
    )
    val f16FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(3, 0), f16Mask(7, 4)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(1), f16Mask(0), 0.U(2.W), f16Mask(3), f16Mask(2)),
        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(0), 0.U(3.W), f16Mask(1)),
      )
    )
    val f16FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(7, 4), f16Mask(3, 0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(3), f16Mask(2), 0.U(2.W), f16Mask(1), f16Mask(0)),
        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(1), 0.U(3.W), f16Mask(0)),
      )
    )
    val f64FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00010001".U,
      )
    )
    val f32FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00010001".U,
      )
    )
    val f16FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b11111111".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_8 -> "b00010001".U,
      )
    )
    // low 4 bits for vs2 (fp_a), high 4 bits for vs1 (fp_b)
    val isFold = vecCtrl.fpu.isFoldTo1_2 || vecCtrl.fpu.isFoldTo1_4 || vecCtrl.fpu.isFoldTo1_8
    val f64FirstNotFoldMask = Cat(0.U(3.W), f64Mask(i + 2), 0.U(3.W), f64Mask(i))
    val f32FirstNotFoldMask = Cat(0.U(2.W), f32Mask(i * 2 + 5, i * 2 + 4), 0.U(2.W), f32Mask(i * 2 + 1, i * 2))
    val f16FirstNotFoldMask = Cat(f16Mask(i * 4 + 11, i * 4 + 8), f16Mask(i * 4 + 3, i * 4))
    val f64MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
      Mux(isFold, f64FirstFoldMaskOrder, f64FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f64FirstFoldMaskUnorder, f64FirstNotFoldMask),
        Mux(isFold, f64FoldMask, Fill(8, 1.U))))
    val f32MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
      Mux(isFold, f32FirstFoldMaskOrder, f32FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f32FirstFoldMaskUnorder, f32FirstNotFoldMask),
        Mux(isFold, f32FoldMask, Fill(8, 1.U))))
    val f16MaskI = Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
      Mux(isFold, f16FirstFoldMaskOrder, f16FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f16FirstFoldMaskUnorder, f16FirstNotFoldMask),
        Mux(isFold, f16FoldMask, Fill(8, 1.U))))
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum, outMask(0), outMask)
  }
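  /**
    * Builds the per-lane element mask for ordinary (non-reduction) operations:
    * lane i receives the mask bits of the elements it processes, zero-padded to 4 bits.
    * E.g. with sew=32 each 64-bit lane holds two elements, so lane i gets bits (2i+1, 2i).
    */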
  def genMaskForMerge(inmask: UInt, sew: UInt, i: Int): UInt = {
    val f64MaskNum = dataWidth / 64
    val f32MaskNum = dataWidth / 32
    val f16MaskNum = dataWidth / 16
    val f64Mask = inmask(f64MaskNum - 1, 0)
    val f32Mask = inmask(f32MaskNum - 1, 0)
    val f16Mask = inmask(f16MaskNum - 1, 0)
    val f64MaskI = Cat(0.U(3.W), f64Mask(i))
    val f32MaskI = Cat(0.U(2.W), f32Mask(2 * i + 1, 2 * i))
    val f16MaskI = f16Mask(4 * i + 3, 4 * i)
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    outMask
  }

  def genMaskForRedFFlag(sew: UInt): UInt = {
    val default = "b11111111".U
    val f64FoldMask = Mux(outVecCtrl.fpu.isFoldTo1_2, "b00000001".U, default)
    val f32Fold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4
    val f32FoldMask = Mux1H(
      Seq(
        outVecCtrl.fpu.isFoldTo1_2 -> "b00000011".U,
        outVecCtrl.fpu.isFoldTo1_4 -> "b00000001".U,
      )
    )
    val f16Fold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4 || outVecCtrl.fpu.isFoldTo1_8
    val f16FoldMask = Mux1H(
      Seq(
        outVecCtrl.fpu.isFoldTo1_2 -> "b00001111".U,
        outVecCtrl.fpu.isFoldTo1_4 -> "b00000011".U,
        outVecCtrl.fpu.isFoldTo1_8 -> "b00000001".U,
      )
    )
    Mux1H(
      Seq(
        (sew === 3.U) -> f64FoldMask,
        (sew === 2.U) -> Mux(f32Fold, f32FoldMask, default),
        (sew === 1.U) -> Mux(f16Fold, f16FoldMask, default),
      )
    )
  }

  val isScalarMove = (fuOpType === VfaluType.vfmv_f_s) || (fuOpType === VfaluType.vfmv_s_f)
  val srcMaskRShift = Wire(UInt((4 * numVecModule).W))
  val maskRshiftWidth = Wire(UInt(6.W))
  maskRshiftWidth := Mux1H(
    Seq(
      (vsew === VSew.e16) -> (vuopIdx(2, 0) << 3),
      (vsew === VSew.e32) -> (vuopIdx(2, 0) << 2),
      (vsew === VSew.e64) -> (vuopIdx(2, 0) << 1),
    )
  )
  srcMaskRShift := (srcMask >> maskRshiftWidth)(4 * numVecModule - 1, 0)
  val fp_aIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  val fp_bIsFpCanonicalNAN = Wire(Vec(numVecModule, Bool()))
  val inIsFold = Wire(UInt(3.W))
  inIsFold := Cat(vecCtrl.fpu.isFoldTo1_8, vecCtrl.fpu.isFoldTo1_4, vecCtrl.fpu.isFoldTo1_2)
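  // Per-lane hookup: each 64-bit VectorFloatAdder lane receives its slice of vs1/vs2,
  // its merge and reduction masks, and the widening/fold controls decoded above.
  // fp_{a,b}IsFpCanonicalNAN flags scalar operands that are not properly NaN-boxed
  // for the narrower sew (upper bits not all ones), so the adder treats them as
  // canonical NaNs.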
  vfalus.zipWithIndex.foreach {
    case (mod, i) =>
      mod.io.fire := io.in.valid
      mod.io.fp_a := vs2Split.io.outVec64b(i)
      mod.io.fp_b := vs1Split.io.outVec64b(i)
      mod.io.widen_a := Cat(vs2Split.io.outVec32b(i + numVecModule), vs2Split.io.outVec32b(i))
      mod.io.widen_b := Cat(vs1Split.io.outVec32b(i + numVecModule), vs1Split.io.outVec32b(i))
      mod.io.frs1 := 0.U // already vf -> vv
      mod.io.is_frs1 := false.B // already vf -> vv
      mod.io.mask := Mux(isScalarMove, !vuopIdx.orR, genMaskForMerge(inmask = srcMaskRShift, sew = vsew, i = i))
      mod.io.maskForReduction := genMaskForReduction(inmask = srcMaskRShiftForReduction, sew = vsew, i = i)
      mod.io.uop_idx := vuopIdx(0)
      mod.io.is_vec := true.B // Todo
      mod.io.round_mode := rm
      mod.io.fp_format := Mux(resWiden, vsew + 1.U, vsew)
      mod.io.opb_widening := opbWiden
      mod.io.res_widening := resWiden
      mod.io.op_code := opcode
      mod.io.is_vfwredosum := fuOpType === VfaluType.vfwredosum
      mod.io.is_fold := inIsFold
      mod.io.vs2_fold := vs2 // for better timing
      resultData(i) := mod.io.fp_result
      fflagsData(i) := mod.io.fflags
      fp_aIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
        ((vsew === VSew.e32) & (!vs2Split.io.outVec64b(i).head(32).andR)) |
        ((vsew === VSew.e16) & (!vs2Split.io.outVec64b(i).head(48).andR))
      )
      fp_bIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
        ((vsew === VSew.e32) & (!vs1Split.io.outVec64b(i).head(32).andR)) |
        ((vsew === VSew.e16) & (!vs1Split.io.outVec64b(i).head(48).andR))
      )
      mod.io.fp_aIsFpCanonicalNAN := fp_aIsFpCanonicalNAN(i)
      mod.io.fp_bIsFpCanonicalNAN := fp_bIsFpCanonicalNAN(i)
  }
  val outVuopidx = outVecCtrl.vuopIdx(2, 0) // for vfadd, max vuopIdx = 7
  val numOfUopVFRED = Wire(UInt(4.W))
  val numOfUopVFREDReg = RegEnable(numOfUopVFRED, io.in.fire)
  val vs1Reg = RegEnable(vs1, io.in.fire)
  val outIsVfRedUnordered = outCtrl.fuOpType === VfaluType.vfredusum ||
    outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin
  val outIsVfRedUnComp = outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin
  val outIsVfRedUnSum = outCtrl.fuOpType === VfaluType.vfredusum
  val outIsVfRedOrdered = outCtrl.fuOpType === VfaluType.vfredosum ||
    outCtrl.fuOpType === VfaluType.vfwredosum

  val isLastUopRed = outIsVfRedUnordered && outLastUop
  val resultDataUInt = Mux(isLastUopRed && !existMaskReg, vs1Reg, resultData.asUInt)
  val cmpResultWidth = dataWidth / 16
  val cmpResult = Wire(Vec(cmpResultWidth, Bool()))
  for (i <- 0 until cmpResultWidth) {
    if (i == 0) {
      cmpResult(i) := resultDataUInt(0)
    }
    else if (i < dataWidth / 64) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i * 16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i * 32),
          (outVecCtrl.vsew === 3.U) -> resultDataUInt(i * 64)
        )
      )
    }
    else if (i < dataWidth / 32) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i * 16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i * 32),
          (outVecCtrl.vsew === 3.U) -> false.B
        )
      )
    }
    else if (i < dataWidth / 16) {
      cmpResult(i) := Mux(outVecCtrl.vsew === 1.U, resultDataUInt(i * 16), false.B)
    }
  }
  val outCtrl_s0 = ctrlVec.head
  val outVecCtrl_s0 = ctrlVec.head.vpu.get
  val outEew_s0 = Mux(resWiden, outVecCtrl_s0.vsew + 1.U, outVecCtrl_s0.vsew)
  val outWiden = RegEnable(resWiden, io.in.fire)
  val outEew = Mux(outWiden, outVecCtrl.vsew + 1.U, outVecCtrl.vsew)
  val vlMax_s0 = ((VLEN / 8).U >> outEew_s0).asUInt
  val vlMax = ((VLEN / 8).U >> outEew).asUInt
  val outVlmulFix = Mux(outWiden, outVecCtrl.vlmul + 1.U, outVecCtrl.vlmul)
  val lmulAbs = Mux(outVlmulFix(2), (~outVlmulFix(1, 0)).asUInt + 1.U, outVlmulFix(1, 0))
  // vfmv_f_s needs vl=1, the last uop of a reduction needs vl=1, other uops need vl=vlmax
  numOfUopVFRED := {
    // addTime includes the add of frs1
    val addTime = MuxLookup(outVecCtrl_s0.vlmul, 1.U(4.W))(Seq(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(outVecCtrl_s0.vsew, "b000".U)(Seq(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul <  1: foldTime = vlmul - foldLastVlmul
    // lmul >= 1: foldTime = 0.U  - foldLastVlmul
    val foldTime = Mux(outVecCtrl_s0.vlmul(2), outVecCtrl_s0.vlmul, 0.U) - foldLastVlmul
    addTime + foldTime
  }
  val reductionVl = Mux((outVecCtrl_s0.vuopIdx === numOfUopVFRED - 1.U) || (outCtrl_s0.fuOpType === VfaluType.vfredosum || outCtrl_s0.fuOpType === VfaluType.vfwredosum), 1.U, vlMax_s0)
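  // The *_s0 signals below are sampled at the input stage and registered into the
  // output stage. outVlFix overrides the architectural vl for special cases:
  // scalar-float uops and vfmv.f.s use vl=1, vfmv.s.f writes one element only when
  // vl is non-zero, and the final (or any ordered) reduction uop uses vl=1.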
  val outIsReduction = outCtrl.fuOpType === VfaluType.vfredusum ||
    outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin ||
    outCtrl.fuOpType === VfaluType.vfredosum ||
    outCtrl.fuOpType === VfaluType.vfwredosum
  val outIsReduction_s0 = outCtrl_s0.fuOpType === VfaluType.vfredusum ||
    outCtrl_s0.fuOpType === VfaluType.vfredmax ||
    outCtrl_s0.fuOpType === VfaluType.vfredmin ||
    outCtrl_s0.fuOpType === VfaluType.vfredosum ||
    outCtrl_s0.fuOpType === VfaluType.vfwredosum
  val outVConfig_s0 = if (!cfg.vconfigWakeUp) outVecCtrl_s0.vconfig else dataVec.head.getSrcVConfig.asTypeOf(new VConfig)
  val outVl_s0 = outVConfig_s0.vl
  val outVlFix_s0 = Mux(
    outVecCtrl_s0.fpu.isFpToVecInst || (outCtrl_s0.fuOpType === VfaluType.vfmv_f_s),
    1.U,
    Mux(
      outCtrl_s0.fuOpType === VfaluType.vfmv_s_f,
      outVl_s0.orR,
      Mux(outIsReduction_s0, reductionVl, outVl_s0)
    )
  )
  val outVlFix = RegEnable(outVlFix_s0, io.in.fire)

  val vlMaxAllUop = Wire(outVl.cloneType)
  vlMaxAllUop := Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax << lmulAbs).asUInt
  val vlMaxThisUop = Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax).asUInt
  val vlSetThisUop = Mux(outVlFix > outVuopidx * vlMaxThisUop, outVlFix - outVuopidx * vlMaxThisUop, 0.U)
  val vlThisUop = Wire(UInt(4.W))
  vlThisUop := Mux(vlSetThisUop < vlMaxThisUop, vlSetThisUop, vlMaxThisUop)
  val vlMaskRShift = Wire(UInt((4 * numVecModule).W))
  vlMaskRShift := Fill(4 * numVecModule, 1.U(1.W)) >> ((4 * numVecModule).U - vlThisUop)

  val outVuopidxForRed = outVecCtrl.vuopIdx(3, 0) // lmul=8 sew=16: (4+2+1)(vector) + (1+1+1)(fold) + (1)(scalar), max vuopIdx = 10
  val outIsFirstGroup = outVuopidxForRed === 0.U ||
    (outVuopidxForRed === 1.U && (outVlmul === VLmul.m4 || outVlmul === VLmul.m8)) ||
    ((outVuopidxForRed === 2.U || outVuopidxForRed === 3.U) && outVlmul === VLmul.m8)
  val firstNeedFFlags = outIsFirstGroup && outIsVfRedUnComp
  val lastNeedFFlags = outVecCtrl.lastUop && outIsVfRedUnComp
  private val needNoMask = outCtrl.fuOpType === VfaluType.vfmerge ||
    outCtrl.fuOpType === VfaluType.vfmv_s_f ||
    outIsReduction ||
    outVecCtrl.fpu.isFpToVecInst
  val maskToMgu = Mux(needNoMask, allMaskTrue, outSrcMask)
  val allFFlagsEn = Wire(Vec(4 * numVecModule, Bool()))
  val outSrcMaskRShift = Wire(UInt((4 * numVecModule).W))
  outSrcMaskRShift := (maskToMgu >> (outVecCtrl.vuopIdx(2, 0) * vlMax))(4 * numVecModule - 1, 0)
  val f16FFlagsEn = outSrcMaskRShift
  val f32FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f64FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f16VlMaskEn = vlMaskRShift
  val f32VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f64VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  for (i <- 0 until numVecModule) {
    f32FFlagsEn(i) := Cat(Fill(2, 0.U), outSrcMaskRShift(2 * i + 1, 2 * i))
    f64FFlagsEn(i) := Cat(Fill(3, 0.U), outSrcMaskRShift(i))
    f32VlMaskEn(i) := Cat(Fill(2, 0.U), vlMaskRShift(2 * i + 1, 2 * i))
    f64VlMaskEn(i) := Cat(Fill(3, 0.U), vlMaskRShift(i))
  }
  val fflagsEn = Mux1H(
    Seq(
      (outEew === 1.U) -> f16FFlagsEn.asUInt,
      (outEew === 2.U) -> f32FFlagsEn.asUInt,
      (outEew === 3.U) -> f64FFlagsEn.asUInt
    )
  )
  val vlMaskEn = Mux1H(
    Seq(
      (outEew === 1.U) -> f16VlMaskEn.asUInt,
      (outEew === 2.U) -> f32VlMaskEn.asUInt,
      (outEew === 3.U) -> f64VlMaskEn.asUInt
    )
  )
  val fflagsRedMask = genMaskForRedFFlag(outVecCtrl.vsew)

  if (backendParams.debugEn) {
    dontTouch(allFFlagsEn)
    dontTouch(fflagsRedMask)
  }
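  // fflags accumulation: for min/max reductions only the first-group and last uops
  // raise fflags, unordered sums raise them on every uop, and ordered sums only
  // through element 0, all gated by fflagsRedMask as the fold narrows. For ordinary
  // ops an element raises fflags only if it is active in the source mask and below vl.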
  allFFlagsEn := Mux(
    outIsReduction,
    Cat(
      Fill(4 * numVecModule - 1, firstNeedFFlags || (outIsVfRedUnSum && !outVecCtrl.lastUop)) & fflagsRedMask(4 * numVecModule - 1, 1),
      lastNeedFFlags || firstNeedFFlags || outIsVfRedOrdered || outIsVfRedUnSum
    ),
    fflagsEn & vlMaskEn
  ).asTypeOf(allFFlagsEn)

  val allFFlags = fflagsData.asTypeOf(Vec(4 * numVecModule, UInt(5.W)))
  val outFFlags = allFFlagsEn.zip(allFFlags).map {
    case (en, fflags) => Mux(en, fflags, 0.U(5.W))
  }.reduce(_ | _)

  val cmpResultOldVd = Wire(UInt(cmpResultWidth.W))
  val cmpResultOldVdRshiftWidth = Wire(UInt(6.W))
  cmpResultOldVdRshiftWidth := Mux1H(
    Seq(
      (outVecCtrl.vsew === VSew.e16) -> (outVecCtrl.vuopIdx(2, 0) << 3),
      (outVecCtrl.vsew === VSew.e32) -> (outVecCtrl.vuopIdx(2, 0) << 2),
      (outVecCtrl.vsew === VSew.e64) -> (outVecCtrl.vuopIdx(2, 0) << 1),
    )
  )
  cmpResultOldVd := (outOldVd >> cmpResultOldVdRshiftWidth)(4 * numVecModule - 1, 0)
  val cmpResultForMgu = Wire(Vec(cmpResultWidth, Bool()))
  private val maxVdIdx = 8
  private val elementsInOneUop = Mux1H(
    Seq(
      (outEew === 1.U) -> (cmpResultWidth).U(4.W),
      (outEew === 2.U) -> (cmpResultWidth / 2).U(4.W),
      (outEew === 3.U) -> (cmpResultWidth / 4).U(4.W),
    )
  )
  private val vdIdx = outVecCtrl.vuopIdx(2, 0)
  private val elementsComputed = Mux1H(Seq.tabulate(maxVdIdx)(i => (vdIdx === i.U) -> (elementsInOneUop * i.U)))
  for (i <- 0 until cmpResultWidth) {
    val cmpResultWithVmask = Mux(outSrcMaskRShift(i), cmpResult(i), Mux(outVecCtrl.vma, true.B, cmpResultOldVd(i)))
    cmpResultForMgu(i) := Mux(elementsComputed +& i.U >= outVl, true.B, cmpResultWithVmask)
  }
  val outIsFold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4 || outVecCtrl.fpu.isFoldTo1_8
  val outOldVdForREDO = Mux1H(Seq(
    (outVecCtrl.vsew === VSew.e16) -> (outOldVd >> 16),
    (outVecCtrl.vsew === VSew.e32) -> (outOldVd >> 32),
    (outVecCtrl.vsew === VSew.e64) -> (outOldVd >> 64),
  ))
  val outOldVdForWREDO = Mux(
    !outIsFold,
    Mux(outVecCtrl.vsew === VSew.e16, Cat(outOldVd(VLEN - 1 - 16, 16), 0.U(32.W)), Cat(outOldVd(VLEN - 1 - 32, 32), 0.U(64.W))),
    Mux(outVecCtrl.vsew === VSew.e16,
      // vuopIdx modulo 8 equals 1
      Mux(outVecCtrl.vuopIdx(2, 0) === 1.U, outOldVd, outOldVd >> 16),
      // vuopIdx modulo 4 equals 1
      Mux(outVecCtrl.vuopIdx(1, 0) === 1.U, outOldVd, outOldVd >> 32)
    ),
  )
  val outOldVdForRED = Mux(outCtrl.fuOpType === VfaluType.vfredosum, outOldVdForREDO, outOldVdForWREDO)
  val numOfUopVFREDOSUM = {
    val uvlMax = MuxLookup(outVecCtrl.vsew, 0.U)(Seq(
      VSew.e16 -> 8.U,
      VSew.e32 -> 4.U,
      VSew.e64 -> 2.U,
    ))
    val vlMax = Mux(outVecCtrl.vlmul(2), uvlMax >> (-outVecCtrl.vlmul)(1, 0), uvlMax << outVecCtrl.vlmul(1, 0)).asUInt
    vlMax
  }
  val isLastUopForREDO = outVecCtrl.lastUop
  val isOutOldVdForREDO = ((outCtrl.fuOpType === VfaluType.vfredosum && outIsFold) || outCtrl.fuOpType === VfaluType.vfwredosum) && !isLastUopForREDO
  val taIsFalseForVFREDO = ((outCtrl.fuOpType === VfaluType.vfredosum) || (outCtrl.fuOpType === VfaluType.vfwredosum)) && (outVecCtrl.vuopIdx =/= numOfUopVFREDOSUM - 1.U)
  // outVecCtrl.fpu.isFpToVecInst means this is a scalar float instruction, not a vector float instruction
  val notUseVl = outVecCtrl.fpu.isFpToVecInst || (outCtrl.fuOpType === VfaluType.vfmv_f_s)
  val notModifyVd = !notUseVl && (outVl === 0.U)
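  // Write-back path: the Mgu applies the tail/mask policy (ta/ma, vl, vstart) to the
  // raw result; when the destination is a mask register, the Mgtu then sets the bits
  // beyond vl. vfmv.f.s forces ta and vfmv.s.f forces ma, since only one element is
  // written in those cases.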
  mgu.io.in.vd := Mux(outVecCtrl.isDstMask, Cat(0.U((dataWidth / 16 * 15).W), cmpResultForMgu.asUInt), resultDataUInt)
  mgu.io.in.oldVd := Mux(isOutOldVdForREDO, outOldVdForRED, outOldVd)
  mgu.io.in.mask := maskToMgu
  mgu.io.in.info.ta := Mux(outCtrl.fuOpType === VfaluType.vfmv_f_s, true.B, Mux(taIsFalseForVFREDO, false.B, outVecCtrl.vta))
  mgu.io.in.info.ma := Mux(outCtrl.fuOpType === VfaluType.vfmv_s_f, true.B, outVecCtrl.vma)
  mgu.io.in.info.vl := outVlFix
  mgu.io.in.info.vlmul := outVecCtrl.vlmul
  mgu.io.in.info.valid := Mux(notModifyVd, false.B, io.in.valid)
  mgu.io.in.info.vstart := Mux(outVecCtrl.fpu.isFpToVecInst, 0.U, outVecCtrl.vstart)
  mgu.io.in.info.eew := RegEnable(outEew_s0, io.in.fire)
  mgu.io.in.info.vsew := outVecCtrl.vsew
  mgu.io.in.info.vdIdx := RegEnable(Mux(outIsReduction_s0, 0.U, outVecCtrl_s0.vuopIdx), io.in.fire)
  mgu.io.in.info.narrow := outVecCtrl.isNarrow
  mgu.io.in.info.dstMask := outVecCtrl.isDstMask
  mgu.io.in.isIndexedVls := false.B
  mgtu.io.in.vd := Mux(outVecCtrl.isDstMask, mgu.io.out.vd, resultDataUInt)
  mgtu.io.in.vl := outVl
  val resultFpMask = Wire(UInt(VLEN.W))
  val isFclass = outVecCtrl.fpu.isFpToVecInst && (outCtrl.fuOpType === VfaluType.vfclass)
  val fpCmpFuOpType = Seq(VfaluType.vfeq, VfaluType.vflt, VfaluType.vfle)
  val isCmp = outVecCtrl.fpu.isFpToVecInst && (fpCmpFuOpType.map(_ === outCtrl.fuOpType).reduce(_ | _))
  resultFpMask := Mux(isFclass || isCmp, Fill(16, 1.U(1.W)), Fill(VLEN, 1.U(1.W)))
  // when the destination is a mask, the result needs to be masked by mgtu
  io.out.bits.res.data := Mux(notModifyVd, outOldVd, Mux(outVecCtrl.isDstMask, mgtu.io.out.vd, mgu.io.out.vd) & resultFpMask)
  io.out.bits.res.fflags.get := Mux(notModifyVd, 0.U(5.W), outFFlags)
  io.out.bits.ctrl.exceptionVec.get(ExceptionNO.illegalInstr) := mgu.io.out.illegal

}

class VFMgu(vlen: Int)(implicit p: Parameters) extends Module {
  val io = IO(new VFMguIO(vlen))

  val vd = io.in.vd
  val oldvd = io.in.oldVd
  val mask = io.in.mask
  val vsew = io.in.info.eew
  val num16bits = vlen / 16

}

class VFMguIO(vlen: Int)(implicit p: Parameters) extends Bundle {
  val in = new Bundle {
    val vd = Input(UInt(vlen.W))
    val oldVd = Input(UInt(vlen.W))
    val mask = Input(UInt(vlen.W))
    val info = Input(new VecInfo)
  }
  val out = new Bundle {
    val vd = Output(UInt(vlen.W))
  }
}