// Source: XiangShan/src/main/scala/xiangshan/backend/fu/wrapper/VFALU.scala (revision c41f725a91c55e75c95c55b4bb0d2649f43e4c83)
package xiangshan.backend.fu.wrapper

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utility.XSError
import xiangshan.backend.fu.FuConfig
import xiangshan.backend.fu.vector.Bundles.{VLmul, VSew}
import xiangshan.backend.fu.vector.utils.VecDataSplitModule
import xiangshan.backend.fu.vector.{Mgu, Mgtu, VecInfo, VecPipedFuncUnit}
import xiangshan.ExceptionNO
import yunsuan.{VfaluType, VfpuType}
import yunsuan.vector.VectorFloatAdder
import xiangshan.backend.fu.vector.Bundles.VConfig

/**
  * Vector floating-point ALU wrapper.
  *
  * Instantiates `dataWidth / 64` [[VectorFloatAdder]] lanes, feeds each lane a 64-bit slice of
  * vs2/vs1 together with per-lane mask bits, and post-processes the lane outputs (result data and
  * fflags) through [[Mgu]] (and [[Mgtu]] when the destination is a mask register) to produce
  * `io.out.bits.res.data` and `io.out.bits.res.fflags`.
  *
  * Signals without an `out` prefix (`fuOpType`, `vsew`, `vuopIdx`, `srcMask`, ...) are used on the
  * input side; signals with an `out` prefix (`outCtrl`, `outVecCtrl`, `outOldVd`, ...) are used on
  * the output side. Both groups are inherited from [[VecPipedFuncUnit]].
  *
  * @param cfg function-unit configuration; `cfg.destDataBits` determines the data width and
  *            therefore the number of adder lanes
  */
class VFAlu(cfg: FuConfig)(implicit p: Parameters) extends VecPipedFuncUnit(cfg) {
  XSError(io.in.valid && io.in.bits.ctrl.fuOpType === VfpuType.dummy, "Vfalu OpType not supported")

  // params alias
  private val dataWidth = cfg.destDataBits
  private val dataWidthOfDataModule = 64
  private val numVecModule = dataWidth / dataWidthOfDataModule

  // io alias: fuOpType is sliced as {opbWiden (bit 6), resWiden (bit 5), opcode (bits 4..0)}
  private val opcode  = fuOpType(4,0)
  private val resWiden  = fuOpType(5)
  private val opbWiden  = fuOpType(6)

  // modules
  private val vfalus = Seq.fill(numVecModule)(Module(new VectorFloatAdder))
  private val vs2Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val vs1Split = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  // NOTE(review): only the input of oldVdSplit is connected; none of its outputs is read in this class.
  private val oldVdSplit  = Module(new VecDataSplitModule(dataWidth, dataWidthOfDataModule))
  private val mgu = Module(new Mgu(dataWidth))
  private val mgtu = Module(new Mgtu(dataWidth))

  /**
    * In connection of [[vs2Split]], [[vs1Split]] and [[oldVdSplit]]
    */
  vs2Split.io.inVecData := vs2
  vs1Split.io.inVecData := vs1
  oldVdSplit.io.inVecData := oldVd

  /**
    * [[vfalus]]'s in connection
    */
  // Per-lane 64-bit result and 20-bit fflags; the fflags are later viewed as 4 x 5-bit groups per lane.
  private val resultData = Wire(Vec(numVecModule,UInt(dataWidthOfDataModule.W)))
  private val fflagsData = Wire(Vec(numVecModule,UInt(20.W)))
  private val srcMaskRShiftForReduction = Wire(UInt((8 * numVecModule).W))
  // for reduction
  val isFirstGroupUop = vuopIdx === 0.U ||
    (vuopIdx === 1.U && (vlmul === VLmul.m4 || vlmul === VLmul.m8)) ||
    ((vuopIdx === 2.U || vuopIdx === 3.U) && vlmul === VLmul.m8)
  // Mask shift amount for reductions: ordered sums shift by vuopIdx itself,
  // the other reductions shift by vuopIdx(1, 0) scaled by 16 / 8 / 4 for e16 / e32 / e64.
  val maskRshiftWidthForReduction = Wire(UInt(6.W))
  maskRshiftWidthForReduction := Mux(fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum,
    vuopIdx,
    Mux1H(Seq(
      (vsew === VSew.e16) -> (vuopIdx(1, 0) << 4),
      (vsew === VSew.e32) -> (vuopIdx(1, 0) << 3),
      (vsew === VSew.e64) -> (vuopIdx(1, 0) << 2),
    ))
  )
  // Low `vl` bits set; ANDed with srcMask to drop mask bits at positions >= vl.
  val vlMaskForReduction = (~(Fill(VLEN, 1.U) << vl)).asUInt
  srcMaskRShiftForReduction := ((srcMask & vlMaskForReduction) >> maskRshiftWidthForReduction)(8 * numVecModule - 1, 0)
  // True when at least one mask bit below vl is set; registered on io.in.fire for use on the output side.
  val existMask = (srcMask & vlMaskForReduction).orR
  val existMaskReg = RegEnable(existMask, io.in.fire)


  /**
    * Builds the value driven onto `maskForReduction` of adder lane `i`.
    *
    * Three cases are distinguished:
    *  - ordered sums (vfredosum / vfwredosum): the "Order" fold masks, or the not-fold mask;
    *  - other reductions in a first-group uop ([[isFirstGroupUop]]): the "Unorder" fold masks, or
    *    the not-fold mask;
    *  - other reductions in later uops: a constant fold mask, or all ones when not folding.
    *
    * For ordered sums only bit 0 of the selected mask is returned.
    *
    * @param inmask mask bits already shifted so that bit 0 belongs to this uop
    * @param sew    element width selector: 1 -> f16, 2 -> f32, 3 -> f64
    * @param i      lane index (elaboration-time constant)
    * @return the reduction mask for lane `i`
    */
  def genMaskForReduction(inmask: UInt, sew: UInt, i: Int): UInt = {
    // Ordered-sum predicate, shared by every selection below.
    val isOrderedSum = fuOpType === VfaluType.vfredosum || fuOpType === VfaluType.vfwredosum
    val f64MaskNum = dataWidth / 64 * 2
    val f32MaskNum = dataWidth / 32 * 2
    val f16MaskNum = dataWidth / 16 * 2
    val f64Mask = inmask(f64MaskNum - 1, 0)
    val f32Mask = inmask(f32MaskNum - 1, 0)
    val f16Mask = inmask(f16MaskNum - 1, 0)
    // vs2 reordered, so mask use high bits
    val f64FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(0), 0.U(3.W), f64Mask(1)),
      )
    )
    val f64FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(3.W), f64Mask(1), 0.U(3.W), f64Mask(0))
      )
    )
    val f32FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(1), f32Mask(0), 0.U(2.W), f32Mask(3), f32Mask(2)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(0), 0.U(3.W), f32Mask(1)),
      )
    )
    val f32FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(0.U(2.W), f32Mask(3), f32Mask(2), 0.U(2.W), f32Mask(1), f32Mask(0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(3.W), f32Mask(1), 0.U(3.W), f32Mask(0)),
      )
    )
    val f16FirstFoldMaskUnorder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(3,0), f16Mask(7,4)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(1), f16Mask(0), 0.U(2.W), f16Mask(3), f16Mask(2)),
        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(0), 0.U(3.W), f16Mask(1)),
      )
    )
    val f16FirstFoldMaskOrder = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> Cat(f16Mask(7,4), f16Mask(3,0)),
        vecCtrl.fpu.isFoldTo1_4 -> Cat(0.U(2.W), f16Mask(3), f16Mask(2), 0.U(2.W), f16Mask(1), f16Mask(0)),
        vecCtrl.fpu.isFoldTo1_8 -> Cat(0.U(3.W), f16Mask(1), 0.U(3.W), f16Mask(0)),
      )
    )
    // Constant masks used for fold steps after the first group.
    val f64FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00010001".U,
      )
    )
    val f32FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00010001".U,
      )
    )
    val f16FoldMask = Mux1H(
      Seq(
        vecCtrl.fpu.isFoldTo1_2 -> "b11111111".U,
        vecCtrl.fpu.isFoldTo1_4 -> "b00110011".U,
        vecCtrl.fpu.isFoldTo1_8 -> "b00010001".U,
      )
    )
    // low 4 bits for vs2(fp_a), high 4 bits for vs1(fp_b),
    val isFold = vecCtrl.fpu.isFoldTo1_2 || vecCtrl.fpu.isFoldTo1_4 || vecCtrl.fpu.isFoldTo1_8
    val f64FirstNotFoldMask = Cat(0.U(3.W), f64Mask(i + 2), 0.U(3.W), f64Mask(i))
    val f32FirstNotFoldMask = Cat(0.U(2.W), f32Mask(i * 2 + 5, i * 2 + 4), 0.U(2.W), Cat(f32Mask(i * 2 + 1, i * 2)))
    val f16FirstNotFoldMask = Cat(f16Mask(i * 4 + 11, i * 4 + 8), f16Mask(i * 4 + 3, i * 4))
    val f64MaskI = Mux(isOrderedSum,
      Mux(isFold, f64FirstFoldMaskOrder, f64FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f64FirstFoldMaskUnorder, f64FirstNotFoldMask),
        Mux(isFold, f64FoldMask, Fill(8, 1.U))))
    val f32MaskI = Mux(isOrderedSum,
      Mux(isFold, f32FirstFoldMaskOrder, f32FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f32FirstFoldMaskUnorder, f32FirstNotFoldMask),
        Mux(isFold, f32FoldMask, Fill(8, 1.U))))
    val f16MaskI = Mux(isOrderedSum,
      Mux(isFold, f16FirstFoldMaskOrder, f16FirstNotFoldMask),
      Mux(isFirstGroupUop,
        Mux(isFold, f16FirstFoldMaskUnorder, f16FirstNotFoldMask),
        Mux(isFold, f16FoldMask, Fill(8, 1.U))))
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    Mux(isOrderedSum, outMask(0),outMask)
  }

  /**
    * Selects the 4-bit element mask of adder lane `i` for non-reduction operations.
    *
    * Lane `i` receives one mask bit for f64, two for f32 and four for f16; unused upper bits are
    * zero.
    *
    * @param inmask mask bits already shifted so that bit 0 belongs to this uop
    * @param sew    element width selector: 1 -> f16, 2 -> f32, 3 -> f64
    * @param i      lane index (elaboration-time constant)
    * @return the 4-bit mask for lane `i`
    */
  def genMaskForMerge(inmask:UInt, sew:UInt, i:Int): UInt = {
    val f64MaskNum = dataWidth / 64
    val f32MaskNum = dataWidth / 32
    val f16MaskNum = dataWidth / 16
    val f64Mask = inmask(f64MaskNum-1,0)
    val f32Mask = inmask(f32MaskNum-1,0)
    val f16Mask = inmask(f16MaskNum-1,0)
    val f64MaskI = Cat(0.U(3.W),f64Mask(i))
    val f32MaskI = Cat(0.U(2.W),f32Mask(2*i+1,2*i))
    val f16MaskI = f16Mask(4*i+3,4*i)
    val outMask = Mux1H(
      Seq(
        (sew === 3.U) -> f64MaskI,
        (sew === 2.U) -> f32MaskI,
        (sew === 1.U) -> f16MaskI,
      )
    )
    outMask
  }

  /**
    * Builds an 8-bit mask used to gate the fflags enables of reductions, based on the
    * output-side fold flags (`outVecCtrl.fpu.isFoldTo1_*`). When no applicable fold flag is set
    * the mask is all ones.
    *
    * @param sew element width selector: 1 -> f16, 2 -> f32, 3 -> f64
    * @return the 8-bit fflags mask
    */
  def genMaskForRedFFlag(sew:UInt): UInt = {
    val default = "b11111111".U
    val f64FoldMask = Mux(outVecCtrl.fpu.isFoldTo1_2, "b00000001".U, default)
    val f32Fold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4
    val f32FoldMask = Mux1H(
      Seq(
        outVecCtrl.fpu.isFoldTo1_2 -> "b00000011".U,
        outVecCtrl.fpu.isFoldTo1_4 -> "b00000001".U,
      )
    )
    val f16Fold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4 || outVecCtrl.fpu.isFoldTo1_8
    val f16FoldMask = Mux1H(
      Seq(
        outVecCtrl.fpu.isFoldTo1_2 -> "b00001111".U,
        outVecCtrl.fpu.isFoldTo1_4 -> "b00000011".U,
        outVecCtrl.fpu.isFoldTo1_8 -> "b00000001".U,
      )
    )
    Mux1H(
      Seq(
        (sew === 3.U) -> f64FoldMask,
        (sew === 2.U) -> Mux(f32Fold, f32FoldMask, default),
        (sew === 1.U) -> Mux(f16Fold, f16FoldMask, default),
      )
    )
  }

  val isScalarMove = (fuOpType === VfaluType.vfmv_f_s) || (fuOpType === VfaluType.vfmv_s_f)
  // Source mask shifted so that bit 0 is the first element of this uop:
  // each uop covers 8 / 4 / 2 mask bits for e16 / e32 / e64.
  val srcMaskRShift = Wire(UInt((4 * numVecModule).W))
  val maskRshiftWidth = Wire(UInt(6.W))
  maskRshiftWidth := Mux1H(
    Seq(
      (vsew === VSew.e16) -> (vuopIdx(2,0) << 3),
      (vsew === VSew.e32) -> (vuopIdx(2,0) << 2),
      (vsew === VSew.e64) -> (vuopIdx(2,0) << 1),
    )
  )
  srcMaskRShift := (srcMask >> maskRshiftWidth)(4 * numVecModule - 1, 0)
  val fp_aIsFpCanonicalNAN = Wire(Vec(numVecModule,Bool()))
  val fp_bIsFpCanonicalNAN = Wire(Vec(numVecModule,Bool()))
  // Fold flags packed as {isFoldTo1_8, isFoldTo1_4, isFoldTo1_2}
  val inIsFold = Wire(UInt(3.W))
  inIsFold := Cat(vecCtrl.fpu.isFoldTo1_8, vecCtrl.fpu.isFoldTo1_4, vecCtrl.fpu.isFoldTo1_2)
  vfalus.zipWithIndex.foreach {
    case (mod, i) =>
      mod.io.fire             := io.in.valid
      mod.io.fp_a             := vs2Split.io.outVec64b(i)
      mod.io.fp_b             := vs1Split.io.outVec64b(i)
      // Widening operands: 32-bit element i paired with 32-bit element i + numVecModule
      mod.io.widen_a          := Cat(vs2Split.io.outVec32b(i+numVecModule), vs2Split.io.outVec32b(i))
      mod.io.widen_b          := Cat(vs1Split.io.outVec32b(i+numVecModule), vs1Split.io.outVec32b(i))
      mod.io.frs1             := 0.U     // already vf -> vv
      mod.io.is_frs1          := false.B // already vf -> vv
      // Scalar moves enable the mask only for uop 0
      mod.io.mask             := Mux(isScalarMove, !vuopIdx.orR, genMaskForMerge(inmask = srcMaskRShift, sew = vsew, i = i))
      mod.io.maskForReduction := genMaskForReduction(inmask = srcMaskRShiftForReduction, sew = vsew, i = i)
      mod.io.uop_idx          := vuopIdx(0)
      mod.io.is_vec           := true.B // Todo
      mod.io.round_mode       := rm
      mod.io.fp_format        := Mux(resWiden, vsew + 1.U, vsew)
      mod.io.opb_widening     := opbWiden
      mod.io.res_widening     := resWiden
      mod.io.op_code          := opcode
      mod.io.is_vfwredosum    := fuOpType === VfaluType.vfwredosum
      mod.io.is_fold          := inIsFold
      mod.io.vs2_fold         := vs2      // for better timing
      resultData(i)           := mod.io.fp_result
      fflagsData(i)           := mod.io.fflags
      // For FP-to-vec instructions with e32 / e16: set when the upper 32 / 48 bits of the 64-bit
      // operand are not all ones (presumably an operand that is not NaN-boxed -- confirm).
      fp_aIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
          ((vsew === VSew.e32) & (!vs2Split.io.outVec64b(i).head(32).andR)) |
          ((vsew === VSew.e16) & (!vs2Split.io.outVec64b(i).head(48).andR))
        )
      fp_bIsFpCanonicalNAN(i) := vecCtrl.fpu.isFpToVecInst & (
          ((vsew === VSew.e32) & (!vs1Split.io.outVec64b(i).head(32).andR)) |
          ((vsew === VSew.e16) & (!vs1Split.io.outVec64b(i).head(48).andR))
        )
      mod.io.fp_aIsFpCanonicalNAN := fp_aIsFpCanonicalNAN(i)
      mod.io.fp_bIsFpCanonicalNAN := fp_bIsFpCanonicalNAN(i)
  }

  /**
    * Output side: classification of the operation and selection of the raw result
    */
  val outVuopidx = outVecCtrl.vuopIdx(2, 0) // for vfadd max vuopidx=7
  val numOfUopVFRED = Wire(UInt(4.W))
  // NOTE(review): numofUopVFREDReg is not read anywhere in this class.
  val numofUopVFREDReg = RegEnable(numOfUopVFRED, io.in.fire)
  val vs1Reg = RegEnable(vs1, io.in.fire)
  val outIsVfRedUnordered = outCtrl.fuOpType === VfaluType.vfredusum ||
    outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin
  val outIsVfRedUnComp = outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin
  val outIsVfRedUnSum = outCtrl.fuOpType === VfaluType.vfredusum
  val outIsVfRedOrdered = outCtrl.fuOpType === VfaluType.vfredosum ||
    outCtrl.fuOpType === VfaluType.vfwredosum

  val isLastUopRed = outIsVfRedUnordered && outLastUop
  // Last uop of an unordered reduction with no active element: pass the registered vs1 through
  val resultDataUInt = Mux(isLastUopRed && !existMaskReg, vs1Reg, resultData.asUInt)
  // Compare results: bit i is bit 0 of element i's slot in resultDataUInt (slot width 16/32/64 by
  // vsew); indices beyond the element count of the current vsew read as false.
  val cmpResultWidth = dataWidth / 16
  val cmpResult = Wire(Vec(cmpResultWidth, Bool()))
  for (i <- 0 until cmpResultWidth) {
    if(i == 0) {
      cmpResult(i) := resultDataUInt(0)
    }
    else if(i < dataWidth / 64) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i*16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i*32),
          (outVecCtrl.vsew === 3.U) -> resultDataUInt(i*64)
        )
      )
    }
    else if(i < dataWidth / 32) {
      cmpResult(i) := Mux1H(
        Seq(
          (outVecCtrl.vsew === 1.U) -> resultDataUInt(i * 16),
          (outVecCtrl.vsew === 2.U) -> resultDataUInt(i * 32),
          (outVecCtrl.vsew === 3.U) -> false.B
        )
      )
    }
    else if(i <  dataWidth / 16) {
      cmpResult(i) := Mux(outVecCtrl.vsew === 1.U, resultDataUInt(i*16), false.B)
    }
  }

  /**
    * Effective eew / vl computation. `_s0` signals are taken from the head of ctrlVec / dataVec
    * and are registered on io.in.fire where an output-side copy is needed.
    */
  val outCtrl_s0 = ctrlVec.head
  val outVecCtrl_s0 = ctrlVec.head.vpu.get
  val outEew_s0 = Mux(resWiden, outVecCtrl_s0.vsew + 1.U, outVecCtrl_s0.vsew)
  val outWiden = RegEnable(resWiden, io.in.fire)
  val outEew = Mux(outWiden, outVecCtrl.vsew + 1.U, outVecCtrl.vsew)
  val vlMax_s0 = ((VLEN/8).U >> outEew_s0).asUInt
  val vlMax = ((VLEN/8).U >> outEew).asUInt
  val outVlmulFix = Mux(outWiden, outVecCtrl.vlmul + 1.U, outVecCtrl.vlmul)
  // Magnitude of the (signed) fixed vlmul: two's-complement negate the low bits when bit 2 is set
  val lmulAbs = Mux(outVlmulFix(2), (~outVlmulFix(1,0)).asUInt + 1.U, outVlmulFix(1,0))
  //  vfmv_f_s need vl=1, reduction last uop need vl=1, other uop need vl=vlmax
  numOfUopVFRED := {
    // addTime include add frs1
    val addTime = MuxLookup(outVecCtrl_s0.vlmul, 1.U(4.W))(Seq(
      VLmul.m2 -> 2.U,
      VLmul.m4 -> 4.U,
      VLmul.m8 -> 8.U,
    ))
    val foldLastVlmul = MuxLookup(outVecCtrl_s0.vsew, "b000".U)(Seq(
      VSew.e16 -> VLmul.mf8,
      VSew.e32 -> VLmul.mf4,
      VSew.e64 -> VLmul.mf2,
    ))
    // lmul < 1, foldTime = vlmul - foldLastVlmul
    // lmul >= 1, foldTime = 0.U - foldLastVlmul
    val foldTime = Mux(outVecCtrl_s0.vlmul(2), outVecCtrl_s0.vlmul, 0.U) - foldLastVlmul
    addTime + foldTime
  }
  val reductionVl = Mux((outVecCtrl_s0.vuopIdx ===  numOfUopVFRED - 1.U) || (outCtrl_s0.fuOpType === VfaluType.vfredosum || outCtrl_s0.fuOpType === VfaluType.vfwredosum), 1.U, vlMax_s0)
  val outIsResuction = outCtrl.fuOpType === VfaluType.vfredusum ||
    outCtrl.fuOpType === VfaluType.vfredmax ||
    outCtrl.fuOpType === VfaluType.vfredmin ||
    outCtrl.fuOpType === VfaluType.vfredosum ||
    outCtrl.fuOpType === VfaluType.vfwredosum
  val outIsResuction_s0 = outCtrl_s0.fuOpType === VfaluType.vfredusum ||
    outCtrl_s0.fuOpType === VfaluType.vfredmax ||
    outCtrl_s0.fuOpType === VfaluType.vfredmin ||
    outCtrl_s0.fuOpType === VfaluType.vfredosum ||
    outCtrl_s0.fuOpType === VfaluType.vfwredosum
  val outVConfig_s0  = if(!cfg.vconfigWakeUp) outVecCtrl_s0.vconfig else dataVec.head.getSrcVConfig.asTypeOf(new VConfig)
  val outVl_s0       = outVConfig_s0.vl
  // vl handed to the Mgu: 1 for FP-to-vec instructions and vfmv_f_s, (vl != 0) for vfmv_s_f,
  // reductionVl for reductions, the configured vl otherwise.
  val outVlFix_s0 = Mux(
    outVecCtrl_s0.fpu.isFpToVecInst || (outCtrl_s0.fuOpType === VfaluType.vfmv_f_s),
    1.U,
    Mux(
      outCtrl_s0.fuOpType === VfaluType.vfmv_s_f,
      outVl_s0.orR,
      Mux(outIsResuction_s0, reductionVl, outVl_s0)
    )
  )
  val outVlFix = RegEnable(outVlFix_s0,io.in.fire)

  // NOTE(review): vlMaxAllUop is assigned but not read anywhere in this class.
  val vlMaxAllUop = Wire(outVl.cloneType)
  vlMaxAllUop := Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax << lmulAbs).asUInt
  val vlMaxThisUop = Mux(outVecCtrl.vlmul(2), vlMax >> lmulAbs, vlMax).asUInt
  // Elements of this uop that lie below outVlFix, clamped to [0, vlMaxThisUop]
  val vlSetThisUop = Mux(outVlFix > outVuopidx*vlMaxThisUop, outVlFix - outVuopidx*vlMaxThisUop, 0.U)
  val vlThisUop = Wire(UInt(4.W))
  vlThisUop := Mux(vlSetThisUop < vlMaxThisUop, vlSetThisUop, vlMaxThisUop)
  // Low vlThisUop bits set
  val vlMaskRShift = Wire(UInt((4 * numVecModule).W))
  vlMaskRShift := Fill(4 * numVecModule, 1.U(1.W)) >> ((4 * numVecModule).U - vlThisUop)

  /**
    * fflags selection: each of the 4 * numVecModule 5-bit fflags groups is gated by an enable bit
    * and the enabled groups are ORed together.
    */
  val outVuopidxForRed = outVecCtrl.vuopIdx(3, 0) // lmul=8 sew=16, (4+2+1)(vector)+(1+1+1)(fold)+(1)(scalar) max vuopIdx=10
  val outIsFisrtGroup = outVuopidxForRed === 0.U ||
    (outVuopidxForRed === 1.U && (outVlmul === VLmul.m4 || outVlmul === VLmul.m8)) ||
    ((outVuopidxForRed === 2.U || outVuopidxForRed === 3.U) && outVlmul === VLmul.m8)
  val firstNeedFFlags = outIsFisrtGroup  && outIsVfRedUnComp
  val lastNeedFFlags = outVecCtrl.lastUop && outIsVfRedUnComp
  // Operations for which the Mgu is given an all-true mask instead of the source mask
  private val needNoMask = outCtrl.fuOpType === VfaluType.vfmerge ||
    outCtrl.fuOpType === VfaluType.vfmv_s_f ||
    outIsResuction ||
    outVecCtrl.fpu.isFpToVecInst
  val maskToMgu = Mux(needNoMask, allMaskTrue, outSrcMask)
  val allFFlagsEn = Wire(Vec(4*numVecModule,Bool()))
  val outSrcMaskRShift = Wire(UInt((4*numVecModule).W))
  outSrcMaskRShift := (maskToMgu >> (outVecCtrl.vuopIdx(2,0) * vlMax))(4*numVecModule-1,0)
  // Per-format views of the mask / vl enables: f16 uses all 4 bits per lane,
  // f32 the low 2 bits per lane, f64 the low bit per lane.
  val f16FFlagsEn = outSrcMaskRShift
  val f32FFlagsEn = Wire(Vec(numVecModule,UInt(4.W)))
  val f64FFlagsEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f16VlMaskEn = vlMaskRShift
  val f32VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  val f64VlMaskEn = Wire(Vec(numVecModule, UInt(4.W)))
  for (i <- 0 until numVecModule){
    f32FFlagsEn(i) := Cat(Fill(2, 0.U), outSrcMaskRShift(2*i+1,2*i))
    f64FFlagsEn(i) := Cat(Fill(3, 0.U), outSrcMaskRShift(i))
    f32VlMaskEn(i) := Cat(Fill(2, 0.U), vlMaskRShift(2 * i + 1, 2 * i))
    f64VlMaskEn(i) := Cat(Fill(3, 0.U), vlMaskRShift(i))
  }
  val fflagsEn= Mux1H(
    Seq(
      (outEew === 1.U) -> f16FFlagsEn.asUInt,
      (outEew === 2.U) -> f32FFlagsEn.asUInt,
      (outEew === 3.U) -> f64FFlagsEn.asUInt
    )
  )
  val vlMaskEn = Mux1H(
    Seq(
      (outEew === 1.U) -> f16VlMaskEn.asUInt,
      (outEew === 2.U) -> f32VlMaskEn.asUInt,
      (outEew === 3.U) -> f64VlMaskEn.asUInt
    )
  )
  val fflagsRedMask = genMaskForRedFFlag(outVecCtrl.vsew)

  if (backendParams.debugEn){
    dontTouch(allFFlagsEn)
    dontTouch(fflagsRedMask)
  }
  // Reductions: bit 0 is enabled for min/max in first-group or last uops and for all sums; the
  // upper bits are enabled (under fflagsRedMask) for first-group min/max and non-last unordered sums.
  // Other operations: enabled where both the mask bit and the vl bit are set.
  allFFlagsEn := Mux(outIsResuction, Cat(Fill(4*numVecModule - 1, firstNeedFFlags || outIsVfRedUnSum && !outVecCtrl.lastUop) & fflagsRedMask(4*numVecModule - 1, 1),
    lastNeedFFlags || firstNeedFFlags || outIsVfRedOrdered || outIsVfRedUnSum), fflagsEn & vlMaskEn).asTypeOf(allFFlagsEn)

  val allFFlags = fflagsData.asTypeOf(Vec( 4*numVecModule,UInt(5.W)))
  val outFFlags = allFFlagsEn.zip(allFFlags).map{
    case(en,fflags) => Mux(en, fflags, 0.U(5.W))
  }.reduce(_ | _)


  /**
    * Compare-result (mask destination) post-processing: inactive elements take all-ones (vma) or
    * the old destination bit, elements at or beyond outVl are forced to one.
    */
  val cmpResultOldVd = Wire(UInt(cmpResultWidth.W))
  val cmpResultOldVdRshiftWidth = Wire(UInt(6.W))
  cmpResultOldVdRshiftWidth := Mux1H(
    Seq(
      (outVecCtrl.vsew === VSew.e16) -> (outVecCtrl.vuopIdx(2, 0) << 3),
      (outVecCtrl.vsew === VSew.e32) -> (outVecCtrl.vuopIdx(2, 0) << 2),
      (outVecCtrl.vsew === VSew.e64) -> (outVecCtrl.vuopIdx(2, 0) << 1),
    )
  )
  cmpResultOldVd := (outOldVd >> cmpResultOldVdRshiftWidth)(4*numVecModule-1,0)
  val cmpResultForMgu = Wire(Vec(cmpResultWidth, Bool()))
  private val maxVdIdx = 8
  private val elementsInOneUop = Mux1H(
    Seq(
      (outEew === 1.U) -> (cmpResultWidth).U(4.W),
      (outEew === 2.U) -> (cmpResultWidth / 2).U(4.W),
      (outEew === 3.U) -> (cmpResultWidth / 4).U(4.W),
    )
  )
  private val vdIdx = outVecCtrl.vuopIdx(2, 0)
  // Number of elements handled by the uops before this one
  private val elementsComputed = Mux1H(Seq.tabulate(maxVdIdx)(i => (vdIdx === i.U) -> (elementsInOneUop * i.U)))
  for (i <- 0 until cmpResultWidth) {
    val cmpResultWithVmask = Mux(outSrcMaskRShift(i), cmpResult(i), Mux(outVecCtrl.vma, true.B, cmpResultOldVd(i)))
    cmpResultForMgu(i) := Mux(elementsComputed +& i.U >= outVl, true.B, cmpResultWithVmask)
  }

  /**
    * Old-destination adjustment for ordered reductions
    */
  val outIsFold = outVecCtrl.fpu.isFoldTo1_2 || outVecCtrl.fpu.isFoldTo1_4 || outVecCtrl.fpu.isFoldTo1_8
  // vfredosum: old vd shifted right by one element
  val outOldVdForREDO = Mux1H(Seq(
    (outVecCtrl.vsew === VSew.e16) -> (outOldVd >> 16),
    (outVecCtrl.vsew === VSew.e32) -> (outOldVd >> 32),
    (outVecCtrl.vsew === VSew.e64) -> (outOldVd >> 64),
  ))
  val outOldVdForWREDO = Mux(
    !outIsFold,
    Mux(outVecCtrl.vsew === VSew.e16, Cat(outOldVd(VLEN-1-16,16), 0.U(32.W)), Cat(outOldVd(VLEN-1-32,32), 0.U(64.W))),
    Mux(outVecCtrl.vsew === VSew.e16,
      // Divide vuopIdx by 8 and the remainder is 1
      Mux(outVecCtrl.vuopIdx(2,0) === 1.U, outOldVd, outOldVd >> 16),
      // Divide vuopIdx by 4 and the remainder is 1
      Mux(outVecCtrl.vuopIdx(1,0) === 1.U, outOldVd, outOldVd >> 32)
    ),
  )
  val outOldVdForRED = Mux(outCtrl.fuOpType === VfaluType.vfredosum, outOldVdForREDO, outOldVdForWREDO)
  // Per-register element count (8 / 4 / 2 for e16 / e32 / e64) scaled by vlmul
  val numOfUopVFREDOSUM = {
    val uvlMax = MuxLookup(outVecCtrl.vsew, 0.U)(Seq(
      VSew.e16 -> 8.U,
      VSew.e32 -> 4.U,
      VSew.e64 -> 2.U,
    ))
    Mux(outVecCtrl.vlmul(2), uvlMax >> (-outVecCtrl.vlmul)(1, 0), uvlMax << outVecCtrl.vlmul(1, 0)).asUInt
  }
  val isLastUopForREDO = outVecCtrl.lastUop
  val isOutOldVdForREDO = ((outCtrl.fuOpType === VfaluType.vfredosum && outIsFold) || outCtrl.fuOpType === VfaluType.vfwredosum) && !isLastUopForREDO
  // Ordered reductions force ta = false on every uop except the one with index numOfUopVFREDOSUM - 1
  val taIsFalseForVFREDO = ((outCtrl.fuOpType === VfaluType.vfredosum) || (outCtrl.fuOpType === VfaluType.vfwredosum)) && (outVecCtrl.vuopIdx =/= numOfUopVFREDOSUM - 1.U)
  // outVecCtrl.fpu.isFpToVecInst means the instruction is float instruction, not vector float instruction
  val notUseVl = outVecCtrl.fpu.isFpToVecInst || (outCtrl.fuOpType === VfaluType.vfmv_f_s)
  // vl == 0 for an instruction that honours vl: the destination keeps its old value and no fflags are raised
  val notModifyVd = !notUseVl && (outVl === 0.U)

  /**
    * [[mgu]] / [[mgtu]] in connection and final outputs
    */
  mgu.io.in.vd := Mux(outVecCtrl.isDstMask, Cat(0.U((dataWidth / 16 * 15).W), cmpResultForMgu.asUInt), resultDataUInt)
  mgu.io.in.oldVd := Mux(isOutOldVdForREDO, outOldVdForRED, outOldVd)
  mgu.io.in.mask := maskToMgu
  mgu.io.in.info.ta := Mux(outCtrl.fuOpType === VfaluType.vfmv_f_s, true.B , Mux(taIsFalseForVFREDO, false.B, outVecCtrl.vta))
  mgu.io.in.info.ma := Mux(outCtrl.fuOpType === VfaluType.vfmv_s_f, true.B , outVecCtrl.vma)
  mgu.io.in.info.vl := outVlFix
  mgu.io.in.info.vlmul := outVecCtrl.vlmul
  mgu.io.in.info.valid := Mux(notModifyVd, false.B, io.in.valid)
  // FP-to-vec instructions always start at element 0. This is the only connection to vstart:
  // an earlier unconditional connection was dead under last-connect semantics and has been removed.
  mgu.io.in.info.vstart := Mux(outVecCtrl.fpu.isFpToVecInst, 0.U, outVecCtrl.vstart)
  mgu.io.in.info.eew :=  RegEnable(outEew_s0,io.in.fire)
  mgu.io.in.info.vsew := outVecCtrl.vsew
  // Reductions always write destination index 0
  mgu.io.in.info.vdIdx := RegEnable(Mux(outIsResuction_s0, 0.U, outVecCtrl_s0.vuopIdx), io.in.fire)
  mgu.io.in.info.narrow := outVecCtrl.isNarrow
  mgu.io.in.info.dstMask := outVecCtrl.isDstMask
  mgu.io.in.isIndexedVls := false.B
  mgtu.io.in.vd := Mux(outVecCtrl.isDstMask, mgu.io.out.vd, resultDataUInt)
  mgtu.io.in.vl := outVl
  // FP-to-vec vfclass and compare results keep only the low 16 bits of the output data
  val resultFpMask = Wire(UInt(VLEN.W))
  val isFclass = outVecCtrl.fpu.isFpToVecInst && (outCtrl.fuOpType === VfaluType.vfclass)
  val fpCmpFuOpType = Seq(VfaluType.vfeq, VfaluType.vflt, VfaluType.vfle)
  val isCmp = outVecCtrl.fpu.isFpToVecInst && (fpCmpFuOpType.map(_ === outCtrl.fuOpType).reduce(_|_))
  resultFpMask := Mux(isFclass || isCmp, Fill(16, 1.U(1.W)), Fill(VLEN, 1.U(1.W)))
  // when dest is mask, the result need to be masked by mgtu
  io.out.bits.res.data := Mux(notModifyVd, outOldVd, Mux(outVecCtrl.isDstMask, mgtu.io.out.vd, mgu.io.out.vd) & resultFpMask)
  io.out.bits.res.fflags.get := Mux(notModifyVd, 0.U(5.W), outFFlags)
  io.out.bits.ctrl.exceptionVec.get(ExceptionNO.illegalInstr) := mgu.io.out.illegal

}

/**
  * Skeleton of a floating-point specific mask/tail unit.
  *
  * In the code shown here the module only aliases fields of its input bundle; it contains no
  * logic and does not drive `io.out.vd`.
  * NOTE(review): an undriven output would presumably be rejected if this module were ever
  * elaborated -- confirm that it is not instantiated before relying on it.
  *
  * @param vlen width in bits of the data ports of [[VFMguIO]]
  */
class VFMgu(vlen:Int)(implicit p: Parameters) extends Module{
  val io = IO(new VFMguIO(vlen))

  // Input aliases
  val vd = io.in.vd
  val oldvd = io.in.oldVd
  val mask = io.in.mask
  // Note: aliased from the `eew` field of the info bundle, not from its `vsew` field
  val vsew = io.in.info.eew
  // Number of 16-bit slices in a vlen-bit value
  val num16bits = vlen / 16

}

/**
  * IO bundle of [[VFMgu]].
  *
  * @param vlen width in bits of the `vd`, `oldVd` and `mask` inputs and of the `vd` output
  */
class VFMguIO(vlen: Int)(implicit p: Parameters) extends Bundle {
  val in = new Bundle {
    // New destination data
    val vd = Input(UInt(vlen.W))
    // Previous destination data
    val oldVd = Input(UInt(vlen.W))
    // Mask bits, vlen wide
    val mask = Input(UInt(vlen.W))
    // Vector control information (a VecInfo bundle)
    val info = Input(new VecInfo)
  }
  val out = new Bundle {
    // Resulting destination data
    val vd = Output(UInt(vlen.W))
  }
}
527}