xref: /XiangShan/src/main/scala/xiangshan/backend/fu/vector/VFPU.scala (revision 83ba63b34cf09b33c0a9e1b3203138e51af4491b)
13ebdf758SXuan Hu///****************************************************************************************
23ebdf758SXuan Hu//  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
33ebdf758SXuan Hu//  * Copyright (c) 2020-2021 Peng Cheng Laboratory
43ebdf758SXuan Hu//  *
53ebdf758SXuan Hu//  * XiangShan is licensed under Mulan PSL v2.
63ebdf758SXuan Hu//  * You can use this software according to the terms and conditions of the Mulan PSL v2.
73ebdf758SXuan Hu//  * You may obtain a copy of Mulan PSL v2 at:
83ebdf758SXuan Hu//  *          http://license.coscl.org.cn/MulanPSL2
93ebdf758SXuan Hu//  *
103ebdf758SXuan Hu//  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
113ebdf758SXuan Hu//  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
123ebdf758SXuan Hu//  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
133ebdf758SXuan Hu//  *
143ebdf758SXuan Hu//  * See the Mulan PSL v2 for more details.
153ebdf758SXuan Hu//  ****************************************************************************************
163ebdf758SXuan Hu//  */
173ebdf758SXuan Hu//
183ebdf758SXuan Hu//
193ebdf758SXuan Hu//package xiangshan.backend.fu.vector
203ebdf758SXuan Hu//
21*83ba63b3SXuan Hu//import org.chipsalliance.cde.config.Parameters
223ebdf758SXuan Hu//import chisel3.{Mux, _}
233ebdf758SXuan Hu//import chisel3.util._
243ebdf758SXuan Hu//import utils._
253ebdf758SXuan Hu//import utility._
263ebdf758SXuan Hu//import yunsuan.vector.{VectorFloatAdder,VectorFloatFMA,VectorFloatDivider}
273ebdf758SXuan Hu//import yunsuan.VfpuType
283ebdf758SXuan Hu//import xiangshan.{FuType, XSCoreParamsKey}
293ebdf758SXuan Hu//
303ebdf758SXuan Hu//class VFPU(implicit p: Parameters) extends VPUSubModule(p(XSCoreParamsKey).VLEN) {
313ebdf758SXuan Hu//  XSError(io.in.valid && io.in.bits.uop.ctrl.fuOpType === VfpuType.dummy, "VFPU OpType not supported")
323ebdf758SXuan Hu//  XSError(io.in.valid && (io.in.bits.uop.ctrl.vconfig.vtype.vsew === 0.U), "8 bits not supported in VFPU")
333ebdf758SXuan Hu//  override val dataModule = Seq(
343ebdf758SXuan Hu//    Module(new VfaluWrapper),
353ebdf758SXuan Hu//    Module(new VfmaccWrapper),
363ebdf758SXuan Hu//    Module(new VfdivWrapper)
373ebdf758SXuan Hu//  )
383ebdf758SXuan Hu//  val select0 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfalu
393ebdf758SXuan Hu//  val select1 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfmacc
403ebdf758SXuan Hu//  val select2 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfdiv
413ebdf758SXuan Hu//  override val select = Seq(
423ebdf758SXuan Hu//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select0,
433ebdf758SXuan Hu//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select1,
443ebdf758SXuan Hu//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select2
453ebdf758SXuan Hu//  )
463ebdf758SXuan Hu//  connectDataModule
473ebdf758SXuan Hu//}
483ebdf758SXuan Hu//
493ebdf758SXuan Hu//class VfdivWrapper(implicit p: Parameters)  extends VPUDataModule{
503ebdf758SXuan Hu//  needReverse := false.B
513ebdf758SXuan Hu//  needClearMask := false.B
523ebdf758SXuan Hu//
533ebdf758SXuan Hu//  val Latency = List(5, 7, 12)
543ebdf758SXuan Hu//  val AdderWidth = XLEN
553ebdf758SXuan Hu//  val NumAdder = VLEN / XLEN
563ebdf758SXuan Hu//
573ebdf758SXuan Hu//  // TODO: Place this logic within the functional unit
583ebdf758SXuan Hu//  val inHs = io.in.fire()
593ebdf758SXuan Hu//  val s0_mask = DataHoldBypass(in.src(3), inHs)
603ebdf758SXuan Hu//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1,0), inHs)
613ebdf758SXuan Hu//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
623ebdf758SXuan Hu//
633ebdf758SXuan Hu//  val vfdiv = Seq.fill(NumAdder)(Module(new VectorFloatDivider()))
643ebdf758SXuan Hu//  for (i <- 0 until NumAdder) {
653ebdf758SXuan Hu//    vfdiv(i).io.opb_i := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
663ebdf758SXuan Hu//    vfdiv(i).io.opa_i := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
683ebdf758SXuan Hu//    vfdiv(i).io.is_vec_i := true.B // Any uop that enters here must be a vector op
683ebdf758SXuan Hu//    vfdiv(i).io.frs2_i    := in.src(0)(63,0) // f[rs2]
693ebdf758SXuan Hu//    vfdiv(i).io.frs1_i    := in.src(1)(63,0) // f[rs1]
703ebdf758SXuan Hu//    vfdiv(i).io.is_frs1_i := false.B // if true, vs2 / f[rs1]
713ebdf758SXuan Hu//    vfdiv(i).io.is_frs2_i := false.B // if true, f[rs2] / vs1
733ebdf758SXuan Hu//    vfdiv(i).io.is_sqrt_i := false.B // must be false: sqrt is not supported yet
733ebdf758SXuan Hu//    vfdiv(i).io.rm_i := rm
743ebdf758SXuan Hu//    vfdiv(i).io.fp_format_i := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
753ebdf758SXuan Hu//    vfdiv(i).io.start_valid_i := io.in.valid
763ebdf758SXuan Hu//    vfdiv(i).io.finish_ready_i := io.out.ready
773ebdf758SXuan Hu//    vfdiv(i).io.flush_i := false.B  // TODO
783ebdf758SXuan Hu//  }
793ebdf758SXuan Hu//
80*83ba63b3SXuan Hu//  val s4_fflagsVec = VecInit(vfdiv.map(_.io.fflags_o)).asUInt
813ebdf758SXuan Hu//  val s4_fflags16vl = fflagsGen(s0_mask, s4_fflagsVec, List.range(0, 8))
823ebdf758SXuan Hu//  val s4_fflags32vl = fflagsGen(s0_mask, s4_fflagsVec, List(0, 1, 4, 5))
833ebdf758SXuan Hu//  val s4_fflags64vl = fflagsGen(s0_mask, s4_fflagsVec, List(0, 4))
843ebdf758SXuan Hu//  val s4_fflags = LookupTree(s0_sew(1, 0), List(
853ebdf758SXuan Hu//    "b01".U -> Mux(s0_vl.orR, s4_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
863ebdf758SXuan Hu//    "b10".U -> Mux(s0_vl.orR, s4_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
873ebdf758SXuan Hu//    "b11".U -> Mux(s0_vl.orR, s4_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
883ebdf758SXuan Hu//  ))
89*83ba63b3SXuan Hu//  val s4_result = VecInit(vfdiv.map(_.io.fpdiv_res_o)).asUInt
903ebdf758SXuan Hu//
913ebdf758SXuan Hu//  io.out.bits.data := s4_result
923ebdf758SXuan Hu//  fflags := s4_fflags
93*83ba63b3SXuan Hu//  io.in.ready := VecInit(vfdiv.map(_.io.start_ready_o)).asUInt.andR()
94*83ba63b3SXuan Hu//  io.out.valid := VecInit(vfdiv.map(_.io.finish_valid_o)).asUInt.andR()
953ebdf758SXuan Hu//}
963ebdf758SXuan Hu//
973ebdf758SXuan Hu//class VfmaccWrapper(implicit p: Parameters)  extends VPUDataModule{
983ebdf758SXuan Hu//  needReverse := false.B
993ebdf758SXuan Hu//  needClearMask := false.B
1003ebdf758SXuan Hu//
1013ebdf758SXuan Hu//  val Latency = 3
1023ebdf758SXuan Hu//  val AdderWidth = XLEN
1033ebdf758SXuan Hu//  val NumAdder = VLEN / XLEN
1043ebdf758SXuan Hu//
1053ebdf758SXuan Hu//  val inHs = io.in.fire()
1063ebdf758SXuan Hu//
1073ebdf758SXuan Hu//  val validPipe = Seq.fill(Latency)(RegInit(false.B))
1083ebdf758SXuan Hu//  validPipe.zipWithIndex.foreach {
1093ebdf758SXuan Hu//    case (valid, idx) =>
1103ebdf758SXuan Hu//      val _valid = if (idx == 0) Mux(inHs, true.B, false.B) else validPipe(idx - 1)
1113ebdf758SXuan Hu//      valid := _valid
1123ebdf758SXuan Hu//  }
1143ebdf758SXuan Hu//  // TODO: Place this logic within the functional unit
1143ebdf758SXuan Hu//  val s0_mask = DataHoldBypass(in.src(3), inHs)
1153ebdf758SXuan Hu//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1, 0), inHs)
1163ebdf758SXuan Hu//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
1173ebdf758SXuan Hu//
1183ebdf758SXuan Hu//  val vfmacc = Seq.fill(NumAdder)(Module(new VectorFloatFMA()))
1193ebdf758SXuan Hu//  for (i <- 0 until NumAdder) {
1203ebdf758SXuan Hu//    vfmacc(i).io.fp_a := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
1213ebdf758SXuan Hu//    vfmacc(i).io.fp_b := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
1223ebdf758SXuan Hu//    vfmacc(i).io.fp_c := Mux(inHs, in.src(2)(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
1233ebdf758SXuan Hu//    vfmacc(i).io.widen_b := Mux(inHs, Cat(vs1((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs1((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
1243ebdf758SXuan Hu//    vfmacc(i).io.widen_a := Mux(inHs, Cat(vs2((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs2((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
1253ebdf758SXuan Hu//    vfmacc(i).io.frs1 := in.src(0)(63,0)
1263ebdf758SXuan Hu//    vfmacc(i).io.is_frs1 := false.B // TODO: support vf inst
1273ebdf758SXuan Hu//    vfmacc(i).io.uop_idx := in.uop.ctrl.uopIdx // TODO
1283ebdf758SXuan Hu//    vfmacc(i).io.op_code := DontCare // TODO
1303ebdf758SXuan Hu//    vfmacc(i).io.is_vec := true.B // Any uop that enters here must be a vector op
1303ebdf758SXuan Hu//    vfmacc(i).io.round_mode := rm
1313ebdf758SXuan Hu//    vfmacc(i).io.fp_format := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
1323ebdf758SXuan Hu//    vfmacc(i).io.res_widening := false.B // TODO
1333ebdf758SXuan Hu//  }
1343ebdf758SXuan Hu//
1353ebdf758SXuan Hu//  // output signal generation
136*83ba63b3SXuan Hu//  val s2_fflagsVec = VecInit(vfmacc.map(_.io.fflags)).asUInt
1373ebdf758SXuan Hu//  val s2_fflags16vl = fflagsGen(s0_mask, s2_fflagsVec, List.range(0, 8))
1383ebdf758SXuan Hu//  val s2_fflags32vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 1, 4, 5))
1393ebdf758SXuan Hu//  val s2_fflags64vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 4))
1403ebdf758SXuan Hu//  val s2_fflags = LookupTree(s0_sew(1, 0), List(
1413ebdf758SXuan Hu//    "b01".U -> Mux(s0_vl.orR, s2_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
1423ebdf758SXuan Hu//    "b10".U -> Mux(s0_vl.orR, s2_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
1433ebdf758SXuan Hu//    "b11".U -> Mux(s0_vl.orR, s2_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
1443ebdf758SXuan Hu//  ))
1453ebdf758SXuan Hu//
146*83ba63b3SXuan Hu//  val s2_result = VecInit(vfmacc.map(_.io.fp_result)).asUInt
1473ebdf758SXuan Hu//
1483ebdf758SXuan Hu//  io.out.bits.data := s2_result
1493ebdf758SXuan Hu//  fflags := s2_fflags
1503ebdf758SXuan Hu//
1513ebdf758SXuan Hu//  io.in.ready := true.B
1523ebdf758SXuan Hu//  io.out.valid := validPipe(Latency - 1)
1533ebdf758SXuan Hu//}
1543ebdf758SXuan Hu//
1553ebdf758SXuan Hu//class VfaluWrapper(implicit p: Parameters)  extends VPUDataModule{
1563ebdf758SXuan Hu//  needReverse := false.B
1573ebdf758SXuan Hu//  needClearMask := false.B
1583ebdf758SXuan Hu//
1593ebdf758SXuan Hu//  val Latency = 2
1603ebdf758SXuan Hu//  val AdderWidth = XLEN
1613ebdf758SXuan Hu//  val NumAdder = VLEN / XLEN
1623ebdf758SXuan Hu//
1633ebdf758SXuan Hu//  val inHs = io.in.fire()
1643ebdf758SXuan Hu//
1653ebdf758SXuan Hu//  // reg input signal
1663ebdf758SXuan Hu//  val validPipe = Seq.fill(Latency)(RegInit(false.B))
1673ebdf758SXuan Hu//  validPipe.zipWithIndex.foreach {
1683ebdf758SXuan Hu//    case (valid, idx) =>
1693ebdf758SXuan Hu//      val _valid = if (idx == 0) Mux(inHs, true.B, false.B) else validPipe(idx - 1)
1703ebdf758SXuan Hu//      valid := _valid
1713ebdf758SXuan Hu//  }
1733ebdf758SXuan Hu//  // TODO: Place this logic within the functional unit
1733ebdf758SXuan Hu//  val s0_mask = DataHoldBypass(in.src(3), inHs)
1743ebdf758SXuan Hu//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1, 0), inHs)
1753ebdf758SXuan Hu//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
1763ebdf758SXuan Hu//
1773ebdf758SXuan Hu//  // connect the input signal
1783ebdf758SXuan Hu//  val vfalu = Seq.fill(NumAdder)(Module(new VectorFloatAdder()))
1793ebdf758SXuan Hu//  for (i <- 0 until NumAdder) {
1803ebdf758SXuan Hu//    vfalu(i).io.fp_b := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
1813ebdf758SXuan Hu//    vfalu(i).io.fp_a := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
1823ebdf758SXuan Hu//    vfalu(i).io.widen_b := Mux(inHs, Cat(vs1((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs1((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
1833ebdf758SXuan Hu//    vfalu(i).io.widen_a := Mux(inHs, Cat(vs2((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs2((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
1843ebdf758SXuan Hu//    vfalu(i).io.frs1 := in.src(0)(63, 0)
1853ebdf758SXuan Hu//    vfalu(i).io.is_frs1 := false.B // TODO: support vf inst
1863ebdf758SXuan Hu//    vfalu(i).io.mask := 0.U //TODO
1873ebdf758SXuan Hu//    vfalu(i).io.uop_idx := in.uop.ctrl.uopIdx // TODO
1893ebdf758SXuan Hu//    vfalu(i).io.is_vec := true.B // Any uop that enters here must be a vector op
1893ebdf758SXuan Hu//    vfalu(i).io.round_mode := rm
1903ebdf758SXuan Hu//    vfalu(i).io.fp_format := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
1913ebdf758SXuan Hu//    vfalu(i).io.opb_widening := false.B // TODO
1923ebdf758SXuan Hu//    vfalu(i).io.res_widening := false.B // TODO
1933ebdf758SXuan Hu//    vfalu(i).io.op_code := in.uop.ctrl.fuOpType
1943ebdf758SXuan Hu//  }
1953ebdf758SXuan Hu//
1963ebdf758SXuan Hu//  // output signal generation
197*83ba63b3SXuan Hu//  val s0_fflagsVec = VecInit(vfalu.map(_.io.fflags)).asUInt
1983ebdf758SXuan Hu//  val s0_fflags16vl = fflagsGen(s0_mask, s0_fflagsVec, List.range(0, 8))
1993ebdf758SXuan Hu//  val s0_fflags32vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 1, 4, 5))
2003ebdf758SXuan Hu//  val s0_fflags64vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 4))
2013ebdf758SXuan Hu//  val s0_fflags = LookupTree(s0_sew(1, 0), List(
2023ebdf758SXuan Hu//    "b01".U -> Mux(s0_vl.orR, s0_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
2033ebdf758SXuan Hu//    "b10".U -> Mux(s0_vl.orR, s0_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
2043ebdf758SXuan Hu//    "b11".U -> Mux(s0_vl.orR, s0_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
2053ebdf758SXuan Hu//  ))
2063ebdf758SXuan Hu//  val s1_fflags = RegEnable(s0_fflags, validPipe(Latency-2))
207*83ba63b3SXuan Hu//  val s0_result = VecInit(vfalu.map(_.io.fp_result)).asUInt
2083ebdf758SXuan Hu//  val s1_result = RegEnable(s0_result, validPipe(Latency-2))
2093ebdf758SXuan Hu//
2103ebdf758SXuan Hu//  fflags := s1_fflags
2113ebdf758SXuan Hu//  io.out.bits.data := s1_result
2123ebdf758SXuan Hu//
2133ebdf758SXuan Hu//  io.in.ready := true.B
2143ebdf758SXuan Hu//  io.out.valid := validPipe(Latency-1)
2153ebdf758SXuan Hu//}
2163ebdf758SXuan Hu//
2173ebdf758SXuan Hu//object fflagsGen{
2183ebdf758SXuan Hu//  def fflagsGen(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
2193ebdf758SXuan Hu//    var num = idx.length
2203ebdf758SXuan Hu//    val fflags = Seq.fill(num)(Wire(UInt(5.W)))
2213ebdf758SXuan Hu//    fflags.zip(vmask(num-1, 0).asBools().reverse).zip(idx).foreach {
2223ebdf758SXuan Hu//      case ((fflags0, mask), id) =>
2233ebdf758SXuan Hu//        fflags0 := Mux(mask, fflagsResult(id*5+4,id*5+0), 0.U)
2243ebdf758SXuan Hu//    }
2253ebdf758SXuan Hu//    val fflagsVl = Wire(Vec(num,UInt(5.W)))
2263ebdf758SXuan Hu//    for (i <- 0 until num) {
2273ebdf758SXuan Hu//      val _fflags = if (i == 0) fflags(i) else (fflagsVl(i - 1) | fflags(i))
2283ebdf758SXuan Hu//      fflagsVl(i) := _fflags
2293ebdf758SXuan Hu//    }
2303ebdf758SXuan Hu//    fflagsVl
2313ebdf758SXuan Hu//  }
2323ebdf758SXuan Hu//
2333ebdf758SXuan Hu//  def apply(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
2343ebdf758SXuan Hu//    fflagsGen(vmask, fflagsResult, idx)
2353ebdf758SXuan Hu//  }
2363ebdf758SXuan Hu//}