xref: /XiangShan/src/main/scala/xiangshan/backend/fu/vector/VFPU.scala (revision 83ba63b34cf09b33c0a9e1b3203138e51af4491b)
1///****************************************************************************************
2//  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3//  * Copyright (c) 2020-2021 Peng Cheng Laboratory
4//  *
5//  * XiangShan is licensed under Mulan PSL v2.
6//  * You can use this software according to the terms and conditions of the Mulan PSL v2.
7//  * You may obtain a copy of Mulan PSL v2 at:
8//  *          http://license.coscl.org.cn/MulanPSL2
9//  *
10//  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11//  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12//  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13//  *
14//  * See the Mulan PSL v2 for more details.
15//  ****************************************************************************************
16//  */
17//
18//
19//package xiangshan.backend.fu.vector
20//
21//import org.chipsalliance.cde.config.Parameters
22//import chisel3.{Mux, _}
23//import chisel3.util._
24//import utils._
25//import utility._
26//import yunsuan.vector.{VectorFloatAdder,VectorFloatFMA,VectorFloatDivider}
27//import yunsuan.VfpuType
28//import xiangshan.{FuType, XSCoreParamsKey}
29//
// VFPU (disabled: every line of this definition is commented out).
// Declared as a VPUSubModule parameterised with the VLEN read from XSCoreParamsKey.
// It lists three data modules (VfaluWrapper, VfmaccWrapper, VfdivWrapper), one select
// condition per module in the same order, and then calls connectDataModule.
30//class VFPU(implicit p: Parameters) extends VPUSubModule(p(XSCoreParamsKey).VLEN) {
// Raise an XSError when a valid input carries VfpuType.dummy or has vsew === 0.
31//  XSError(io.in.valid && io.in.bits.uop.ctrl.fuOpType === VfpuType.dummy, "VFPU OpType not supported")
32//  XSError(io.in.valid && (io.in.bits.uop.ctrl.vconfig.vtype.vsew === 0.U), "8 bits not supported in VFPU")
33//  override val dataModule = Seq(
34//    Module(new VfaluWrapper),
35//    Module(new VfmaccWrapper),
36//    Module(new VfdivWrapper)
37//  )
// One flag per data module, obtained by comparing fuOpType for equality against a VfpuType value.
// NOTE(review): the encoding behind VfpuType.isVfalu / isVfmacc / isVfdiv is not visible here —
// confirm that an equality compare is what is intended.
38//  val select0 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfalu
39//  val select1 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfmacc
40//  val select2 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfdiv
// A module is selected only when fuType is FuType.vfpu and its flag above is set.
41//  override val select = Seq(
42//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select0,
43//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select1,
44//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select2
45//  )
46//  connectDataModule
47//}
48//
// VfdivWrapper (disabled: every line of this definition is commented out).
// Wraps NumAdder = VLEN / XLEN VectorFloatDivider instances, each fed one XLEN-bit slice of
// vs1 / vs2. io.in.ready is the AND of every divider's start_ready_o and io.out.valid is the
// AND of every divider's finish_valid_o.
49//class VfdivWrapper(implicit p: Parameters)  extends VPUDataModule{
50//  needReverse := false.B
51//  needClearMask := false.B
52//
// NOTE(review): Latency is not referenced anywhere else in this class body.
53//  val Latency = List(5, 7, 12)
54//  val AdderWidth = XLEN
55//  val NumAdder = VLEN / XLEN
56//
57//  // TODO: Place this logic within the functional unit
// mask (src(3)), the low two bits of vsew, and vl are passed through DataHoldBypass keyed on inHs.
// NOTE(review): presumably this keeps the values of the last accepted input available for the
// fflags selection below — confirm against the DataHoldBypass utility.
58//  val inHs = io.in.fire()
59//  val s0_mask = DataHoldBypass(in.src(3), inHs)
60//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1,0), inHs)
61//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
62//
63//  val vfdiv = Seq.fill(NumAdder)(Module(new VectorFloatDivider()))
// Divider i receives bits [AdderWidth*(i+1)-1 : AdderWidth*i] of vs1 (as opb_i) and vs2 (as opa_i).
// Operands are forced to 0 and fp_format_i to 3 whenever the input is not firing.
64//  for (i <- 0 until NumAdder) {
65//    vfdiv(i).io.opb_i := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
66//    vfdiv(i).io.opa_i := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
67//    vfdiv(i).io.is_vec_i := true.B // If you can enter, it must be vector
68//    vfdiv(i).io.frs2_i    := in.src(0)(63,0) // f[rs2]
69//    vfdiv(i).io.frs1_i    := in.src(1)(63,0) // f[rs1]
70//    vfdiv(i).io.is_frs1_i := false.B // if true, vs2 / f[rs1]
71//    vfdiv(i).io.is_frs2_i := false.B // if true, f[rs2] / vs1
72//    vfdiv(i).io.is_sqrt_i := false.B // must be false; sqrt is not supported yet
73//    vfdiv(i).io.rm_i := rm
74//    vfdiv(i).io.fp_format_i := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
75//    vfdiv(i).io.start_valid_i := io.in.valid
76//    vfdiv(i).io.finish_ready_i := io.out.ready
77//    vfdiv(i).io.flush_i := false.B  // TODO
78//  }
79//
// fflags selection: the per-divider fflags are concatenated, handed to fflagsGen together with
// the held mask and a lane index list per element width, and the entry at index vl - 1 is used.
// A vl of 0 yields 0. NOTE(review): the lookup has no entry for s0_sew === "b00".
80//  val s4_fflagsVec = VecInit(vfdiv.map(_.io.fflags_o)).asUInt
81//  val s4_fflags16vl = fflagsGen(s0_mask, s4_fflagsVec, List.range(0, 8))
82//  val s4_fflags32vl = fflagsGen(s0_mask, s4_fflagsVec, List(0, 1, 4, 5))
83//  val s4_fflags64vl = fflagsGen(s0_mask, s4_fflagsVec, List(0, 4))
84//  val s4_fflags = LookupTree(s0_sew(1, 0), List(
85//    "b01".U -> Mux(s0_vl.orR, s4_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
86//    "b10".U -> Mux(s0_vl.orR, s4_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
87//    "b11".U -> Mux(s0_vl.orR, s4_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
88//  ))
89//  val s4_result = VecInit(vfdiv.map(_.io.fpdiv_res_o)).asUInt
90//
91//  io.out.bits.data := s4_result
92//  fflags := s4_fflags
// Accept a new input only when every divider is ready; report output only when every divider has finished.
93//  io.in.ready := VecInit(vfdiv.map(_.io.start_ready_o)).asUInt.andR()
94//  io.out.valid := VecInit(vfdiv.map(_.io.finish_valid_o)).asUInt.andR()
95//}
96//
// VfmaccWrapper (disabled: every line of this definition is commented out).
// Wraps NumAdder = VLEN / XLEN VectorFloatFMA instances. The input is always accepted
// (io.in.ready := true.B) and io.out.valid is the last stage of a Latency-deep valid chain.
97//class VfmaccWrapper(implicit p: Parameters)  extends VPUDataModule{
98//  needReverse := false.B
99//  needClearMask := false.B
100//
101//  val Latency = 3
102//  val AdderWidth = XLEN
103//  val NumAdder = VLEN / XLEN
104//
105//  val inHs = io.in.fire()
106//
// Valid chain: stage 0 registers inHs, every later stage registers the previous stage.
107//  val validPipe = Seq.fill(Latency)(RegInit(false.B))
108//  validPipe.zipWithIndex.foreach {
109//    case (valid, idx) =>
110//      val _valid = if (idx == 0) Mux(inHs, true.B, false.B) else validPipe(idx - 1)
111//      valid := _valid
112//  }
113//  // TODO: Place this logic within the functional unit
// mask (src(3)), the low two bits of vsew, and vl are passed through DataHoldBypass keyed on inHs.
114//  val s0_mask = DataHoldBypass(in.src(3), inHs)
115//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1, 0), inHs)
116//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
117//
118//  val vfmacc = Seq.fill(NumAdder)(Module(new VectorFloatFMA()))
// FMA unit i receives bits [AdderWidth*(i+1)-1 : AdderWidth*i] of vs1 (fp_a), vs2 (fp_b) and
// src(2) (fp_c). The widen_* inputs concatenate two half-width (AdderWidth / 2) slices taken at
// half-width indices i + 2 and i. Data inputs are forced to 0 and fp_format to 3 when not firing.
119//  for (i <- 0 until NumAdder) {
120//    vfmacc(i).io.fp_a := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
121//    vfmacc(i).io.fp_b := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
122//    vfmacc(i).io.fp_c := Mux(inHs, in.src(2)(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
123//    vfmacc(i).io.widen_b := Mux(inHs, Cat(vs1((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs1((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
124//    vfmacc(i).io.widen_a := Mux(inHs, Cat(vs2((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs2((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
125//    vfmacc(i).io.frs1 := in.src(0)(63,0)
126//    vfmacc(i).io.is_frs1 := false.B // TODO: support vf inst
127//    vfmacc(i).io.uop_idx := in.uop.ctrl.uopIdx // TODO
// NOTE(review): op_code is left as DontCare, so the operation performed by the FMA is not
// selected by this wrapper.
128//    vfmacc(i).io.op_code := DontCare // TODO
129//    vfmacc(i).io.is_vec := true.B // If you can enter, it must be vector
130//    vfmacc(i).io.round_mode := rm
131//    vfmacc(i).io.fp_format := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
132//    vfmacc(i).io.res_widening := false.B // TODO
133//  }
134//
135//  // output signal generation
// The per-unit fflags are concatenated, handed to fflagsGen with the held mask and a lane index
// list per element width, and the entry at index vl - 1 is used; a vl of 0 yields 0.
// NOTE(review): the lookup has no entry for s0_sew === "b00".
136//  val s2_fflagsVec = VecInit(vfmacc.map(_.io.fflags)).asUInt
137//  val s2_fflags16vl = fflagsGen(s0_mask, s2_fflagsVec, List.range(0, 8))
138//  val s2_fflags32vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 1, 4, 5))
139//  val s2_fflags64vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 4))
140//  val s2_fflags = LookupTree(s0_sew(1, 0), List(
141//    "b01".U -> Mux(s0_vl.orR, s2_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
142//    "b10".U -> Mux(s0_vl.orR, s2_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
143//    "b11".U -> Mux(s0_vl.orR, s2_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
144//  ))
145//
146//  val s2_result = VecInit(vfmacc.map(_.io.fp_result)).asUInt
147//
// Result and fflags are driven straight from the FMA outputs; this wrapper adds no data register.
148//  io.out.bits.data := s2_result
149//  fflags := s2_fflags
150//
151//  io.in.ready := true.B
152//  io.out.valid := validPipe(Latency - 1)
153//}
154//
// VfaluWrapper (disabled: every line of this definition is commented out).
// Wraps NumAdder = VLEN / XLEN VectorFloatAdder instances. The input is always accepted
// (io.in.ready := true.B); result and fflags are captured in one RegEnable stage and
// io.out.valid is the last stage of a Latency-deep valid chain.
155//class VfaluWrapper(implicit p: Parameters)  extends VPUDataModule{
156//  needReverse := false.B
157//  needClearMask := false.B
158//
159//  val Latency = 2
160//  val AdderWidth = XLEN
161//  val NumAdder = VLEN / XLEN
162//
163//  val inHs = io.in.fire()
164//
165//  // reg input signal
// Valid chain: stage 0 registers inHs, every later stage registers the previous stage.
166//  val validPipe = Seq.fill(Latency)(RegInit(false.B))
167//  validPipe.zipWithIndex.foreach {
168//    case (valid, idx) =>
169//      val _valid = if (idx == 0) Mux(inHs, true.B, false.B) else validPipe(idx - 1)
170//      valid := _valid
171//  }
172//  // TODO: Place this logic within the functional unit
// mask (src(3)), the low two bits of vsew, and vl are passed through DataHoldBypass keyed on inHs.
173//  val s0_mask = DataHoldBypass(in.src(3), inHs)
174//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1, 0), inHs)
175//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
176//
177//  // connect the input signal
178//  val vfalu = Seq.fill(NumAdder)(Module(new VectorFloatAdder()))
// Adder i receives bits [AdderWidth*(i+1)-1 : AdderWidth*i] of vs1 (fp_b) and vs2 (fp_a).
// The widen_* inputs concatenate two half-width (AdderWidth / 2) slices taken at half-width
// indices i + 2 and i. Data inputs are forced to 0 and fp_format to 3 when not firing.
179//  for (i <- 0 until NumAdder) {
180//    vfalu(i).io.fp_b := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
181//    vfalu(i).io.fp_a := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
182//    vfalu(i).io.widen_b := Mux(inHs, Cat(vs1((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs1((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
183//    vfalu(i).io.widen_a := Mux(inHs, Cat(vs2((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs2((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
184//    vfalu(i).io.frs1 := in.src(0)(63, 0)
185//    vfalu(i).io.is_frs1 := false.B // TODO: support vf inst
// NOTE(review): the adder's mask input is tied to 0; masking is applied only through
// fflagsGen on the flags below.
186//    vfalu(i).io.mask := 0.U //TODO
187//    vfalu(i).io.uop_idx := in.uop.ctrl.uopIdx // TODO
188//    vfalu(i).io.is_vec := true.B // If you can enter, it must be vector
189//    vfalu(i).io.round_mode := rm
190//    vfalu(i).io.fp_format := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
191//    vfalu(i).io.opb_widening := false.B // TODO
192//    vfalu(i).io.res_widening := false.B // TODO
193//    vfalu(i).io.op_code := in.uop.ctrl.fuOpType
194//  }
195//
196//  // output signal generation
// The per-adder fflags are concatenated, handed to fflagsGen with the held mask and a lane index
// list per element width, and the entry at index vl - 1 is used; a vl of 0 yields 0.
// NOTE(review): the lookup has no entry for s0_sew === "b00".
197//  val s0_fflagsVec = VecInit(vfalu.map(_.io.fflags)).asUInt
198//  val s0_fflags16vl = fflagsGen(s0_mask, s0_fflagsVec, List.range(0, 8))
199//  val s0_fflags32vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 1, 4, 5))
200//  val s0_fflags64vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 4))
201//  val s0_fflags = LookupTree(s0_sew(1, 0), List(
202//    "b01".U -> Mux(s0_vl.orR, s0_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
203//    "b10".U -> Mux(s0_vl.orR, s0_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
204//    "b11".U -> Mux(s0_vl.orR, s0_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
205//  ))
// fflags and result are captured while validPipe(Latency-2) is set and the registered copies
// drive the outputs.
206//  val s1_fflags = RegEnable(s0_fflags, validPipe(Latency-2))
207//  val s0_result = VecInit(vfalu.map(_.io.fp_result)).asUInt
208//  val s1_result = RegEnable(s0_result, validPipe(Latency-2))
209//
210//  fflags := s1_fflags
211//  io.out.bits.data := s1_result
212//
213//  io.in.ready := true.B
214//  io.out.valid := validPipe(Latency-1)
215//}
216//
// fflagsGen (disabled: every line of this definition is commented out).
// Helper that builds a Vec of accumulated 5-bit fflags, one entry per element of `idx`.
//   vmask        - mask bits; only vmask(num-1, 0) is read, where num = idx.length
//   fflagsResult - concatenated fflags; entry k reads bits [idx(k)*5+4 : idx(k)*5]
//   idx          - which 5-bit fields of fflagsResult to use, in output order
// Returns a Vec where entry i is the OR of the masked flags of entries 0..i.
217//object fflagsGen{
218//  def fflagsGen(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
// NOTE(review): num is declared as var but is never reassigned in this method.
219//    var num = idx.length
220//    val fflags = Seq.fill(num)(Wire(UInt(5.W)))
// Each wire is the selected 5-bit field when its mask bit is set, otherwise 0.
// NOTE(review): the bools of vmask(num-1, 0) are reversed before being paired with the wires,
// so the first idx entry is paired with the last bool of that slice — confirm the intended
// mask-to-lane order.
221//    fflags.zip(vmask(num-1, 0).asBools().reverse).zip(idx).foreach {
222//      case ((fflags0, mask), id) =>
223//        fflags0 := Mux(mask, fflagsResult(id*5+4,id*5+0), 0.U)
224//    }
// Running OR: entry 0 is the first masked value, entry i ORs in the previous entry.
225//    val fflagsVl = Wire(Vec(num,UInt(5.W)))
226//    for (i <- 0 until num) {
227//      val _fflags = if (i == 0) fflags(i) else (fflagsVl(i - 1) | fflags(i))
228//      fflagsVl(i) := _fflags
229//    }
230//    fflagsVl
231//  }
232//
// apply forwards to fflagsGen with the same arguments and default.
233//  def apply(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
234//    fflagsGen(vmask, fflagsResult, idx)
235//  }
236//}