/****************************************************************************************
 * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
 * Copyright (c) 2020-2021 Peng Cheng Laboratory
 *
 * XiangShan is licensed under Mulan PSL v2.
 * You can use this software according to the terms and conditions of the Mulan PSL v2.
 * You may obtain a copy of Mulan PSL v2 at:
 *          http://license.coscl.org.cn/MulanPSL2
 *
 * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
 * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
 * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
 *
 * See the Mulan PSL v2 for more details.
 ****************************************************************************************
 */


package xiangshan.backend.fu.vector

import chipsalliance.rocketchip.config.Parameters
import chisel3.{Mux, _}
import chisel3.util._
import utils._
import utility._
import yunsuan.vector.{VectorFloatAdder,VectorFloatFMA}
import yunsuan.VfpuType
import xiangshan.{FuOpType, SrcType, XSBundle, XSCoreParamsKey, XSModule}
import xiangshan.backend.fu.fpu.FPUSubModule

/**
  * Vector floating-point function unit.
  *
  * Forwards an incoming uop to either a [[VfaluWrapper]] (when `VfpuType.isVfalu` holds for
  * its `fuOpType`) or a [[VfmaccWrapper]] (when `fuOpType === VfpuType.fmacc`), and selects
  * the result and fflags of the matching wrapper using the `fuOpType` of the uop that was
  * latched on the input handshake.
  *
  * `rm` and `fflags` are not defined in this file; presumably they are inherited from
  * `FPUSubModule` -- confirm there.
  */
class VFPU(implicit p: Parameters) extends FPUSubModule(p(XSCoreParamsKey).VLEN){
  XSError(io.in.valid && io.in.bits.uop.ctrl.fuOpType === VfpuType.dummy, "VFPU OpType not supported")
  XSError(io.in.valid && (io.in.bits.uop.ctrl.vconfig.vtype.vsew === 0.U), "8 bits not supported in VFPU")
  override val dataModule = null // Only use IO, not dataModule

  // rename signal
  val in = io.in.bits
  val ctrl = io.in.bits.uop.ctrl
  val vtype = ctrl.vconfig.vtype
  val src1Type = io.in.bits.uop.ctrl.srcType

  // reg input signal: the uop (and mask) are latched when the input handshake fires so that
  // they are still available when the wrappers produce their results.
  val s0_uopReg = Reg(io.in.bits.uop.cloneType)
  val s0_maskReg = Reg(UInt(8.W))
  val inHs = io.in.fire()
  when(inHs){
    s0_uopReg := io.in.bits.uop
    // The mask is currently hard-wired to all ones (every element enabled).
    s0_maskReg := Fill(8, 1.U(1.W))
  }

  // connect the input port of vfalu
  val vfalu = Module(new VfaluWrapper()(p))
  vfalu.io.in.bits.src <> in.src
  vfalu.io.in.bits.srcType <> in.uop.ctrl.srcType
  vfalu.io.in.bits.vmask := Fill(8, 1.U(1.W))
  vfalu.io.in.bits.vl := in.uop.ctrl.vconfig.vl
  vfalu.io.in.bits.round_mode := rm
  vfalu.io.in.bits.fp_format := vtype.vsew(1,0)
  vfalu.io.in.bits.opb_widening := false.B // TODO
  vfalu.io.in.bits.res_widening := false.B // TODO
  vfalu.io.in.bits.op_code := ctrl.fuOpType
  vfalu.io.ready_out.s0_mask := s0_maskReg
  vfalu.io.ready_out.s0_sew := s0_uopReg.ctrl.vconfig.vtype.vsew(1, 0)
  vfalu.io.ready_out.s0_vl := s0_uopReg.ctrl.vconfig.vl

  // connect the input port of vfmacc
  val vfmacc = Module(new VfmaccWrapper()(p))
  vfmacc.io.in.bits.src <> in.src
  vfmacc.io.in.bits.srcType <> in.uop.ctrl.srcType
  vfmacc.io.in.bits.vmask := Fill(8, 1.U(1.W))
  vfmacc.io.in.bits.vl := in.uop.ctrl.vconfig.vl
  vfmacc.io.in.bits.round_mode := rm
  vfmacc.io.in.bits.fp_format := vtype.vsew(1, 0)
  vfmacc.io.in.bits.opb_widening := DontCare // TODO
  vfmacc.io.in.bits.res_widening := false.B // TODO
  vfmacc.io.in.bits.op_code := DontCare
  vfmacc.io.ready_out.s0_mask := s0_maskReg
  vfmacc.io.ready_out.s0_sew := s0_uopReg.ctrl.vconfig.vtype.vsew(1, 0)
  vfmacc.io.ready_out.s0_vl := s0_uopReg.ctrl.vconfig.vl

  // connect the output port: pick the wrapper output that matches the latched op type
  fflags := LookupTree(s0_uopReg.ctrl.fuOpType, List(
    VfpuType.fadd  -> vfalu.io.out.bits.fflags,
    VfpuType.fsub  -> vfalu.io.out.bits.fflags,
    VfpuType.fmacc -> vfmacc.io.out.bits.fflags,
  ))
  io.out.bits.data := LookupTree(s0_uopReg.ctrl.fuOpType, List(
    VfpuType.fadd  -> vfalu.io.out.bits.result,
    VfpuType.fsub  -> vfalu.io.out.bits.result,
    VfpuType.fmacc -> vfmacc.io.out.bits.result,
  ))
  io.out.bits.uop := s0_uopReg

  // valid/ready
  vfalu.io.in.valid := io.in.valid && VfpuType.isVfalu(in.uop.ctrl.fuOpType)
  vfmacc.io.in.valid := io.in.valid && in.uop.ctrl.fuOpType === VfpuType.fmacc
  io.out.valid := vfalu.io.out.valid || vfmacc.io.out.valid
  vfalu.io.out.ready := io.out.ready
  vfmacc.io.out.ready := io.out.ready
  // A new uop is only accepted when both wrappers are able to accept input.
  io.in.ready := vfalu.io.in.ready && vfmacc.io.in.ready
}

/**
  * IO bundle shared by [[VfaluWrapper]] and [[VfmaccWrapper]].
  *
  *  - `in`:        operands and control for a new operation (ready/valid).
  *  - `ready_out`: mask / sew / vl supplied by the parent for the operation whose result is
  *                 being produced (the parent drives these from its latched uop).
  *  - `out`:       result data and the 5-bit fflags (ready/valid).
  */
class VFPUWraaperBundle (implicit p: Parameters) extends XSBundle{
  val in = Flipped(DecoupledIO(Output(new Bundle {
    val src = Vec(3, Input(UInt(VLEN.W)))
    val srcType = Vec(4, SrcType())
    val vmask = UInt((VLEN / 16).W)
    val vl = UInt(8.W)

    val round_mode = UInt(3.W)
    val fp_format = UInt(2.W) // vsew
    val opb_widening = Bool()
    val res_widening = Bool()
    val op_code = FuOpType()
  })))

  val ready_out = Input(new Bundle {
    val s0_mask = UInt((VLEN / 16).W)
    val s0_sew = UInt(2.W)
    val s0_vl = UInt(8.W)
  })

  val out = DecoupledIO(Output(new Bundle {
    // NOTE(review): width is hard-coded to 128 while `src` uses VLEN -- confirm whether this
    // should track VLEN as well.
    val result = UInt(128.W)
    val fflags = UInt(5.W)
  }))
}

/**
  * Wraps `VLEN / XLEN` `VectorFloatFMA` instances and adds a ready/valid interface.
  *
  * A `Latency`-deep shift register of valid bits tracks the operation in flight. The wrapper
  * reports ready only while no stage of that shift register is valid (and `io.out.ready` is
  * high), so at most one operation is in flight at a time.
  */
class VfmaccWrapper(implicit p: Parameters) extends XSModule{
  val Latency = 3
  val AdderWidth = XLEN
  val NumAdder = VLEN / XLEN

  val io = IO(new VFPUWraaperBundle)

  val in = io.in.bits
  val out = io.out.bits
  val inHs = io.in.fire()

  // Valid shift register: stage 0 follows the input handshake, stage i follows stage i-1.
  val validPipe = Seq.fill(Latency)(RegInit(false.B))
  validPipe.zipWithIndex.foreach {
    case (valid, idx) =>
      valid := (if (idx == 0) inHs else validPipe(idx - 1))
  }
  val s0_mask = io.ready_out.s0_mask
  val s0_sew = io.ready_out.s0_sew
  val s0_vl = io.ready_out.s0_vl

  val vfmacc = Seq.fill(NumAdder)(Module(new VectorFloatFMA()))
  // Operands that are not vector registers are replicated across the vector by VecExtractor.
  val src1 = Mux(in.srcType(0) === SrcType.vp, in.src(0), VecExtractor(in.fp_format, in.src(0)))
  val src2 = Mux(in.srcType(1) === SrcType.vp, in.src(1), VecExtractor(in.fp_format, in.src(1)))
  val src3 = Mux(in.srcType(2) === SrcType.vp, in.src(2), VecExtractor(in.fp_format, in.src(2)))
  for (i <- 0 until NumAdder) {
    // When no handshake happens this cycle the operands are forced to 0 and the format to 3.
    vfmacc(i).io.fp_a := Mux(inHs, src1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
    vfmacc(i).io.fp_b := Mux(inHs, src2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
    vfmacc(i).io.fp_c := Mux(inHs, src3(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
    vfmacc(i).io.is_vec := true.B // If you can enter, it must be vector
    vfmacc(i).io.round_mode := in.round_mode
    vfmacc(i).io.fp_format := Mux(inHs, in.fp_format, 3.U(2.W))
    vfmacc(i).io.res_widening := in.res_widening // TODO
  }

  // output signal generation
  // The flag groups used depend on the element width; the accumulated flags up to element
  // vl-1 are reported (all zero when vl is 0).
  // NOTE(review): the index lists presumably assume four 5-bit flag groups per FMA -- confirm
  // against VectorFloatFMA.
  val s2_fflagsVec = VecInit(vfmacc.map(_.io.fflags)).asUInt()
  val s2_fflags16vl = fflagsGen(s0_mask, s2_fflagsVec, List.range(0, 8))
  val s2_fflags32vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 1, 4, 5))
  val s2_fflags64vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 4))
  val s2_fflags = LookupTree(s0_sew(1, 0), List(
    "b01".U -> Mux(s0_vl.orR, s2_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
    "b10".U -> Mux(s0_vl.orR, s2_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
    "b11".U -> Mux(s0_vl.orR, s2_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
  ))
  out.fflags := s2_fflags

  val s2_result = VecInit(vfmacc.map(_.io.fp_result)).asUInt()
  out.result := s2_result

  // Ready only when nothing is in flight and the consumer is ready.
  io.in.ready := !(validPipe.foldLeft(false.B)(_ | _)) && io.out.ready
  io.out.valid := validPipe(Latency - 1)
}

/**
  * Wraps `VLEN / XLEN` `VectorFloatAdder` instances and adds a ready/valid interface.
  *
  * Uses the same valid shift register scheme as [[VfmaccWrapper]] with `Latency = 2`; the
  * result and fflags are captured in a register while stage 0 of the valid pipe is set and
  * presented on `io.out` from that register.
  */
class VfaluWrapper(implicit p: Parameters) extends XSModule{
  val Latency = 2
  val AdderWidth = XLEN
  val NumAdder = VLEN / XLEN

  val io = IO(new VFPUWraaperBundle)

  val in = io.in.bits
  val out = io.out.bits
  val inHs = io.in.fire()

  // reg input signal
  // Valid shift register: stage 0 follows the input handshake, stage i follows stage i-1.
  val validPipe = Seq.fill(Latency)(RegInit(false.B))
  validPipe.zipWithIndex.foreach {
    case (valid, idx) =>
      valid := (if (idx == 0) inHs else validPipe(idx - 1))
  }
  val s0_mask = io.ready_out.s0_mask
  val s0_sew = io.ready_out.s0_sew
  val s0_vl = io.ready_out.s0_vl

  // connect the input signal
  val vfalu = Seq.fill(NumAdder)(Module(new VectorFloatAdder()))
  // Operands that are not vector registers are replicated across the vector by VecExtractor.
  val src1 = Mux(in.srcType(0) === SrcType.vp, in.src(0), VecExtractor(in.fp_format, in.src(0)))
  val src2 = Mux(in.srcType(1) === SrcType.vp, in.src(1), VecExtractor(in.fp_format, in.src(1)))
  for (i <- 0 until NumAdder) {
    // When no handshake happens this cycle the operands are forced to 0 and the format to 3.
    vfalu(i).io.fp_a := Mux(inHs, src1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
    vfalu(i).io.fp_b := Mux(inHs, src2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
    vfalu(i).io.is_vec := true.B // If you can enter, it must be vector
    vfalu(i).io.round_mode := in.round_mode
    vfalu(i).io.fp_format := Mux(inHs, in.fp_format, 3.U(2.W))
    vfalu(i).io.opb_widening := in.opb_widening // TODO
    vfalu(i).io.res_widening := in.res_widening // TODO
    vfalu(i).io.op_code := in.op_code
  }

  // output signal generation
  // The flag groups used depend on the element width; the accumulated flags up to element
  // vl-1 are reported (all zero when vl is 0).
  // NOTE(review): the index lists presumably assume four 5-bit flag groups per adder --
  // confirm against VectorFloatAdder.
  val s0_fflagsVec = VecInit(vfalu.map(_.io.fflags)).asUInt()
  val s0_fflags16vl = fflagsGen(s0_mask, s0_fflagsVec, List.range(0, 8))
  val s0_fflags32vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 1, 4, 5))
  val s0_fflags64vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 4))
  val s0_fflags = LookupTree(s0_sew(1, 0), List(
    "b01".U -> Mux(s0_vl.orR, s0_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
    "b10".U -> Mux(s0_vl.orR, s0_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
    "b11".U -> Mux(s0_vl.orR, s0_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
  ))
  val s1_fflags = RegEnable(s0_fflags, validPipe(Latency-2))
  out.fflags := s1_fflags

  // Select the adder result port that matches the element width.
  val s0_result = LookupTree(s0_sew(1, 0), List(
    "b01".U -> VecInit(vfalu.map(_.io.fp_f16_result)).asUInt(),
    "b10".U -> VecInit(vfalu.map(_.io.fp_f32_result)).asUInt(),
    "b11".U -> VecInit(vfalu.map(_.io.fp_f64_result)).asUInt(),
  ))
  val s1_result = RegEnable(s0_result, validPipe(Latency-2))
  out.result := s1_result

  // Ready only when nothing is in flight and the consumer is ready.
  io.in.ready := !(validPipe.foldLeft(false.B)(_|_)) && io.out.ready
  io.out.valid := validPipe(Latency-1)
}

/** Helper that turns raw per-group exception flags into per-element accumulated flags. */
object fflagsGen{
  /**
    * Builds a vector of accumulated 5-bit exception flags.
    *
    * Slot `k` takes bits `(idx(k)*5+4, idx(k)*5)` of `fflagsResult` when its mask bit is set,
    * and 0 otherwise. Entry `i` of the returned Vec is the bitwise OR of slots `0..i`.
    *
    * @param vmask        element mask; only bits `(idx.length-1, 0)` are read
    * @param fflagsResult concatenation of 5-bit flag groups
    * @param idx          indices of the 5-bit groups to use, one per slot (must be non-empty)
    * @return Vec of `idx.length` accumulated flag values
    */
  def fflagsGen(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
    val num = idx.length
    val fflags = Seq.fill(num)(Wire(UInt(5.W)))
    // NOTE(review): the mask bits are reversed before pairing, so slot 0 is gated by
    // vmask(num-1) -- confirm this matches the intended mask bit ordering.
    fflags.zip(vmask(num-1, 0).asBools().reverse).zip(idx).foreach {
      case ((fflags0, mask), id) =>
        fflags0 := Mux(mask, fflagsResult(id*5+4,id*5+0), 0.U)
    }
    // Running OR: entry i accumulates the flags of slots 0..i.
    val fflagsVl = Wire(Vec(num,UInt(5.W)))
    for (i <- 0 until num) {
      fflagsVl(i) := (if (i == 0) fflags(i) else (fflagsVl(i - 1) | fflags(i)))
    }
    fflagsVl
  }

  /** Same as [[fflagsGen]]; allows the call syntax `fflagsGen(mask, flags, idx)`. */
  def apply(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
    fflagsGen(vmask, fflagsResult, idx)
  }
}

/** Helper that replicates the low element of a value across a 128-bit vector. */
object VecExtractor{
  /**
    * Replicates the lowest element of `xf` to fill 128 bits.
    *
    * @param sew element width selector (only bits 1..0 are used): 0 -> 8 bit, 1 -> 16 bit,
    *            2 -> 32 bit, 3 -> 64 bit
    * @param xf  source value; only its low 8/16/32/64 bits are used, depending on `sew`
    * @return 16 x 8-bit, 8 x 16-bit, 4 x 32-bit or 2 x 64-bit copies of the low element
    */
  def xf2v_sew(sew: UInt, xf:UInt): UInt = {
    LookupTree(sew(1, 0), List(
      "b00".U -> VecInit(Seq.fill(16)(xf(7, 0))).asUInt,
      "b01".U -> VecInit(Seq.fill(8)(xf(15, 0))).asUInt,
      "b10".U -> VecInit(Seq.fill(4)(xf(31, 0))).asUInt,
      "b11".U -> VecInit(Seq.fill(2)(xf(63, 0))).asUInt,
    ))
  }

  /** Same as [[xf2v_sew]]; allows the call syntax `VecExtractor(sew, xf)`. */
  def apply(sew: UInt, xf: UInt): UInt = {
    xf2v_sew(sew, xf)
  }
}