///****************************************************************************************
// * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
// * Copyright (c) 2020-2021 Peng Cheng Laboratory
// *
// * XiangShan is licensed under Mulan PSL v2.
// * You can use this software according to the terms and conditions of the Mulan PSL v2.
// * You may obtain a copy of Mulan PSL v2 at:
// * http://license.coscl.org.cn/MulanPSL2
// *
// * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
// * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
// * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
// *
// * See the Mulan PSL v2 for more details.
// ****************************************************************************************
// */
//
// NOTE(review): every line of this file is commented out, so none of the
// definitions below are compiled. The reason is not recorded here -- confirm
// that the referenced base classes and IO names still exist before re-enabling.
//
//package xiangshan.backend.fu.vector
//
//import org.chipsalliance.cde.config.Parameters
//import chisel3.{Mux, _}
//import chisel3.util._
//import utils._
//import utility._
//import yunsuan.vector.{VectorFloatAdder,VectorFloatFMA,VectorFloatDivider}
//import yunsuan.VfpuType
//import xiangshan.{FuType, XSCoreParamsKey}
//
// NOTE(review): VFPU instantiates the three wrappers below as its data modules
// and builds one select signal per wrapper from fuType and fuOpType. It raises
// an XSError for VfpuType.dummy and for vsew === 0.
//class VFPU(implicit p: Parameters) extends VPUSubModule(p(XSCoreParamsKey).VLEN) {
//  XSError(io.in.valid && io.in.bits.uop.ctrl.fuOpType === VfpuType.dummy, "VFPU OpType not supported")
//  XSError(io.in.valid && (io.in.bits.uop.ctrl.vconfig.vtype.vsew === 0.U), "8 bits not supported in VFPU")
//  override val dataModule = Seq(
//    Module(new VfaluWrapper),
//    Module(new VfmaccWrapper),
//    Module(new VfdivWrapper)
//  )
//  val select0 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfalu
//  val select1 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfmacc
//  val select2 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfdiv
//  override val select = Seq(
//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select0,
//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select1,
//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select2
//  )
//  connectDataModule
//}
//
// NOTE(review): VfdivWrapper drives VLEN / XLEN VectorFloatDivider instances.
// io.in.ready and io.out.valid are the AND-reduction of the dividers'
// start_ready_o and finish_valid_o. The Latency list is declared but not
// referenced anywhere in this class.
//class VfdivWrapper(implicit p: Parameters) extends VPUDataModule{
//  needReverse := false.B
//  needClearMask := false.B
//
//  val Latency = List(5, 7, 12)
//  val AdderWidth = XLEN
//  val NumAdder = VLEN / XLEN
//
//  // TODO: Place these logic within the functional unit
//  val inHs = io.in.fire()
//  val s0_mask = DataHoldBypass(in.src(3), inHs)
//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1,0), inHs)
//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
//
//  val vfdiv = Seq.fill(NumAdder)(Module(new VectorFloatDivider()))
//  for (i <- 0 until NumAdder) {
//    vfdiv(i).io.opb_i := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfdiv(i).io.opa_i := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfdiv(i).io.is_vec_i := true.B // If you can enter, it must be vector
//    vfdiv(i).io.frs2_i := in.src(0)(63,0) // f[rs2]
//    vfdiv(i).io.frs1_i := in.src(1)(63,0) // f[rs1]
//    vfdiv(i).io.is_frs1_i := false.B // if true, vs2 / f[rs1]
//    vfdiv(i).io.is_frs2_i := false.B // if true, f[rs2] / vs1
//    vfdiv(i).io.is_sqrt_i := false.B // must false, not support sqrt now
//    vfdiv(i).io.rm_i := rm
//    vfdiv(i).io.fp_format_i := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
//    vfdiv(i).io.start_valid_i := io.in.valid
//    vfdiv(i).io.finish_ready_i := io.out.ready
//    vfdiv(i).io.flush_i := false.B // TODO
//  }
//
//  val s4_fflagsVec = VecInit(vfdiv.map(_.io.fflags_o)).asUInt
//  val s4_fflags16vl = fflagsGen(s0_mask, s4_fflagsVec, List.range(0, 8))
//  val s4_fflags32vl = fflagsGen(s0_mask, s4_fflagsVec, List(0, 1, 4, 5))
//  val s4_fflags64vl = fflagsGen(s0_mask, s4_fflagsVec, List(0, 4))
//  val s4_fflags = LookupTree(s0_sew(1, 0), List(
//    "b01".U -> Mux(s0_vl.orR, s4_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
//    "b10".U -> Mux(s0_vl.orR, s4_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
//    "b11".U -> Mux(s0_vl.orR, s4_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
//  ))
//  val s4_result = VecInit(vfdiv.map(_.io.fpdiv_res_o)).asUInt
//
//  io.out.bits.data := s4_result
//  fflags := s4_fflags
//  io.in.ready := VecInit(vfdiv.map(_.io.start_ready_o)).asUInt.andR()
//  io.out.valid := VecInit(vfdiv.map(_.io.finish_valid_o)).asUInt.andR()
//}
//
// NOTE(review): VfmaccWrapper drives VLEN / XLEN VectorFloatFMA instances.
// io.in.ready is tied high and io.out.valid is the last stage of a
// Latency-deep (3) shift register fed by the input handshake.
//class VfmaccWrapper(implicit p: Parameters) extends VPUDataModule{
//  needReverse := false.B
//  needClearMask := false.B
//
//  val Latency = 3
//  val AdderWidth = XLEN
//  val NumAdder = VLEN / XLEN
//
//  val inHs = io.in.fire()
//
//  val validPipe = Seq.fill(Latency)(RegInit(false.B))
//  validPipe.zipWithIndex.foreach {
//    case (valid, idx) =>
//      val _valid = if (idx == 0) Mux(inHs, true.B, false.B) else validPipe(idx - 1)
//      valid := _valid
//  }
//  // TODO: Place these logic within the functional unit
//  val s0_mask = DataHoldBypass(in.src(3), inHs)
//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1, 0), inHs)
//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
//
//  val vfmacc = Seq.fill(NumAdder)(Module(new VectorFloatFMA()))
//  for (i <- 0 until NumAdder) {
//    vfmacc(i).io.fp_a := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfmacc(i).io.fp_b := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfmacc(i).io.fp_c := Mux(inHs, in.src(2)(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfmacc(i).io.widen_b := Mux(inHs, Cat(vs1((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs1((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
//    vfmacc(i).io.widen_a := Mux(inHs, Cat(vs2((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs2((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
//    vfmacc(i).io.frs1 := in.src(0)(63,0)
//    vfmacc(i).io.is_frs1 := false.B // TODO: support vf inst
//    vfmacc(i).io.uop_idx := in.uop.ctrl.uopIdx // TODO
//    vfmacc(i).io.op_code := DontCare // TODO
//    vfmacc(i).io.is_vec := true.B // If you can enter, it must be vector
//    vfmacc(i).io.round_mode := rm
//    vfmacc(i).io.fp_format := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
//    vfmacc(i).io.res_widening := false.B // TODO
//  }
//
//  // output signal generation
//  val s2_fflagsVec = VecInit(vfmacc.map(_.io.fflags)).asUInt
//  val s2_fflags16vl = fflagsGen(s0_mask, s2_fflagsVec, List.range(0, 8))
//  val s2_fflags32vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 1, 4, 5))
//  val s2_fflags64vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 4))
//  val s2_fflags = LookupTree(s0_sew(1, 0), List(
//    "b01".U -> Mux(s0_vl.orR, s2_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
//    "b10".U -> Mux(s0_vl.orR, s2_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
//    "b11".U -> Mux(s0_vl.orR, s2_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
//  ))
//
//  val s2_result = VecInit(vfmacc.map(_.io.fp_result)).asUInt
//
//  io.out.bits.data := s2_result
//  fflags := s2_fflags
//
//  io.in.ready := true.B
//  io.out.valid := validPipe(Latency - 1)
//}
//
// NOTE(review): VfaluWrapper drives VLEN / XLEN VectorFloatAdder instances.
// io.in.ready is tied high; the adders' result and flags are captured with
// RegEnable on validPipe(Latency-2) and io.out.valid is validPipe(Latency-1),
// with Latency = 2.
//class VfaluWrapper(implicit p: Parameters) extends VPUDataModule{
//  needReverse := false.B
//  needClearMask := false.B
//
//  val Latency = 2
//  val AdderWidth = XLEN
//  val NumAdder = VLEN / XLEN
//
//  val inHs = io.in.fire()
//
//  // reg input signal
//  val validPipe = Seq.fill(Latency)(RegInit(false.B))
//  validPipe.zipWithIndex.foreach {
//    case (valid, idx) =>
//      val _valid = if (idx == 0) Mux(inHs, true.B, false.B) else validPipe(idx - 1)
//      valid := _valid
//  }
//  // TODO: Place these logic within the functional unit
//  val s0_mask = DataHoldBypass(in.src(3), inHs)
//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1, 0), inHs)
//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
//
//  // connect the input signal
//  val vfalu = Seq.fill(NumAdder)(Module(new VectorFloatAdder()))
//  for (i <- 0 until NumAdder) {
//    vfalu(i).io.fp_b := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfalu(i).io.fp_a := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfalu(i).io.widen_b := Mux(inHs, Cat(vs1((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs1((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
//    vfalu(i).io.widen_a := Mux(inHs, Cat(vs2((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs2((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
//    vfalu(i).io.frs1 := in.src(0)(63, 0)
//    vfalu(i).io.is_frs1 := false.B // TODO: support vf inst
//    vfalu(i).io.mask := 0.U //TODO
//    vfalu(i).io.uop_idx := in.uop.ctrl.uopIdx // TODO
//    vfalu(i).io.is_vec := true.B // If you can enter, it must be vector
//    vfalu(i).io.round_mode := rm
//    vfalu(i).io.fp_format := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
//    vfalu(i).io.opb_widening := false.B // TODO
//    vfalu(i).io.res_widening := false.B // TODO
//    vfalu(i).io.op_code := in.uop.ctrl.fuOpType
//  }
//
//  // output signal generation
//  val s0_fflagsVec = VecInit(vfalu.map(_.io.fflags)).asUInt
//  val s0_fflags16vl = fflagsGen(s0_mask, s0_fflagsVec, List.range(0, 8))
//  val s0_fflags32vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 1, 4, 5))
//  val s0_fflags64vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 4))
//  val s0_fflags = LookupTree(s0_sew(1, 0), List(
//    "b01".U -> Mux(s0_vl.orR, s0_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
//    "b10".U -> Mux(s0_vl.orR, s0_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
//    "b11".U -> Mux(s0_vl.orR, s0_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
//  ))
//  val s1_fflags = RegEnable(s0_fflags, validPipe(Latency-2))
//  val s0_result = VecInit(vfalu.map(_.io.fp_result)).asUInt
//  val s1_result = RegEnable(s0_result, validPipe(Latency-2))
//
//  fflags := s1_fflags
//  io.out.bits.data := s1_result
//
//  io.in.ready := true.B
//  io.out.valid := validPipe(Latency-1)
//}
//
// NOTE(review): fflagsGen takes one 5-bit flag slice of fflagsResult per entry
// of idx (bits id*5+4 .. id*5), zeroes it when the paired mask bit is clear,
// and returns a Vec whose element i is the OR of gated slices 0..i. The mask
// bits come from vmask(num-1, 0) in reversed order -- TODO confirm that this
// reversal is the intended element ordering.
//object fflagsGen{
//  def fflagsGen(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
//    var num = idx.length
//    val fflags = Seq.fill(num)(Wire(UInt(5.W)))
//    fflags.zip(vmask(num-1, 0).asBools().reverse).zip(idx).foreach {
//      case ((fflags0, mask), id) =>
//        fflags0 := Mux(mask, fflagsResult(id*5+4,id*5+0), 0.U)
//    }
//    val fflagsVl = Wire(Vec(num,UInt(5.W)))
//    for (i <- 0 until num) {
//      val _fflags = if (i == 0) fflags(i) else (fflagsVl(i - 1) | fflags(i))
//      fflagsVl(i) := _fflags
//    }
//    fflagsVl
//  }
//
//  def apply(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
//    fflagsGen(vmask, fflagsResult, idx)
//  }
//}