///****************************************************************************************
// * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
// * Copyright (c) 2020-2021 Peng Cheng Laboratory
// *
// * XiangShan is licensed under Mulan PSL v2.
// * You can use this software according to the terms and conditions of the Mulan PSL v2.
// * You may obtain a copy of Mulan PSL v2 at:
// * http://license.coscl.org.cn/MulanPSL2
// *
// * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
// * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
// * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
// *
// * See the Mulan PSL v2 for more details.
// ****************************************************************************************
// */
//
//// NOTE(review): every line of this file is commented out, so it currently contributes no
//// definitions to the build. The reason is not recorded here; presumably the code is kept
//// for reference only -- confirm before re-enabling or deleting it. Stripping the leading
//// "//" from each line yields the original source with its comments intact.
//
//package xiangshan.backend.fu.vector
//
//import org.chipsalliance.cde.config.Parameters
//import chisel3.{Mux, _}
//import chisel3.util._
//import utils._
//import utility._
//import yunsuan.vector.{VectorFloatAdder,VectorFloatFMA,VectorFloatDivider}
//import yunsuan.VfpuType
//import xiangshan.{FuType, XSCoreParamsKey}
//
///**
// * Vector floating-point unit built on VPUSubModule.
// *
// * Registers two XSError checks on a valid input (fuOpType === VfpuType.dummy, and
// * vsew === 0), instantiates three data modules (VfaluWrapper, VfmaccWrapper, VfdivWrapper)
// * and builds one select signal per data module, in the same order, from
// * `fuType === FuType.vfpu` combined with a comparison of fuOpType against
// * VfpuType.isVfalu / isVfmacc / isVfdiv.
// */
//class VFPU(implicit p: Parameters) extends VPUSubModule(p(XSCoreParamsKey).VLEN) {
//  XSError(io.in.valid && io.in.bits.uop.ctrl.fuOpType === VfpuType.dummy, "VFPU OpType not supported")
//  XSError(io.in.valid && (io.in.bits.uop.ctrl.vconfig.vtype.vsew === 0.U), "8 bits not supported in VFPU")
//  override val dataModule = Seq(
//    Module(new VfaluWrapper),
//    Module(new VfmaccWrapper),
//    Module(new VfdivWrapper)
//  )
//  val select0 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfalu
//  val select1 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfmacc
//  val select2 = io.in.bits.uop.ctrl.fuOpType === VfpuType.isVfdiv
//  override val select = Seq(
//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select0,
//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select1,
//    io.in.bits.uop.ctrl.fuType === FuType.vfpu && select2
//  )
//  connectDataModule
//}
//
///**
// * Data module wrapping NumAdder (= VLEN / XLEN) VectorFloatDivider instances, one per
// * AdderWidth-bit slice of vs1 / vs2.
// *
// * Handshake: io.in.ready is the AND of every divider's start_ready_o and io.out.valid is
// * the AND of every divider's finish_valid_o. The output fflags are selected by the held
// * sew and vl from the prefix-OR vectors produced by fflagsGen (0 when vl is 0).
// * `Latency` is declared but not referenced inside this class.
// */
//class VfdivWrapper(implicit p: Parameters) extends VPUDataModule{
//  needReverse := false.B
//  needClearMask := false.B
//
//  val Latency = List(5, 7, 12)
//  val AdderWidth = XLEN
//  val NumAdder = VLEN / XLEN
//
//  // TODO: Place this logic within the functional unit
//  val inHs = io.in.fire()
//  // Mask, sew and vl are captured with DataHoldBypass on the input handshake.
//  val s0_mask = DataHoldBypass(in.src(3), inHs)
//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1,0), inHs)
//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
//
//  val vfdiv = Seq.fill(NumAdder)(Module(new VectorFloatDivider()))
//  for (i <- 0 until NumAdder) {
//    // Operands are forced to 0 whenever the input handshake is not firing.
//    vfdiv(i).io.opb_i := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfdiv(i).io.opa_i := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfdiv(i).io.is_vec_i := true.B // If you can enter, it must be vector
//    vfdiv(i).io.frs2_i := in.src(0)(63,0) // f[rs2]
//    vfdiv(i).io.frs1_i := in.src(1)(63,0) // f[rs1]
//    vfdiv(i).io.is_frs1_i := false.B // if true, vs2 / f[rs1]
//    vfdiv(i).io.is_frs2_i := false.B // if true, f[rs2] / vs1
//    vfdiv(i).io.is_sqrt_i := false.B // must be false: sqrt is not supported yet
//    vfdiv(i).io.rm_i := rm
//    vfdiv(i).io.fp_format_i := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
//    vfdiv(i).io.start_valid_i := io.in.valid
//    vfdiv(i).io.finish_ready_i := io.out.ready
//    vfdiv(i).io.flush_i := false.B // TODO
//  }
//
//  val s4_fflagsVec = VecInit(vfdiv.map(_.io.fflags_o)).asUInt
//  val s4_fflags16vl = fflagsGen(s0_mask, s4_fflagsVec, List.range(0, 8))
//  val s4_fflags32vl = fflagsGen(s0_mask, s4_fflagsVec, List(0, 1, 4, 5))
//  val s4_fflags64vl = fflagsGen(s0_mask, s4_fflagsVec, List(0, 4))
//  // Pick entry (vl - 1) of the prefix-OR vector for the held sew; 0 when vl is 0.
//  val s4_fflags = LookupTree(s0_sew(1, 0), List(
//    "b01".U -> Mux(s0_vl.orR, s4_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
//    "b10".U -> Mux(s0_vl.orR, s4_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
//    "b11".U -> Mux(s0_vl.orR, s4_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
//  ))
//  val s4_result = VecInit(vfdiv.map(_.io.fpdiv_res_o)).asUInt
//
//  io.out.bits.data := s4_result
//  fflags := s4_fflags
//  io.in.ready := VecInit(vfdiv.map(_.io.start_ready_o)).asUInt.andR()
//  io.out.valid := VecInit(vfdiv.map(_.io.finish_valid_o)).asUInt.andR()
//}
//
///**
// * Data module wrapping NumAdder (= VLEN / XLEN) VectorFloatFMA instances, one per
// * AdderWidth-bit slice of vs1 / vs2 / src(2).
// *
// * Validity is tracked by a Latency-deep (3) chain of registers fed by the input handshake;
// * io.out.valid is the last stage and io.in.ready is tied high.
// * NOTE(review): io.out.ready is not referenced in this class, so back-pressure from the
// * consumer is presumably not supported -- confirm.
// */
//class VfmaccWrapper(implicit p: Parameters) extends VPUDataModule{
//  needReverse := false.B
//  needClearMask := false.B
//
//  val Latency = 3
//  val AdderWidth = XLEN
//  val NumAdder = VLEN / XLEN
//
//  val inHs = io.in.fire()
//
//  // Stage 0 follows the input handshake; each later stage follows the previous one.
//  val validPipe = Seq.fill(Latency)(RegInit(false.B))
//  validPipe.zipWithIndex.foreach {
//    case (valid, idx) =>
//      val _valid = if (idx == 0) Mux(inHs, true.B, false.B) else validPipe(idx - 1)
//      valid := _valid
//  }
//  // TODO: Place this logic within the functional unit
//  val s0_mask = DataHoldBypass(in.src(3), inHs)
//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1, 0), inHs)
//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
//
//  val vfmacc = Seq.fill(NumAdder)(Module(new VectorFloatFMA()))
//  for (i <- 0 until NumAdder) {
//    // Operands are forced to 0 whenever the input handshake is not firing.
//    vfmacc(i).io.fp_a := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfmacc(i).io.fp_b := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfmacc(i).io.fp_c := Mux(inHs, in.src(2)(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfmacc(i).io.widen_b := Mux(inHs, Cat(vs1((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs1((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
//    vfmacc(i).io.widen_a := Mux(inHs, Cat(vs2((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs2((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
//    vfmacc(i).io.frs1 := in.src(0)(63,0)
//    vfmacc(i).io.is_frs1 := false.B // TODO: support vf inst
//    vfmacc(i).io.uop_idx := in.uop.ctrl.uopIdx // TODO
//    vfmacc(i).io.op_code := DontCare // TODO
//    vfmacc(i).io.is_vec := true.B // If you can enter, it must be vector
//    vfmacc(i).io.round_mode := rm
//    vfmacc(i).io.fp_format := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
//    vfmacc(i).io.res_widening := false.B // TODO
//  }
//
//  // output signal generation
//  val s2_fflagsVec = VecInit(vfmacc.map(_.io.fflags)).asUInt
//  val s2_fflags16vl = fflagsGen(s0_mask, s2_fflagsVec, List.range(0, 8))
//  val s2_fflags32vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 1, 4, 5))
//  val s2_fflags64vl = fflagsGen(s0_mask, s2_fflagsVec, List(0, 4))
//  // Pick entry (vl - 1) of the prefix-OR vector for the held sew; 0 when vl is 0.
//  val s2_fflags = LookupTree(s0_sew(1, 0), List(
//    "b01".U -> Mux(s0_vl.orR, s2_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
//    "b10".U -> Mux(s0_vl.orR, s2_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
//    "b11".U -> Mux(s0_vl.orR, s2_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
//  ))
//
//  val s2_result = VecInit(vfmacc.map(_.io.fp_result)).asUInt
//
//  io.out.bits.data := s2_result
//  fflags := s2_fflags
//
//  io.in.ready := true.B
//  io.out.valid := validPipe(Latency - 1)
//}
//
///**
// * Data module wrapping NumAdder (= VLEN / XLEN) VectorFloatAdder instances, one per
// * AdderWidth-bit slice of vs1 / vs2.
// *
// * Validity is tracked by a Latency-deep (2) chain of registers fed by the input handshake.
// * The adder result and the selected fflags are registered with RegEnable, enabled by
// * validPipe(Latency - 2); io.out.valid is validPipe(Latency - 1) and io.in.ready is tied high.
// * NOTE(review): io.out.ready is not referenced in this class, so back-pressure from the
// * consumer is presumably not supported -- confirm.
// */
//class VfaluWrapper(implicit p: Parameters) extends VPUDataModule{
//  needReverse := false.B
//  needClearMask := false.B
//
//  val Latency = 2
//  val AdderWidth = XLEN
//  val NumAdder = VLEN / XLEN
//
//  val inHs = io.in.fire()
//
//  // reg input signal
//  val validPipe = Seq.fill(Latency)(RegInit(false.B))
//  validPipe.zipWithIndex.foreach {
//    case (valid, idx) =>
//      val _valid = if (idx == 0) Mux(inHs, true.B, false.B) else validPipe(idx - 1)
//      valid := _valid
//  }
//  // TODO: Place this logic within the functional unit
//  val s0_mask = DataHoldBypass(in.src(3), inHs)
//  val s0_sew = DataHoldBypass(in.uop.ctrl.vconfig.vtype.vsew(1, 0), inHs)
//  val s0_vl = DataHoldBypass(in.uop.ctrl.vconfig.vl, inHs)
//
//  // connect the input signal
//  val vfalu = Seq.fill(NumAdder)(Module(new VectorFloatAdder()))
//  for (i <- 0 until NumAdder) {
//    // Operands are forced to 0 whenever the input handshake is not firing.
//    vfalu(i).io.fp_b := Mux(inHs, vs1(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfalu(i).io.fp_a := Mux(inHs, vs2(AdderWidth * (i + 1) - 1, AdderWidth * i), 0.U)
//    vfalu(i).io.widen_b := Mux(inHs, Cat(vs1((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs1((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
//    vfalu(i).io.widen_a := Mux(inHs, Cat(vs2((AdderWidth / 2) * (i + 3) - 1, (AdderWidth / 2) * (i + 2)), vs2((AdderWidth / 2) * (i + 1) - 1, (AdderWidth / 2) * i)), 0.U)
//    vfalu(i).io.frs1 := in.src(0)(63, 0)
//    vfalu(i).io.is_frs1 := false.B // TODO: support vf inst
//    vfalu(i).io.mask := 0.U //TODO
//    vfalu(i).io.uop_idx := in.uop.ctrl.uopIdx // TODO
//    vfalu(i).io.is_vec := true.B // If you can enter, it must be vector
//    vfalu(i).io.round_mode := rm
//    vfalu(i).io.fp_format := Mux(inHs, in.uop.ctrl.vconfig.vtype.vsew(1,0), 3.U(2.W))
//    vfalu(i).io.opb_widening := false.B // TODO
//    vfalu(i).io.res_widening := false.B // TODO
//    vfalu(i).io.op_code := in.uop.ctrl.fuOpType
//  }
//
//  // output signal generation
//  val s0_fflagsVec = VecInit(vfalu.map(_.io.fflags)).asUInt
//  val s0_fflags16vl = fflagsGen(s0_mask, s0_fflagsVec, List.range(0, 8))
//  val s0_fflags32vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 1, 4, 5))
//  val s0_fflags64vl = fflagsGen(s0_mask, s0_fflagsVec, List(0, 4))
//  // Pick entry (vl - 1) of the prefix-OR vector for the held sew; 0 when vl is 0.
//  val s0_fflags = LookupTree(s0_sew(1, 0), List(
//    "b01".U -> Mux(s0_vl.orR, s0_fflags16vl(s0_vl - 1.U), 0.U(5.W)),
//    "b10".U -> Mux(s0_vl.orR, s0_fflags32vl(s0_vl - 1.U), 0.U(5.W)),
//    "b11".U -> Mux(s0_vl.orR, s0_fflags64vl(s0_vl - 1.U), 0.U(5.W)),
//  ))
//  val s1_fflags = RegEnable(s0_fflags, validPipe(Latency-2))
//  val s0_result = VecInit(vfalu.map(_.io.fp_result)).asUInt
//  val s1_result = RegEnable(s0_result, validPipe(Latency-2))
//
//  fflags := s1_fflags
//  io.out.bits.data := s1_result
//
//  io.in.ready := true.B
//  io.out.valid := validPipe(Latency-1)
//}
//
///** Helper that folds concatenated 5-bit fflags fields into a running (prefix) OR. */
//object fflagsGen{
//  /**
//   * Gates the selected 5-bit fflags fields with mask bits and accumulates them.
//   *
//   * @param vmask        mask bits; only the low `idx.length` bits are read
//   * @param fflagsResult concatenated fflags; field k occupies bits [k*5+4 : k*5]
//   * @param idx          indices of the 5-bit fields to use, in accumulation order
//   * @return a Vec of `idx.length` 5-bit values; entry i is the OR of gated fields 0..i
//   */
//  def fflagsGen(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
//    val num = idx.length
//    val fflags = Seq.fill(num)(Wire(UInt(5.W)))
//    // NOTE(review): the mask bits are reversed before zipping, so (asBools being LSB-first)
//    // the field at idx(0) is gated by mask bit num-1 -- confirm this pairing is intended.
//    fflags.zip(vmask(num-1, 0).asBools().reverse).zip(idx).foreach {
//      case ((fflags0, mask), id) =>
//        fflags0 := Mux(mask, fflagsResult(id*5+4,id*5+0), 0.U)
//    }
//    val fflagsVl = Wire(Vec(num,UInt(5.W)))
//    for (i <- 0 until num) {
//      val _fflags = if (i == 0) fflags(i) else (fflagsVl(i - 1) | fflags(i))
//      fflagsVl(i) := _fflags
//    }
//    fflagsVl
//  }
//
//  /** Forwards to [[fflagsGen]] so the object can be called directly. */
//  def apply(vmask: UInt, fflagsResult:UInt, idx:List[Int] = List(0, 1, 4, 5)): Vec[UInt] = {
//    fflagsGen(vmask, fflagsResult, idx)
//  }
//}