package xiangshan.backend.fu

import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend._
import xiangshan.backend.fu.FunctionUnit._

/** Control bits that accompany a multiply/divide operation.
  *
  * In this file only `isW` and `isHi` are consumed (by [[ArrayMultiplier]]);
  * `sign` is declared but not read here.
  */
class MulDivCtrl extends Bundle {
  val sign = Bool()
  // When set, the result is the sign-extended low 32 bits of the selected half.
  val isW = Bool()
  // When set, return the high half of the product instead of the low half.
  val isHi = Bool()
}

/** Result bundle: an XLEN-wide data word plus the micro-op it belongs to. */
class MulDivOutput extends XSBundle {
  val data = UInt(XLEN.W)
  val uop = new MicroOp
}

/** Decoupled request/response interface for a multiply/divide unit.
  *
  * @param len bit width of each source operand
  */
class MulDivIO(val len: Int) extends XSBundle {
  val in = Flipped(DecoupledIO(new Bundle() {
    val src1, src2 = UInt(len.W)
    val ctrl = new MulDivCtrl
  }))
  val out = DecoupledIO(new MulDivOutput)
  val redirect = Flipped(ValidIO(new Redirect))
}

/** Base class for multipliers built on `PipelinedFunctionUnit`.
  *
  * Exposes the first two source operands of the incoming request as
  * `src1` / `src2`; a [[MulDivCtrl]] is passed to the base class as the
  * extra per-request input.
  *
  * @param len     operand width handed to the base function unit
  * @param latency number of pipeline stages (defaults to 3)
  */
abstract class Multiplier
(
  val len: Int,
  val latency: Int = 3
) extends PipelinedFunctionUnit(mulCfg, len, latency, new MulDivCtrl) {
//  val io = IO(new MulDivIO(len))

  val (src1, src2) = (io.in.bits.src(0), io.in.bits.src(1))

}

// NOTE(review): the commented-out trait below is kept verbatim. The live code
// relies on `PipelineReg`, `validVec` and `flushVec` that are not defined in
// this file, so it is presumably superseded by the base class -- confirm
// before deleting.
//trait HasPipelineReg { this: ArrayMultiplier =>
//
//  val validVec = io.in.valid +: Array.fill(latency)(RegInit(false.B))
//  val rdyVec = Array.fill(latency)(Wire(Bool())) :+ io.out.ready
//  val ctrlVec = io.in.bits.ctrl +: Array.fill(latency)(Reg(new MulDivCtrl))
//  val flushVec = ctrlVec.zip(validVec).map(x => x._2 && x._1.uop.needFlush(io.redirect))
//
//  for(i <- 0 until latency){
//    rdyVec(i) := !validVec(i+1) || rdyVec(i+1)
//  }
//
//  for(i <- 1 to latency){
//    when(flushVec(i-1) || rdyVec(i) && !validVec(i-1)){
//      validVec(i) := false.B
//    }.elsewhen(rdyVec(i-1) && validVec(i-1) && !flushVec(i-1)){
//      validVec(i) := validVec(i-1)
//      ctrlVec(i) := ctrlVec(i-1)
//    }
//  }
//
//  io.in.ready := rdyVec(0)
//  io.out.valid := validVec.last && !flushVec.last
//  io.out.bits.uop := ctrlVec.last.uop
//
//  def PipelineReg[T<:Data](i: Int)(next: T) = RegEnable(
//    next,
//    enable = validVec(i-1) && rdyVec(i-1) && !flushVec(i-1)
//  )
//
//  def S1Reg[T<:Data](next: T):T = PipelineReg[T](1)(next)
//  def S2Reg[T<:Data](next: T):T = PipelineReg[T](2)(next)
//  def S3Reg[T<:Data](next: T):T = PipelineReg[T](3)(next)
//  def S4Reg[T<:Data](next: T):T = PipelineReg[T](4)(next)
//  def S5Reg[T<:Data](next: T):T = PipelineReg[T](5)(next)
//}

/** Multiplier that computes the signed product of `src1` and `src2` and
  * carries the product and its control bits through `latency` pipeline
  * register stages before selecting the output word.
  *
  * @param len       operand width, forwarded to [[Multiplier]]
  * @param latency   number of pipeline register stages (defaults to 3)
  * @param realArray not referenced anywhere in this class; kept so existing
  *                  call sites continue to compile
  */
class ArrayMultiplier
(
  len: Int,
  latency: Int = 3,
  realArray: Boolean = false
) extends Multiplier(len, latency) {

  // Both operands are reinterpreted as signed before multiplying.
  val mulRes = src1.asSInt() * src2.asSInt()

  // Index 0 holds the un-registered product / control bits; index i holds the
  // value after i calls to PipelineReg. The data register and the control
  // register of a stage are created together, data first.
  val (dataVec, ctrlVec) =
    (1 to latency).foldLeft((Seq(mulRes.asUInt()), Seq(io.in.bits.ext.get))) {
      case ((data, ctrl), stage) =>
        val nextData = PipelineReg(stage)(data.last)
        val nextCtrl = PipelineReg(stage)(ctrl.last)
        (data :+ nextData, ctrl :+ nextCtrl)
    }

  val xlen = io.out.bits.data.getWidth
  // isHi picks bits [2*xlen-1 : xlen] of the final-stage product, otherwise bits [xlen-1 : 0].
  val res = Mux(ctrlVec.last.isHi, dataVec.last(2*xlen-1, xlen), dataVec.last(xlen-1,0))
  // isW replaces the result with its low 32 bits sign-extended to xlen.
  io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31,0),xlen), res)

  XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")(this.name)

  //  printf(p"t=${GTimer()} in: v${io.in.valid} r:${io.in.ready}\n")
  //  printf(p"t=${GTimer()} out: v:${io.out.valid} r:${io.out.ready} vec:${Binary(Cat(validVec))}\n")
}