package xiangshan.backend.fu

import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.backend._
import xiangshan.backend.fu.FunctionUnit._

/** Control bits that accompany a multiply/divide operation.
  *
  * Within this file only `isHi` and `isW` are consumed (by [[ArrayMultiplier]]).
  */
class MulDivCtrl extends Bundle {
  // Not read anywhere in this file; presumably selects signed operation -- confirm against users.
  val sign = Bool()
  // When set, ArrayMultiplier sign-extends bits 31..0 of the selected result to the output width.
  val isW = Bool()
  // When set, ArrayMultiplier returns the upper half of the product instead of the lower half.
  val isHi = Bool()
}

/** Result payload: an XLEN-bit data word together with its micro-op. */
class MulDivOutput extends XSBundle {
  val data = UInt(XLEN.W)
  val uop = new MicroOp
}

/** Decoupled request/response bundle for a multiply/divide unit.
  *
  * @param len width, in bits, of each of the two source operands
  */
class MulDivIO(val len: Int) extends XSBundle {
  // Request side: two `len`-bit operands plus their control bits.
  val in = Flipped(DecoupledIO(new Bundle() {
    val src1 = UInt(len.W)
    val src2 = UInt(len.W)
    val ctrl = new MulDivCtrl
  }))
  // Response side.
  val out = DecoupledIO(new MulDivOutput)
  // Incoming redirect notification.
  val redirect = Flipped(ValidIO(new Redirect))
}

/** Common base of the multiplier function units.
  *
  * Forwards `mulCfg`, a [[MulDivCtrl]] as the extra input payload and `latency`
  * to `FunctionUnit`, mixes in `HasPipelineReg[MulDivCtrl, Null]`, and exposes
  * the first two source operands as `src1` / `src2`.
  *
  * @param len     operand width parameter; not read in the body shown here
  * @param latency value handed to `FunctionUnit` as its `latency` argument
  */
abstract class Multiplier(val len: Int, latency: Int = 3)
  extends FunctionUnit(cfg = mulCfg, extIn = new MulDivCtrl, latency = latency)
  with HasPipelineReg[MulDivCtrl, Null] {

  val src1 = io.in.bits.src(0)
  val src2 = io.in.bits.src(1)

}

// Commented-out, non-generic variant of HasPipelineReg, retained for reference.
// NOTE(review): Multiplier mixes in HasPipelineReg with two type arguments, which
// this variant does not accept, so the trait in use is presumably declared
// elsewhere -- confirm before deleting this block.
//
//trait HasPipelineReg { this: ArrayMultiplier =>
//
//  val validVec = io.in.valid +: Array.fill(latency)(RegInit(false.B))
//  val rdyVec = Array.fill(latency)(Wire(Bool())) :+ io.out.ready
//  val ctrlVec = io.in.bits.ctrl +: Array.fill(latency)(Reg(new MulDivCtrl))
//  val flushVec = ctrlVec.zip(validVec).map(x => x._2 && x._1.uop.needFlush(io.redirect))
//
//  for(i <- 0 until latency){
//    rdyVec(i) := !validVec(i+1) || rdyVec(i+1)
//  }
//
//  for(i <- 1 to latency){
//    when(flushVec(i-1) || rdyVec(i) && !validVec(i-1)){
//      validVec(i) := false.B
//    }.elsewhen(rdyVec(i-1) && validVec(i-1) && !flushVec(i-1)){
//      validVec(i) := validVec(i-1)
//      ctrlVec(i) := ctrlVec(i-1)
//    }
//  }
//
//  io.in.ready := rdyVec(0)
//  io.out.valid := validVec.last && !flushVec.last
//  io.out.bits.uop := ctrlVec.last.uop
//
//  def PipelineReg[T<:Data](i: Int)(next: T) = RegEnable(
//    next,
//    enable = validVec(i-1) && rdyVec(i-1) && !flushVec(i-1)
//  )
//
//  def S1Reg[T<:Data](next: T):T = PipelineReg[T](1)(next)
//  def S2Reg[T<:Data](next: T):T = PipelineReg[T](2)(next)
//  def S3Reg[T<:Data](next: T):T = PipelineReg[T](3)(next)
//  def S4Reg[T<:Data](next: T):T = PipelineReg[T](4)(next)
//  def S5Reg[T<:Data](next: T):T = PipelineReg[T](5)(next)
//}

/** Multiplier that forms the product with the `*` operator and then carries the
  * product and its control bits through `latency` `PipelineReg` stages.
  *
  * @param len       forwarded to [[Multiplier]]
  * @param latency   number of `PipelineReg` stages applied to the product and control bits
  * @param realArray not read in the body shown here
  */
class ArrayMultiplier(
  len: Int,
  latency: Int = 3,
  realArray: Boolean = false
) extends Multiplier(len, latency) {

  // Both operands are reinterpreted as signed before multiplying.
  val mulRes = src1.asSInt() * src2.asSInt()

  // Element 0 is the un-registered value; element k is that value after k PipelineReg stages.
  var dataVec = Seq(mulRes.asUInt())
  var ctrlVec = Seq(io.in.bits.ext.get)

  (1 to latency).foreach { stage =>
    // Before stage k runs each Seq holds k elements, so `.last` is the stage k-1 value.
    dataVec = dataVec :+ PipelineReg(stage)(dataVec.last)
    ctrlVec = ctrlVec :+ PipelineReg(stage)(ctrlVec.last)
  }

  val xlen = io.out.bits.data.getWidth
  // isHi picks bits [2*xlen-1 : xlen] of the final-stage product, otherwise bits [xlen-1 : 0].
  val res = {
    val product = dataVec.last
    Mux(ctrlVec.last.isHi, product(2 * xlen - 1, xlen), product(xlen - 1, 0))
  }
  // isW replaces the result with its low 32 bits sign-extended to xlen.
  io.out.bits.data := Mux(ctrlVec.last.isW, SignExt(res(31, 0), xlen), res)

  // validVec / flushVec are not declared in this file; they come from an inherited member.
  XSDebug(p"validVec:${Binary(Cat(validVec))} flushVec:${Binary(Cat(flushVec))}\n")(this.name)

  //  printf(p"t=${GTimer()} in: v${io.in.valid} r:${io.in.ready}\n")
  //  printf(p"t=${GTimer()} out: v:${io.out.valid} r:${io.out.ready} vec:${Binary(Cat(validVec))}\n")
}