package xiangshan.backend.fu

import chisel3._
import chisel3.util._
import xiangshan._
import utils._

/** Common base for integer dividers.
  *
  * Fixes the function-unit configuration (`FuType.div`, writes the integer
  * register file only, no redirect, uncertain latency) and exposes the
  * multiply/divide control bundle.
  *
  * @param len operand width in bits, forwarded to `FunctionUnit`
  */
abstract class AbstractDivider(len: Int) extends FunctionUnit(
  // NOTE(review): the positional `2, 0` are presumably the int/fp source-operand counts — confirm against FuConfig.
  FuConfig(FuType.div, 2, 0, writeIntRf = true, writeFpRf = false, hasRedirect = false, UncertainLatency()),
  len
){
  // Per-operation control, sampled by the implementation when a request is accepted.
  val ctrl = IO(Input(new MulDivCtrl))
  // True when the operands are to be interpreted as signed.
  val sign = ctrl.sign
}

/** Iterative divider producing one quotient bit per cycle (shift-and-subtract).
  *
  * State sequence for one operation:
  *   - `s_idle`    : `io.in.ready` is high; operands are latched on `io.in.fire()`.
  *   - `s_log2`    : computes how many leading iterations can be skipped.
  *   - `s_shift`   : loads `shiftReg` with the dividend pre-shifted by that amount.
  *   - `s_compute` : one compare/subtract/shift step per cycle until `cnt` reaches `len-1`.
  *   - `s_finish`  : holds `io.out.valid` until `io.out.ready`.
  *
  * Any in-flight operation is dropped (state returns to `s_idle`) when its uop
  * is flushed by `io.redirectIn`.
  *
  * @param len operand width in bits
  */
class Radix2Divider(len: Int) extends AbstractDivider(len) {

  /** Splits `a` into (isNegative, magnitude); with `sign` low, `a` is returned unchanged. */
  def abs(a: UInt, sign: Bool): (Bool, UInt) = {
    val s = a(len - 1) && sign
    (s, Mux(s, -a, a))
  }

  val s_idle :: s_log2 :: s_shift :: s_compute :: s_finish :: Nil = Enum(5)
  val state = RegInit(s_idle)
  val newReq = (state === s_idle) && io.in.fire()

  val (a, b) = (io.in.bits.src(0), io.in.bits.src(1))
  val divBy0 = b === 0.U(len.W)
  val divBy0Reg = RegEnable(divBy0, newReq)

  // Working register: {hi (len+1 bits: partial remainder), lo (len bits: quotient bits shifted in from the LSB)}.
  val shiftReg = Reg(UInt((1 + len * 2).W))
  val hi = shiftReg(len * 2, len)
  val lo = shiftReg(len - 1, 0)

  val uop = io.in.bits.uop

  // Operate on magnitudes; the signs are re-applied to the results at the end.
  val (aSign, aVal) = abs(a, sign)
  val (bSign, bVal) = abs(b, sign)
  val aSignReg = RegEnable(aSign, newReq)
  // On divide-by-zero the quotient must stay all ones, so it is never negated.
  val qSignReg = RegEnable((aSign ^ bSign) && !divBy0, newReq)
  val bReg = RegEnable(bVal, newReq)
  val aValx2Reg = RegEnable(Cat(aVal, "b0".U), newReq)
  val ctrlReg = RegEnable(ctrl, newReq)
  val uopReg = RegEnable(uop, newReq)

  // In the acceptance cycle `state` is still s_idle and `uopReg` still holds the
  // previous operation's uop, so the generic kill check below cannot catch a
  // request that is flushed in the very cycle it is accepted. Check the incoming
  // uop directly and simply do not start in that case.
  // NOTE(review): only matters if io.redirectIn can be a one-cycle pulse — confirm.
  val newReqFlushed = uop.roqIdx.needFlush(io.redirectIn)

  // An operation already in flight whose uop is flushed by the current redirect.
  val kill = state =/= s_idle && uopReg.roqIdx.needFlush(io.redirectIn)

  val cnt = Counter(len)
  when (newReq && !newReqFlushed) {
    state := s_log2
  } .elsewhen (state === s_log2) {
    // `canSkipShift` is derived as follows:
    //   bEffectiveBit = Log2(bVal) + 1
    //   aLeadingZero  = len - aEffectiveBit = len - (Log2(aVal) + 1)
    //   canSkipShift  = aLeadingZero + bEffectiveBit
    //                 = len + Log2(bVal) - Log2(aVal)
    // (the code uses the registered, doubled dividend `aValx2Reg` for the second log).
    // When `len` is a power of two, Log2(bVal) < len, so the addition can be
    // replaced by a bitwise OR; otherwise fall back to a widening add.
    val skipBase =
      if (isPow2(len)) (len.U | Log2(bReg))
      else (len.U +& Log2(bReg))
    val canSkipShift = skipBase - Log2(aValx2Reg)
    // On divide-by-zero the quotient must be all ones: with bReg == 0 every
    // compute step sees `hi >= bReg` and shifts in a 1. Skipping iterations
    // would shift in zeros instead, so no shift is skipped in that case.
    cnt.value := Mux(divBy0Reg, 0.U, Mux(canSkipShift >= (len-1).U, (len-1).U, canSkipShift))
    state := s_shift
  } .elsewhen (state === s_shift) {
    shiftReg := aValx2Reg << cnt.value
    state := s_compute
  } .elsewhen (state === s_compute) {
    // One step: subtract the divisor if it fits, then shift left by one,
    // inserting the resulting quotient bit at the LSB.
    val enough = hi.asUInt >= bReg.asUInt
    shiftReg := Cat(Mux(enough, hi - bReg, hi)(len - 1, 0), lo, enough)
    cnt.inc()
    when (cnt.value === (len-1).U) { state := s_finish }
  } .elsewhen (state === s_finish) {
    when(io.out.ready){
      state := s_idle
    }
  }

  // Placed after the state chain so that a flush overrides any transition above.
  when(kill){
    state := s_idle
  }

  // After the final step `hi` holds the remainder shifted left by one.
  val r = hi(len, 1)
  val resQ = Mux(qSignReg, -lo, lo)
  // The remainder takes the sign of the dividend.
  val resR = Mux(aSignReg, -r, r)

  val xlen = io.out.bits.data.getWidth
  val res = Mux(ctrlReg.isHi, resR, resQ)
  // For word operations the low 32 bits are sign-extended to the output width.
  io.out.bits.data := Mux(ctrlReg.isW, SignExt(res(31,0),xlen), res)
  io.out.bits.uop := uopReg

  // Do not present a result that is being flushed in this same cycle.
  io.out.valid := state === s_finish && !kill
  io.in.ready := state === s_idle
}