/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

// The "SRT4DividerDataModule" in this file is a scala rewrite of SRT4 divider by Yifei He, see
// https://github.com/OpenXiangShan/XS-Verilog-Library/tree/main/int_div_radix_4_v1
// Email of original author: [email protected]

package xiangshan.backend.fu

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.SignExt
import xiangshan.backend.fu.util.CSA3_2

/** A Radix-4 SRT Integer Divider
  *
  * Data path of the divider. `io.src(0)` is the dividend and `io.src(1)` the divisor.
  *  - `sign`   : treat the operands as signed (operand sign = `sign && msb`).
  *  - `kill_w` : prevents an incoming request from leaving `s_idle`.
  *  - `kill_r` : forces the state machine back to `s_idle` from any state.
  *  - `isHi`   : output the remainder instead of the quotient.
  *  - `isW`    : sign-extend the low 32 bits of the result to `len` bits.
  *
  * The controller is a one-hot FSM:
  * idle -> pre_0 -> pre_1 -> (iter)* -> post_0 -> post_1 -> finish.
  *
  * Original header: "2 ~ (5 + (len+3)/2) cycles are needed for each division."
  * NOTE(review): that figure does not obviously match the FSM below, where every
  * request passes through pre_0, pre_1, post_0, post_1 and finish - confirm.
  */
class SRT4DividerDataModule(len: Int) extends Module {
  val io = IO(new Bundle() {
    val src = Vec(2, Input(UInt(len.W)))
    val valid, sign, kill_w, kill_r, isHi, isW = Input(Bool())
    val in_ready = Output(Bool())
    val out_valid = Output(Bool())
    val out_data = Output(UInt(len.W))
    val out_ready = Input(Bool())
  })

  // consts
  val lzc_width = log2Up(len)
  val itn_len = 1 + len + 2 + 1
  require(lzc_width == 6)

  val (a, d, sign, valid, kill_w, kill_r, isHi, isW) =
    (io.src(0), io.src(1), io.sign, io.valid, io.kill_w, io.kill_r, io.isHi, io.isW)
  val in_fire = valid && io.in_ready
  val out_fire = io.out_ready && io.out_valid
  val newReq = in_fire
  val startHandShake = io.in_ready && valid
  val s_idle :: s_pre_0 :: s_pre_1 :: s_iter :: s_post_0 :: s_post_1 :: s_finish :: Nil = Enum(7)

  // one-hot encoded state: bit i is set while the FSM is in state i
  val state = RegInit(UIntToOH(s_idle, 7))

  // indices of the one-hot quotient digit vector
  val quot_neg_2 :: quot_neg_1 :: quot_0 :: quot_pos_1 :: quot_pos_2 :: Nil = Enum(5)

  val finished = state(s_finish)

  // reused wire declarations
  val aIsZero = Wire(Bool())
  val dIsZero = Wire(Bool())
  val aTooSmall = Wire(Bool()) // this is output of reg!
  val noIter = Wire(Bool()) // this is output of reg!
  val finalIter = Wire(Bool())
  val aLZC = Wire(UInt((lzc_width + 1).W))
  val dLZC = Wire(UInt((lzc_width + 1).W))
  val aNormAbs = Wire(UInt((len + 1).W))
  val dNormAbs = Wire(UInt((len + 1).W))
  val aInverter = Wire(UInt(len.W)) // results of global inverter
  val dInverter = Wire(UInt(len.W))

  val rPreShifted = Wire(UInt((len + 1).W))

  val quotIter = Wire(UInt(len.W))
  val quotM1Iter = Wire(UInt(len.W))
  val qIterEnd = Wire(UInt(5.W))

  val rNext = Wire(UInt(itn_len.W))
  val rNextPd = Wire(UInt(itn_len.W)) // non-redundant remainder plus d, 68, 67
  //reused ctrl regs

  //reused other regs
  // reg for normalized a & d and rem & rem+d
  val aNormAbsReg = RegEnable(aNormAbs, startHandShake | state(s_pre_0) | state(s_post_0))
  val dNormAbsReg = RegEnable(dNormAbs, startHandShake | state(s_pre_0) | state(s_post_0))
  val quotIterReg = RegEnable(quotIter, state(s_pre_1) | state(s_iter) | state(s_post_0))
  val quotM1IterReg = RegEnable(quotM1Iter, state(s_pre_1) | state(s_iter) | state(s_post_0))

  when(kill_r) {
    state := UIntToOH(s_idle, 7)
  } .elsewhen(state(s_idle) && in_fire && !kill_w) {
    state := UIntToOH(s_pre_0, 7)
  } .elsewhen(state(s_pre_0)) { // leading zero detection
    state := UIntToOH(s_pre_1, 7)
  } .elsewhen(state(s_pre_1)) { // shift a/b
    state := Mux(dIsZero | aTooSmall | noIter, UIntToOH(s_post_0, 7), UIntToOH(s_iter, 7))
  } .elsewhen(state(s_iter)) { // (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
    state := Mux(finalIter, UIntToOH(s_post_0, 7), UIntToOH(s_iter, 7))
  } .elsewhen(state(s_post_0)) { // if rem < 0, rem = rem + d
    state := UIntToOH(s_post_1, 7)
  } .elsewhen(state(s_post_1)) {
    state := UIntToOH(s_finish, 7)
  } .elsewhen(state(s_finish) && out_fire) {
    state := UIntToOH(s_idle, 7)
  } .otherwise {
    state := state
  }

  // First cycle:
  // State is idle, we gain absolute value of a and b, using global inverter

  io.in_ready := state(s_idle)

  // The inverters are shared: they negate the operands in s_idle and the
  // quotient / quotient-minus-one registers in every other state.
  aInverter := -Mux(state(s_idle), a, quotIterReg) // 64, 0
  dInverter := -Mux(state(s_idle), d, quotM1IterReg) // 64, 0

  val aSign = io.sign && a(len - 1) // 1
  val dSign = io.sign && d(len - 1)

  val aAbs = Mux(aSign, aInverter, a) // 64, 0
  val dAbs = Mux(dSign, dInverter, d)
  val aNorm = (aNormAbsReg(len - 1, 0) << aLZC(lzc_width - 1, 0))(len - 1, 0) // 64, 65
  val dNorm = (dNormAbsReg(len - 1, 0) << dLZC(lzc_width - 1, 0))(len - 1, 0)

  aNormAbs := Mux1H(Seq(
    state(s_idle) -> Cat(0.U(1.W), aAbs), // 65, 0
    state(s_pre_0) -> Cat(0.U(1.W), aNorm), // 65, 0
    state(s_post_0) -> rNext(len + 3, 3) // remainder 65, 64. highest is sign bit
  ))
  dNormAbs := Mux1H(Seq(
    state(s_idle) -> Cat(0.U(1.W), dAbs),
    state(s_pre_0) -> Cat(0.U(1.W), dNorm),
    state(s_post_0) -> rNextPd(len + 3, 3)
  ))

  // Second cycle, state is pre_0
  // calculate lzc and move div* and lzc diff check if no_iter_needed

  aLZC := PriorityEncoder(aNormAbsReg(len - 1, 0).asBools().reverse)
  dLZC := PriorityEncoder(dNormAbsReg(len - 1, 0).asBools().reverse)
  val aLZCReg = RegEnable(aLZC, state(s_pre_0)) // 7, 0
  val dLZCReg = RegEnable(dLZC, state(s_pre_0))

  val lzcWireDiff = Cat(0.U(1.W), dLZC(lzc_width - 1, 0)) - Cat(0.U(1.W), aLZC(lzc_width - 1, 0)) // 7, 0
  val lzcRegDiff = Cat(0.U(1.W), dLZCReg(lzc_width - 1, 0)) - Cat(0.U(1.W), aLZCReg(lzc_width - 1, 0))
  val lzcDiff = Mux(state(s_pre_0), lzcWireDiff, lzcRegDiff)
  // NOTE(review): PriorityEncoder over `len` bools yields a log2Up(len)-bit value, so
  // bit `lzc_width` of the zero-extended aLZC/dLZC wires looks like it is always 0 and
  // aIsZero/dIsZero would never assert - confirm how zero operands are meant to be detected.
  aIsZero := aLZC(lzc_width) // this is state pre_0
  dIsZero := dLZCReg(lzc_width) // this is pre_1 and all stages after
  val dIsOne = dLZC(lzc_width - 1, 0).andR() // this is pre_0
  val noIterReg = RegEnable(dIsOne & aNormAbsReg(len - 1), state(s_pre_0)) // This means dividend has lzc 0 so iter is 17
  noIter := noIterReg
  val aTooSmallReg = RegEnable(aIsZero | lzcDiff(lzc_width), state(s_pre_0)) // a is zero or a smaller than d
  aTooSmall := aTooSmallReg

  // Outside s_idle this value is only captured when (s_pre_1 & dIsZero), see the
  // enable of quotSignReg. (The original comment was truncated at this point.)
  val quotSign = Mux(state(s_idle), aSign ^ dSign, true.B)
  val rSign = aSign
  val quotSignReg = RegEnable(quotSign, startHandShake | (state(s_pre_1) & dIsZero))
  val rSignReg = RegEnable(rSign, startHandShake)

  val rShift = lzcDiff(0) // odd lzc diff, for SRT4
  val rightShifted = Wire(UInt(len.W))
  val rSumInit = Mux(aTooSmallReg | aIsZero, Cat(0.U(1.W), rightShifted, 0.U(3.W)), // right shift the dividend (which is already l-shifted)
    Mux(noIterReg, 0.U(itn_len.W), //
      Cat(0.U(3.W),
        Mux(rShift, Cat(0.U(1.W), aNormAbsReg(len - 1, 0)), Cat(aNormAbsReg(len - 1, 0), 0.U(1.W)))
      ) // Normal init value. 68, 67; For even lzcDiff, 0.001xxx0; for odd lzcDiff 0.0001xxx
    )
  ) // state is s_pre_1
  val rCarryInit = 0.U(itn_len.W)

  // A single right shifter is shared between s_pre_1 (operand: normalized dividend)
  // and the post-processing states (operand: rPreShifted).
  // NOTE(review): aLZCReg is lzc_width + 1 bits wide while shiftNum is lzc_width bits -
  // confirm the implicit truncation is intended.
  val rightShifter = Module(new RightShifter(len, lzc_width))
  rightShifter.io.in := Mux(state(s_pre_1), aNormAbsReg(len - 1, 0), rPreShifted(len - 1, 0))
  rightShifter.io.shiftNum := Mux(state(s_pre_1), aLZCReg,
    Mux(aTooSmallReg | dIsZero, 0.U(lzc_width.W), dLZCReg))
  rightShifter.io.msb := state(s_post_1) & rSignReg & rPreShifted(len)
  rightShifted := rightShifter.io.out

  // obtaining 1st quotient
  val rSumInitTrunc = Cat(0.U(1.W), rSumInit(itn_len - 4, itn_len - 4 - 4 + 1)) // 0.00___
  val mInitPos1 = MuxLookup(dNormAbsReg(len - 2, len - 2 - 3 + 1), "b00100".U(5.W),
    Array(
      0.U -> "b00100".U(5.W),
      1.U -> "b00100".U(5.W),
      2.U -> "b00100".U(5.W),
      3.U -> "b00110".U(5.W),
      4.U -> "b00110".U(5.W),
      5.U -> "b00110".U(5.W),
      6.U -> "b00110".U(5.W),
      7.U -> "b01000".U(5.W)
    )
  )
  val mInitPos2 = MuxLookup(dNormAbsReg(len - 2, len - 2 - 3 + 1), "b01100".U(5.W),
    Array(
      0.U -> "b01100".U(5.W),
      1.U -> "b01110".U(5.W),
      2.U -> "b01111".U(5.W),
      3.U -> "b10000".U(5.W),
      4.U -> "b10010".U(5.W),
      5.U -> "b10100".U(5.W),
      6.U -> "b10110".U(5.W),
      7.U -> "b10110".U(5.W)
    )
  )
  val initCmpPos1 = rSumInitTrunc >= mInitPos1
  val initCmpPos2 = rSumInitTrunc >= mInitPos2
  val qInit = Mux(initCmpPos2, UIntToOH(quot_pos_2, 5), Mux(initCmpPos1, UIntToOH(quot_pos_1, 5), UIntToOH(quot_0, 5)))
  val qPrev = Mux(state(s_pre_1), qInit, qIterEnd)
  val qPrevReg = RegEnable(qPrev, state(s_pre_1) | state(s_iter))
  val specialDivisorReg = RegEnable(dNormAbsReg(len - 2, len - 2 - 3 + 1) === 0.U, state(s_pre_1)) // d=0.1000xxx

  // rCarry and rSum in Iteration
  val qXd = Mux1H(Seq(
    qPrevReg(quot_neg_2) -> Cat(dNormAbsReg(len - 1, 0), 0.U(4.W)), // 68, 67 1.xxxxx0000
    qPrevReg(quot_neg_1) -> Cat(0.U(1.W), dNormAbsReg(len - 1, 0), 0.U(3.W)), // 0.1xxxxx000
    qPrevReg(quot_0) -> 0.U(itn_len.W),
    qPrevReg(quot_pos_1) -> ~Cat(0.U(1.W), dNormAbsReg(len - 1, 0), 0.U(3.W)), // don't forget to plus 1 later
    qPrevReg(quot_pos_2) -> ~Cat(dNormAbsReg(len - 1, 0), 0.U(4.W)) // don't forget to plus 1 later
  ))
  val csa = Module(new CSA3_2(itn_len))

  val rSumIter = csa.io.out(0)
  // the "+1" of the two's complement for positive digits is injected as the carry LSB
  val rCarryIter = Cat(csa.io.out(1)(itn_len - 2, 0), qPrevReg(quot_pos_1) | qPrevReg(quot_pos_2))
  val rSumReg = RegEnable(Mux(state(s_pre_1), rSumInit, rSumIter), state(s_pre_1) | state(s_iter)) // 68, 67
  val rCarryReg = RegEnable(Mux(state(s_pre_1), rCarryInit, rCarryIter), state(s_pre_1) | state(s_iter))
  csa.io.in(0) := rSumReg << 2
  csa.io.in(1) := rCarryReg << 2
  csa.io.in(2) := qXd

  val qds = Module(new SRT4QDS(len, itn_len))
  qds.io.remSum := rSumReg
  qds.io.remCarry := rCarryReg
  qds.io.d := dNormAbsReg(len - 1, 0) // Maybe optimize here to lower power consumption?
  qds.io.specialDivisor := specialDivisorReg
  qds.io.qPrev := qPrevReg
  qIterEnd := qds.io.qIterEnd

  //on the fly conversion
  val quotIterNext = Wire(UInt(len.W))
  val quotIterM1Next = Wire(UInt(len.W))
  quotIterNext := Mux1H(Seq(
    qPrevReg(quot_pos_2) -> (quotIterReg << 2 | "b10".U),
    qPrevReg(quot_pos_1) -> (quotIterReg << 2 | "b01".U),
    qPrevReg(quot_0) -> (quotIterReg << 2 | "b00".U),
    qPrevReg(quot_neg_1) -> (quotM1IterReg << 2 | "b11".U),
    qPrevReg(quot_neg_2) -> (quotM1IterReg << 2 | "b10".U)
  ))
  quotIterM1Next := Mux1H(Seq(
    qPrevReg(quot_pos_2) -> (quotIterReg << 2 | "b01".U),
    qPrevReg(quot_pos_1) -> (quotIterReg << 2 | "b00".U),
    qPrevReg(quot_0) -> (quotM1IterReg << 2 | "b11".U),
    qPrevReg(quot_neg_1) -> (quotM1IterReg << 2 | "b10".U),
    qPrevReg(quot_neg_2) -> (quotM1IterReg << 2 | "b01".U)
  ))

  quotIter := Mux(state(s_pre_1),
    Mux(dIsZero, VecInit(Seq.fill(len)(true.B)).asUInt,
      Mux(noIterReg, aNormAbsReg(len - 1, 0), 0.U(len.W))),
    Mux(state(s_iter), quotIterNext,
      Mux(quotSignReg, aInverter, quotIterReg)))
  quotM1Iter := Mux(state(s_pre_1),
    0.U(len.W), Mux(state(s_iter), quotIterM1Next,
      Mux(quotSignReg, dInverter, quotM1IterReg)))

  // iter num
  val iterNum = Wire(UInt((lzc_width - 1).W))
  val iterNumReg = RegEnable(iterNum, state(s_pre_1) | state(s_iter))

  iterNum := Mux(state(s_pre_1), lzcDiff(lzc_width - 1, 1) +% lzcDiff(0), iterNumReg -% 1.U)
  finalIter := iterNumReg === 0.U

  // Post Process

  when(rSignReg) {
    rNext := ~rSumReg + ~rCarryReg + 2.U
    rNextPd := ~rSumReg + ~rCarryReg + ~Cat(0.U(1.W), dNormAbsReg(len - 1, 0), 0.U(3.W)) + 3.U
  } .otherwise {
    rNext := rSumReg + rCarryReg
    rNextPd := rSumReg + rCarryReg + Cat(0.U(1.W), dNormAbsReg(len - 1, 0), 0.U(3.W))
  }

  // after s_post_0 the normalization registers hold rem and rem + d
  val r = aNormAbsReg
  val rPd = dNormAbsReg
  val rIsZero = ~(r.orR())
  val needCorr = (~dIsZero & ~noIterReg) & Mux(rSignReg, ~r(len) & ~rIsZero, r(len)) // when we get pos rem for d<0 or neg rem for d>0
  rPreShifted := Mux(needCorr, rPd, r)
  val rFinal = RegEnable(rightShifted, state(s_post_1)) // right shifted remainder. shift by the number of bits divisor is shifted
  val qFinal = Mux(needCorr, quotM1IterReg, quotIterReg)

  val res = Mux(isHi, rFinal, qFinal)
  io.out_data := Mux(isW,
    SignExt(res(31, 0), len),
    res
  )
  // io.in_ready is already driven above (state(s_idle)); the duplicate connect was removed.
  io.out_valid := state(s_finish) // state === s_finish
}

/** Logarithmic right shifter used by the divider.
  *
  * Shifts `in` right by `shiftNum`, filling the vacated upper bits with `msb`.
  * Shift stages for bits 0..4 of `shiftNum` are always present; the stage for
  * bit 5 exists only when `len == 64`.
  *
  * @param len       data width, must be 32 or 64
  * @param lzc_width width of the shift amount input
  */
class RightShifter(len: Int, lzc_width: Int) extends Module {
  val io = IO(new Bundle() {
    val shiftNum = Input(UInt(lzc_width.W))
    val in = Input(UInt(len.W))
    val msb = Input(Bool())
    val out = Output(UInt(len.W))
  })
  require(len == 64 || len == 32)
  val shift = io.shiftNum
  val msb = io.msb
  val s0 = Mux(shift(0), Cat(VecInit(Seq.fill(1)(msb)).asUInt, io.in(len - 1, 1)), io.in)
  val s1 = Mux(shift(1), Cat(VecInit(Seq.fill(2)(msb)).asUInt, s0(len - 1, 2)), s0)
  val s2 = Mux(shift(2), Cat(VecInit(Seq.fill(4)(msb)).asUInt, s1(len - 1, 4)), s1)
  val s3 = Mux(shift(3), Cat(VecInit(Seq.fill(8)(msb)).asUInt, s2(len - 1, 8)), s2)
  val s4 = Mux(shift(4), Cat(VecInit(Seq.fill(16)(msb)).asUInt, s3(len - 1, 16)), s3)
  val s5 = Wire(UInt(len.W))
  if (len == 64) {
    s5 := Mux(shift(5), Cat(VecInit(Seq.fill(32)(msb)).asUInt, s4(len - 1, 32)), s4)
  } else if (len == 32) {
    s5 := s4
  }
  io.out := s5
}

/** Constant tables for SRT4 quotient digit selection.
  *
  * `minus_m(i)` is a lookup table of 7-digit binary constants indexed by a
  * 3-bit divisor slice; entry order is -m[-1], -m[0], -m[1], -m[2].
  */
object mLookUpTable {
  // Usage (as done in SRT4QDS):
  // result := MuxLookup(index, default, mLookUpTable.minus_m(i))
  val minus_m = Seq(
    Array( // -m[-1]
      0.U -> "b00_11010".U,
      1.U -> "b00_11110".U,
      2.U -> "b01_00000".U,
      3.U -> "b01_00100".U,
      4.U -> "b01_00110".U,
      5.U -> "b01_01010".U,
      6.U -> "b01_01100".U,
      7.U -> "b01_10000".U
    ),
    Array( // -m[0]
      0.U -> "b000_0101".U,
      1.U -> "b000_0110".U,
      2.U -> "b000_0110".U,
      3.U -> "b000_0110".U,
      4.U -> "b000_1001".U,
      5.U -> "b000_1000".U,
      6.U -> "b000_1000".U,
      7.U -> "b000_1000".U
    ),
    Array( //-m[1]
      0.U -> "b111_1101".U,
      1.U -> "b111_1100".U,
      2.U -> "b111_1100".U,
      3.U -> "b111_1100".U,
      4.U -> "b111_1011".U,
      5.U -> "b111_1010".U,
      6.U -> "b111_1010".U,
      7.U -> "b111_1010".U
    ),
    Array( //-m[2]
      0.U -> "b11_01000".U,
      1.U -> "b11_00100".U,
      2.U -> "b11_00010".U,
      3.U -> "b10_11110".U,
      4.U -> "b10_11100".U,
      5.U -> "b10_11000".U,
      6.U -> "b10_10110".U,
      7.U -> "b10_10010".U
    ))
}

/** SRT4 quotient digit selection.
  *
  * Takes the carry-save remainder (`remSum`, `remCarry`), the divisor `d` and the
  * previously selected one-hot digit `qPrev`, and outputs the next one-hot digit
  * `qIterEnd` (bit order: neg_2, neg_1, 0, pos_1, pos_2).
  *
  * For each of the four constants in `mLookUpTable.minus_m` a pair of 7-bit 3:2
  * compressors adds truncated remainder + constant + truncated (qPrev * d) term;
  * bit 6 of the resulting sum is then decoded into the digit.
  *
  * `io.specialDivisor` is part of the interface but is not read in this module.
  */
class SRT4QDS(len: Int, itn_len: Int) extends Module {
  // srt4 quotient digit selection
  val io = IO(new Bundle() {
    val remSum = Input(UInt(itn_len.W)) // 68, 67
    val remCarry = Input(UInt(itn_len.W))
    val d = Input(UInt(len.W)) // 64, 64
    val specialDivisor = Input(Bool())
    val qPrev = Input(UInt(5.W))
    val qIterEnd = Output(UInt(5.W))
  })
  val remSumX16 = io.remSum << 4 // 72, 67 Top 2 bits unused
  val remCarryX16 = io.remCarry << 4
  // two different 7-bit windows over the shifted operands
  def trunc25(rem: UInt): UInt = {rem(itn_len, itn_len - 7 + 1)}
  def trunc34(rem: UInt): UInt = {rem(itn_len + 1, itn_len + 1 - 7 + 1)}

  val quot_neg_2 :: quot_neg_1 :: quot_0 :: quot_pos_1 :: quot_pos_2 :: Nil = Enum(5)

  val d = Cat(0.U(1.W), io.d, 0.U(3.W)) // 68, 67
  val (dX4, dX8, dXNeg4, dXNeg8) = (d << 2, d(itn_len - 2, 0) << 3, ~(d << 2), ~(d(itn_len - 2, 0) << 3)) // 70, 67
  val dForLookup = io.d(len - 2, len - 2 - 3 + 1)

  // divisor multiple selected by the previous digit (inverted for positive digits)
  val dXq = Mux1H(Seq(
    io.qPrev(quot_neg_2) -> dX8,
    io.qPrev(quot_neg_1) -> dX4,
    io.qPrev(quot_0) -> 0.U((itn_len + 2).W),
    io.qPrev(quot_pos_1) -> dXNeg4,
    io.qPrev(quot_pos_2) -> dXNeg8
  ))
  val signs = VecInit(Seq.tabulate(4){ // -1 0 1 2
    i => {
      val csa1 = Module(new CSA3_2(7))
      val csa2 = Module(new CSA3_2(7))
      // the two middle comparisons (i = 1, 2) use the trunc34 window, the outer ones trunc25
      if (i == 1 || i == 2) {
        csa1.io.in(0) := trunc34(remSumX16)
        csa1.io.in(1) := trunc34(remCarryX16)
        csa2.io.in(2) := trunc34(dXq)
      } else {
        csa1.io.in(0) := trunc25(remSumX16)
        csa1.io.in(1) := trunc25(remCarryX16)
        csa2.io.in(2) := trunc25(dXq)
      }
      csa1.io.in(2) := MuxLookup(dForLookup, "b0000000".U, mLookUpTable.minus_m(i))
      csa2.io.in(0) := csa1.io.out(0)
      csa2.io.in(1) := csa1.io.out(1)(5, 0) << 1
      // bit 6 of the final 7-bit sum
      (csa2.io.out(0) + (csa2.io.out(1)(5, 0) << 1))(6)
    }
  })
  val qVec = Wire(Vec(5, Bool()))
  qVec(quot_neg_2) := signs(0) && signs(1) && signs(2)
  qVec(quot_neg_1) := ~signs(0) && signs(1) && signs(2)
  qVec(quot_0) := signs(2) && ~signs(1)
  qVec(quot_pos_1) := signs(3) && ~signs(2) && ~signs(1)
  qVec(quot_pos_2) := ~signs(3) && ~signs(2) && ~signs(1)
  io.qIterEnd := qVec.asUInt
  // assert(PopCount(qVec) === 1.U)
}


/** Function-unit wrapper around [[SRT4DividerDataModule]].
  *
  * Latches the uop and control bits of an accepted request, derives the two kill
  * signals from the redirect/flush inputs and forwards the handshake and data
  * signals between the function-unit IO and the data module.
  */
class SRT4Divider(len: Int)(implicit p: Parameters) extends AbstractDivider(len) {

  val newReq = io.in.fire()

  val uop = io.in.bits.uop
  val uopReg = RegEnable(uop, newReq)
  val ctrlReg = RegEnable(ctrl, newReq)

  val divDataModule = Module(new SRT4DividerDataModule(len))

  // kill_w: the incoming uop is flushed; kill_r: the uop being processed is flushed
  val kill_w = uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
  val kill_r = !divDataModule.io.in_ready && uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)

  divDataModule.io.src(0) := io.in.bits.src(0)
  divDataModule.io.src(1) := io.in.bits.src(1)
  divDataModule.io.valid := io.in.valid
  divDataModule.io.sign := sign
  divDataModule.io.kill_w := kill_w
  divDataModule.io.kill_r := kill_r
  divDataModule.io.isHi := ctrlReg.isHi
  divDataModule.io.isW := ctrlReg.isW
  divDataModule.io.out_ready := io.out.ready

  io.in.ready := divDataModule.io.in_ready
  io.out.valid := divDataModule.io.out_valid
  io.out.bits.data := divDataModule.io.out_data
  io.out.bits.uop := uopReg
}