/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.backend.fu

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils.SignExt
import xiangshan.backend.fu.util.CSA3_2

/** A Radix-4 SRT Integer Divider
  *
  * 2 ~ (5 + (len+3)/2) cycles are needed for each division.
  *
  * Ports:
  *  - `src(0)` / `src(1)`: dividend / divisor.
  *  - `valid` / `in_ready`: input handshake; a request is accepted when both are high.
  *  - `sign`: treat the operands as signed.
  *  - `kill_w`: the request being accepted this cycle must not start.
  *  - `kill_r`: abort the in-flight request and return to idle.
  *  - `isHi`: output the remainder instead of the quotient.
  *  - `isW`: output the sign-extended low 32 bits of the result.
  *  - `out_valid` / `out_ready` / `out_data`: output handshake and result.
  *
  * @param len operand width in bits; must be at least 32 because the `isW`
  *            path slices bits 31..0 of the result
  */
class SRT4DividerDataModule(len: Int) extends Module {
  // The isW output path below slices res(31, 0); fail early with a clear message
  // instead of a width error later in the compilation flow.
  require(len >= 32, s"SRT4DividerDataModule requires len >= 32, got $len")

  val io = IO(new Bundle() {
    val src = Vec(2, Input(UInt(len.W)))
    val valid, sign, kill_w, kill_r, isHi, isW = Input(Bool())
    val in_ready = Output(Bool())
    val out_valid = Output(Bool())
    val out_data = Output(UInt(len.W))
    val out_ready = Input(Bool())
  })

  val (a, b, sign, valid, kill_w, kill_r, isHi, isW) =
    (io.src(0), io.src(1), io.sign, io.valid, io.kill_w, io.kill_r, io.isHi, io.isW)
  val in_fire = valid && io.in_ready
  val out_fire = io.out_ready && io.out_valid

  // s_pad_* is not used; they only pad the encoding so that s_finish == 8,
  // which lets `finished` be decoded from a single state bit.
  val s_idle :: s_lzd :: s_normlize :: s_recurrence :: s_recovery_1 :: s_recovery_2 :: s_pad_1 :: s_pad_2 :: s_finish :: Nil = Enum(9)
  require(s_finish.litValue() == 8)

  val state = RegInit(s_idle)
  val finished = state(3).asBool // state === s_finish

  // Remaining recurrence iterations; loaded in s_normlize, decremented in s_recurrence.
  val cnt_next = Wire(UInt(log2Up((len + 3) / 2).W))
  val cnt = RegEnable(cnt_next, state === s_normlize || state === s_recurrence)
  val rec_enough = cnt_next === 0.U
  val newReq = in_fire

  /** Returns (isNegative, magnitude) of `a`; `a` is only treated as negative when `sign` is set. */
  def abs(a: UInt, sign: Bool): (Bool, UInt) = {
    val s = a(len - 1) && sign
    (s, Mux(s, -a, a))
  }

  val (aSign, aVal) = abs(a, sign)
  val (bSign, bVal) = abs(b, sign)
  val aSignReg = RegEnable(aSign, newReq)          // sign applied to the remainder
  val qSignReg = RegEnable(aSign ^ bSign, newReq)  // sign applied to the quotient
  val divZero = b === 0.U
  val divZeroReg = RegEnable(divZero, newReq)

  switch(state) {
    is(s_idle) {
      when(in_fire && !kill_w) {
        // divide-by-zero skips the whole computation
        state := Mux(divZero, s_finish, s_lzd)
      }
    }
    is(s_lzd) { // leading zero detection
      state := s_normlize
    }
    is(s_normlize) { // shift a/b
      state := s_recurrence
    }
    is(s_recurrence) { // (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
      when(rec_enough) {
        state := s_recovery_1
      }
    }
    is(s_recovery_1) { // if rem < 0, rem = rem + d
      state := s_recovery_2
    }
    is(s_recovery_2) { // recovery shift
      state := s_finish
    }
    is(s_finish) {
      when(out_fire) {
        state := s_idle
      }
    }
  }
  // Placed after the switch so that it overrides any transition chosen above.
  when(kill_r) {
    state := s_idle
  }

  /** Calculate abs(a)/abs(b) by recurrence
    *
    * ws, wc: partial remainder in carry-save form,
    * in recurrence steps, ws/wc = 4ws[j]/4wc[j];
    * in recovery step, ws/wc = ws[j]/wc[j];
    * in final step, ws = abs(a)/abs(b).
    *
    * d: normalized divisor(1/2<=d<1)
    *
    * wLen = 3 integer bits + (len+1) frac bits
    */
  def wLen = 3 + len + 1

  val ws, wc = Reg(UInt(wLen.W))
  val ws_next, wc_next = Wire(UInt(wLen.W))
  val d = Reg(UInt(wLen.W))

  // Leading-zero counts of |a| (held in ws) and |b| (held in d), captured in s_lzd.
  val aLeadingZeros = RegEnable(
    next = PriorityEncoder(ws(len - 1, 0).asBools().reverse),
    enable = state === s_lzd
  )
  val bLeadingZeros = RegEnable(
    next = PriorityEncoder(d(len - 1, 0).asBools().reverse),
    enable = state === s_lzd
  )
  // diff = lzc(b) - lzc(a), computed with one extra bit so the sign is representable.
  val diff = Cat(0.U(1.W), bLeadingZeros).asSInt() - Cat(0.U(1.W), aLeadingZeros).asSInt()
  val isNegDiff = diff(diff.getWidth - 1)
  val quotientBits = Mux(isNegDiff, 0.U, diff.asUInt())
  val qBitsIsOdd = quotientBits(0)
  val recoveryShift = RegEnable(len.U - bLeadingZeros, state === s_normlize)
  val a_shifted, b_shifted = Wire(UInt(len.W))
  a_shifted := Mux(isNegDiff,
    ws(len - 1, 0) << bLeadingZeros,
    ws(len - 1, 0) << aLeadingZeros
  )
  b_shifted := d(len - 1, 0) << bLeadingZeros

  // Remainder recovery is split over two registered stages (s_recovery_1 / s_recovery_2).
  val rem_temp = ws + wc
  val rem_fixed = RegEnable(Mux(rem_temp(wLen - 1), rem_temp + d, rem_temp), state === s_recovery_1)
  val rem_abs = RegEnable((rem_fixed << recoveryShift) (2 * len, len + 1), state === s_recovery_2)

  when(newReq) {
    // On divide-by-zero the raw dividend is kept in ws; it is returned as the remainder.
    ws := Cat(0.U(4.W), Mux(divZero, a, aVal))
    wc := 0.U
    d := Cat(0.U(4.W), bVal)
  }.elsewhen(state === s_normlize) {
    d := Cat(0.U(3.W), b_shifted, 0.U(1.W))
    ws := Mux(qBitsIsOdd, a_shifted, a_shifted << 1)
  }.elsewhen(state === s_recurrence) {
    // The last iteration keeps the unshifted remainder for the recovery step.
    ws := Mux(rec_enough, ws_next, ws_next << 2)
    wc := Mux(rec_enough, wc_next, wc_next << 2)
  }

  cnt_next := Mux(state === s_normlize, (quotientBits + 3.U) >> 1, cnt - 1.U)

  /** Quotient selection
    *
    * the quotient selection table uses truncated 7-bit remainder
    * and 3-bit divisor
    */
  val sel_0 :: sel_d :: sel_dx2 :: sel_neg_d :: sel_neg_dx2 :: Nil = Enum(5)
  val dx2, neg_d, neg_dx2 = Wire(UInt(wLen.W))
  dx2 := d << 1
  neg_d := (~d).asUInt() // add '1' in carry-save adder later
  neg_dx2 := neg_d << 1

  val q_sel = Wire(UInt(3.W))
  // Carry-in that completes the two's complement of -d (+1) or -2d (+2);
  // injected into the two low bits of the carry word.
  val wc_adj = MuxLookup(q_sel, 0.U(2.W), Seq(
    sel_d -> 1.U(2.W),
    sel_dx2 -> 2.U(2.W)
  ))

  val w_truncated = (ws(wLen - 1, wLen - 1 - 6) + wc(wLen - 1, wLen - 1 - 6)).asSInt()
  val d_truncated = b_shifted.tail(1).head(3)

  // One row per value of d_truncated; each row holds the four comparison
  // thresholds separating the digits 2, 1, 0, -1, -2.
  val qSelTable = Array(
    Array(12, 4, -4, -13),
    Array(14, 4, -6, -15),
    Array(15, 4, -6, -16),
    Array(16, 4, -6, -18),
    Array(18, 6, -8, -20),
    Array(20, 6, -8, -20),
    Array(20, 8, -8, -22),
    Array(24, 8, -8, -24)
  )

  // The row is selected and registered in s_normlize, while b_shifted is still
  // derived from the un-normalized divisor held in d.
  val table = RegEnable(
    VecInit(qSelTable.map(row =>
      VecInit(row.map(k => k.S(7.W)))
    ))(d_truncated),
    state === s_normlize
  )

  q_sel := MuxCase(sel_neg_dx2,
    table.zip(Seq(sel_dx2, sel_d, sel_0, sel_neg_d)).map {
      case (k, s) => (w_truncated >= k) -> s
    }
  )

  /** Calculate (ws[j+1],wc[j+1]) by a [3-2]carry-save adder
    *
    * (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
    */
  val csa = Module(new CSA3_2(wLen))
  csa.io.in(0) := ws
  csa.io.in(1) := Cat(wc(wLen - 1, 2), wc_adj)
  csa.io.in(2) := MuxLookup(q_sel, 0.U, Seq(
    sel_d -> neg_d,
    sel_dx2 -> neg_dx2,
    sel_neg_d -> d,
    sel_neg_dx2 -> dx2
  ))
  ws_next := csa.io.out(0)
  wc_next := csa.io.out(1) << 1

  // On the fly quotient conversion
  // q holds the quotient so far and qm holds (q - 1); each step appends two bits
  // to one of them, so no carry-propagating subtraction is needed.
  val q, qm = Reg(UInt(len.W))
  when(newReq) {
    q := 0.U
    qm := 0.U
  }.elsewhen(state === s_recurrence) {
    val qMap = Seq(
      sel_0 -> (q, 0),
      sel_d -> (q, 1),
      sel_dx2 -> (q, 2),
      sel_neg_d -> (qm, 3),
      sel_neg_dx2 -> (qm, 2)
    )
    q := MuxLookup(q_sel, 0.U,
      qMap.map(m => m._1 -> Cat(m._2._1(len - 3, 0), m._2._2.U(2.W)))
    )
    val qmMap = Seq(
      sel_0 -> (qm, 3),
      sel_d -> (q, 0),
      sel_dx2 -> (q, 1),
      sel_neg_d -> (qm, 2),
      sel_neg_dx2 -> (qm, 1)
    )
    qm := MuxLookup(q_sel, 0.U,
      qmMap.map(m => m._1 -> Cat(m._2._1(len - 3, 0), m._2._2.U(2.W)))
    )
  }.elsewhen(state === s_recovery_1) {
    // A negative final remainder means the quotient is one too large: take q - 1.
    q := Mux(rem_temp(wLen - 1), qm, q)
  }


  val remainder = Mux(aSignReg, -rem_abs(len - 1, 0), rem_abs(len - 1, 0))
  val quotient = Mux(qSignReg, -q, q)

  // Divide-by-zero: remainder = dividend (still held in ws), quotient = all ones.
  val res = Mux(isHi,
    Mux(divZeroReg, ws(len - 1, 0), remainder),
    Mux(divZeroReg, Fill(len, 1.U(1.W)), quotient)
  )
  io.out_data := Mux(isW,
    SignExt(res(31, 0), len),
    res
  )
  io.in_ready := state === s_idle
  io.out_valid := finished // state === s_finish
}

/** Wraps [[SRT4DividerDataModule]] behind the `AbstractDivider` interface.
  *
  * Latches the uop and control signals of the accepted request and derives the
  * two kill signals from the redirect/flush inputs.
  *
  * NOTE(review): `io`, `ctrl` and `sign` are inherited from `AbstractDivider`,
  * which is not visible in this file — confirm their definitions there.
  */
class SRT4Divider(len: Int)(implicit p: Parameters) extends AbstractDivider(len) {

  val newReq = io.in.fire()

  val uop = io.in.bits.uop
  val uopReg = RegEnable(uop, newReq)
  val ctrlReg = RegEnable(ctrl, newReq)

  val divDataModule = Module(new SRT4DividerDataModule(len))

  // kill_w: the incoming uop is being flushed.
  // kill_r: the latched uop is being flushed while the data module is not idle.
  val kill_w = uop.roqIdx.needFlush(io.redirectIn, io.flushIn)
  val kill_r = !divDataModule.io.in_ready && uopReg.roqIdx.needFlush(io.redirectIn, io.flushIn)

  divDataModule.io.src(0) := io.in.bits.src(0)
  divDataModule.io.src(1) := io.in.bits.src(1)
  divDataModule.io.valid := io.in.valid
  divDataModule.io.sign := sign
  divDataModule.io.kill_w := kill_w
  divDataModule.io.kill_r := kill_r
  // isHi / isW come from the latched control, since they are consumed at output time.
  divDataModule.io.isHi := ctrlReg.isHi
  divDataModule.io.isW := ctrlReg.isW
  divDataModule.io.out_ready := io.out.ready

  io.in.ready := divDataModule.io.in_ready
  io.out.valid := divDataModule.io.out_valid
  io.out.bits.data := divDataModule.io.out_data
  io.out.bits.uop := uopReg
}