/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Elisardo Antelo, Tomas Lang, Paolo Montuschi, and Alberto Nannarelli. "[Digit-recurrence dividers with reduced
* logical depth.](https://doi.org/10.1109/TC.2005.115)" IEEE Transactions on Computers 54.7: 837-851. 2005.
***************************************************************************************/

// This file contains components originally written by Yifei He, see
// https://github.com/OpenXiangShan/XS-Verilog-Library/tree/main/int_div_radix_4_v1
// Email of original author: [email protected]

package xiangshan.backend.fu

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.util.CSA3_2

/**
  * Data path and control FSM of a digit-recurrence (SRT) integer divider.
  *
  * Each `s_iter` cycle retires up to two radix-4 quotient digits from the digit set
  * {-2, -1, 0, +1, +2}: the first digit (`qPrevReg`) was selected in the previous cycle, the
  * second (`qNext`) is selected in the current one, and the digit for the following cycle
  * (`qNext2`) is picked from five speculative candidates.
  *
  * State sequence (one-hot encoded in `state`):
  * `s_idle -> s_pre_0 -> s_pre_1 -> s_iter* -> s_post_0 -> s_post_1 -> s_finish -> s_idle`.
  * When `special` is set in `s_pre_1` the FSM jumps directly to `s_post_1`.
  *
  * Control and handshake, as implemented below:
  *  - a request is accepted on `io.valid && io.in_ready`; `in_ready` is high only in `s_idle`;
  *  - `kill_w` suppresses the `s_idle -> s_pre_0` transition for the request being accepted;
  *  - `kill_r` forces the FSM back to `s_idle` from any state (highest priority);
  *  - `out_valid` is high in `s_finish`; `out_validNext` is high one state earlier (`s_post_1`);
  *  - the FSM leaves `s_finish` when `io.out_ready` is high.
  *
  * Result selection: `isHi` selects the remainder instead of the quotient and `isW` sign-extends
  * bits 31..0 of the selected value. Both inputs are read combinationally at the output.
  * NOTE(review): this presumably requires the caller to hold `isHi`/`isW` stable until the result
  * is consumed -- confirm against the wrapper.
  *
  * Special results (registered in `s_pre_1`):
  *  - divisor is zero: quotient is all ones, remainder is the original dividend;
  *  - dividend too small (`aTooSmall`): quotient is 0, remainder is the original dividend;
  *  - otherwise (divisor magnitude one): quotient is the dividend, negated when the divisor is
  *    negative; remainder is 0.
  *
  * @param len operand width in bits. The word-mode path slices bits 31..0, so `len` must be at
  *            least 32. NOTE(review): the bit-width remarks in the comments below were written
  *            for `len = 64`.
  */
class SRT16DividerDataModule(len: Int) extends Module {
  val io = IO(new Bundle() {
    val src = Vec(2, Input(UInt(len.W)))
    val valid, sign, kill_w, kill_r, isHi, isW = Input(Bool())
    val in_ready = Output(Bool())
    val out_valid = Output(Bool())
    val out_validNext = Output(Bool())
    val out_data = Output(UInt(len.W))
    val out_ready = Input(Bool())
  })

  // consts
  val lzc_width = log2Up(len)
  val itn_len = 1 + len + 2 + 1

  val (a, d, sign, valid, kill_w, kill_r, isHi, isW) =
    (io.src(0), io.src(1), io.sign, io.valid, io.kill_w, io.kill_r, io.isHi, io.isW)
  val in_fire = valid && io.in_ready
  val out_fire = io.out_ready && io.out_valid
  val newReq = in_fire
  val s_idle :: s_pre_0 :: s_pre_1 :: s_iter :: s_post_0 :: s_post_1 :: s_finish :: Nil = Enum(7)
  val quot_neg_2 :: quot_neg_1 :: quot_0 :: quot_pos_1 :: quot_pos_2 :: Nil = Enum(5)

  // One-hot state register; bit k is set while the FSM is in the k-th state of the Enum above.
  val state = RegInit((1 << s_idle.litValue.toInt).U(7.W))

  // reused wires
//  val aNormAbs = Wire(UInt((len + 1).W)) // Inputs of xNormAbs regs below
//  val dNormAbs = Wire(UInt((len + 1).W))
  val quotIter = Wire(UInt(len.W))
  val quotM1Iter = Wire(UInt(len.W))
  val aLZC = Wire(UInt((lzc_width + 1).W))
  val dLZC = Wire(UInt((lzc_width + 1).W))

  val rNext = Wire(UInt(itn_len.W))
  val rNextPd = Wire(UInt(itn_len.W))

  val aInverter = Wire(UInt(len.W)) // results of global inverter
  val dInverter = Wire(UInt(len.W))

  val finalIter = Wire(Bool())
  val special = Wire(Bool())

  // reused regs
//  val aNormAbsReg = RegEnable(aNormAbs, newReq | state(s_pre_0) | state(s_post_0)) // reg for normalized a & d and rem & rem+d
//  val dNormAbsReg = RegEnable(dNormAbs, newReq | state(s_pre_0) | state(s_post_0))
  val quotIterReg = RegEnable(quotIter, state(s_pre_1) | state(s_iter) | state(s_post_0))
  val quotM1IterReg = RegEnable(quotM1Iter, state(s_pre_1) | state(s_iter) | state(s_post_0))
  val specialReg = RegEnable(special, state(s_pre_1))
  val aReg = RegEnable(a, in_fire)

  // FSM. kill_r has the highest priority and returns to idle from any state.
  when(kill_r) {
    state := UIntToOH(s_idle, 7)
  } .elsewhen(state(s_idle) && in_fire && !kill_w) {
    state := UIntToOH(s_pre_0, 7)
  } .elsewhen(state(s_pre_0)) { // leading zero detection
    state := UIntToOH(s_pre_1, 7)
  } .elsewhen(state(s_pre_1)) { // shift a/b
    state := Mux(special, UIntToOH(s_post_1, 7), UIntToOH(s_iter, 7))
  } .elsewhen(state(s_iter)) { // (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
    state := Mux(finalIter, UIntToOH(s_post_0, 7), UIntToOH(s_iter, 7))
  } .elsewhen(state(s_post_0)) { // if rem < 0, rem = rem + d
    state := UIntToOH(s_post_1, 7)
  } .elsewhen(state(s_post_1)) {
    state := UIntToOH(s_finish, 7)
  } .elsewhen(state(s_finish) && io.out_ready) {
    state := UIntToOH(s_idle, 7)
  } .otherwise {
    state := state
  }

  // io.in_ready := state(s_idle)
  // The two negators are shared: in s_idle they negate the operands, in every other state they
  // negate the quotient registers (consumed by quotIter / quotM1Iter below).
  aInverter := -Mux(state(s_idle), a, quotIterReg) // 64, 0
  dInverter := -Mux(state(s_idle), d, quotM1IterReg) // 64, 0

  val aSign = io.sign && a(len - 1) // 1
  val dSign = io.sign && d(len - 1)
  val dSignReg = RegEnable(dSign, newReq)

  val aAbs = Mux(aSign, aInverter, a) // 64, 0
  val dAbs = Mux(dSign, dInverter, d)
  val aAbsReg = RegEnable(aAbs, newReq)
  val dAbsReg = RegEnable(dAbs, newReq)

  // Normalisation: shift out the leading zeros of both magnitudes.
  val aNorm = (aAbsReg(len - 1, 0) << aLZC(lzc_width - 1, 0))(len - 1, 0) // 64, 65
  val dNorm = (dAbsReg(len - 1, 0) << dLZC(lzc_width - 1, 0))(len - 1, 0)

  val aNormReg = RegEnable(aNorm, state(s_pre_0))
  val dNormReg = RegEnable(dNorm, state(s_pre_0))

//  aNormAbs := Mux1H(Seq(
//    state(s_idle) -> Cat(0.U(1.W), aAbs), // 65, 0
//    state(s_pre_0) -> Cat(0.U(1.W), aNorm), // 65, 0
//    state(s_post_0) -> rNext(len + 3, 3) // remainder 65, 64. highest is sign bit
//  ))
//  dNormAbs := Mux1H(Seq(
//    state(s_idle) -> Cat(0.U(1.W), dAbs),
//    state(s_pre_0) -> Cat(0.U(1.W), dNorm),
//    state(s_post_0) -> rNextPd(len + 3, 3)
//  ))

  // Second cycle, state is pre_0
  // calculate lzc and move div* and lzc diff check if no_iter_needed

  aLZC := PriorityEncoder(aAbsReg(len - 1, 0).asBools.reverse)
  dLZC := PriorityEncoder(dAbsReg(len - 1, 0).asBools.reverse)
  val aLZCReg = RegEnable(aLZC, state(s_pre_0)) // 7, 0
  val dLZCReg = RegEnable(dLZC, state(s_pre_0))

  val lzcWireDiff = Cat(0.U(1.W), dLZC(lzc_width - 1, 0)) - Cat(0.U(1.W), aLZC(lzc_width - 1, 0)) // 7, 0
  val lzcRegDiff = Cat(0.U(1.W), dLZCReg(lzc_width - 1, 0)) - Cat(0.U(1.W), aLZCReg(lzc_width - 1, 0))
//  val lzcDiff = Mux(state(s_pre_0), lzcWireDiff, lzcRegDiff)

  // special case:
  // divisor is 1 or -1; dividend has less bits than divisor; divisor is zero
  // s_pre_0:
  val dIsOne = dLZC(lzc_width - 1, 0).andR
  val dIsZero = ~dNormReg.orR
  val aIsZero = RegEnable(aLZC(lzc_width), state(s_pre_0))
  val aTooSmall = RegEnable(aLZC(lzc_width) | lzcWireDiff(lzc_width), state(s_pre_0))
  special := dIsOne | dIsZero | aTooSmall

  val quotSpecial = Mux(dIsZero, VecInit(Seq.fill(len)(true.B)).asUInt,
                      Mux(aTooSmall, 0.U,
                        Mux(dSignReg, -aReg, aReg) // signed 2^(len-1)
                      ))
  val remSpecial = Mux(dIsZero || aTooSmall, aReg, 0.U)
  val quotSpecialReg = RegEnable(quotSpecial, state(s_pre_1))
  val remSpecialReg = RegEnable(remSpecial, state(s_pre_1))

  // s_pre_1
  // quotSignReg is loaded on in_fire and again in s_pre_1 when dIsZero. Outside s_idle the only
  // enabled case is therefore s_pre_1 & dIsZero, where the value written is true.B.
  val quotSign = Mux(state(s_idle), aSign ^ dSign, true.B)
  val rSign = aSign
  val quotSignReg = RegEnable(quotSign, in_fire | (state(s_pre_1) & dIsZero))
  val rSignReg = RegEnable(rSign, in_fire)

  val rShift = lzcRegDiff(0)
  val oddIter = lzcRegDiff(1) ^ lzcRegDiff(0)
  val iterNum = Wire(UInt((lzc_width - 2).W))
  val iterNumReg = RegEnable(iterNum, state(s_pre_1) | state(s_iter))
  // Loaded in s_pre_1, counted down in s_iter; the iteration with iterNumReg == 0 is the last one.
  iterNum := Mux(state(s_pre_1), (lzcRegDiff + 1.U) >> 2, iterNumReg -% 1.U)
  finalIter := iterNumReg === 0.U

  val rSumInit = Cat(0.U(3.W), Mux(rShift, Cat(0.U(1.W), aNormReg), Cat(aNormReg, 0.U(1.W)))) //(1, 67), 0.001xxx
  val rCarryInit = 0.U(itn_len.W)

  val rSumInitTrunc = Cat(0.U(1.W), rSumInit(itn_len - 4, itn_len - 4 - 4 + 1)) // 0.00___
  // Thresholds for the initial quotient digit, indexed by three bits of the normalised divisor.
  val mInitPos1 = MuxLookup(dNormReg(len-2, len-4), "b00100".U(5.W))(
    Seq(
      0.U -> "b00100".U(5.W),
      1.U -> "b00100".U(5.W),
      2.U -> "b00100".U(5.W),
      3.U -> "b00110".U(5.W),
      4.U -> "b00110".U(5.W),
      5.U -> "b00110".U(5.W),
      6.U -> "b00110".U(5.W),
      7.U -> "b01000".U(5.W),
    )
  )
  val mInitPos2 = MuxLookup(dNormReg(len-2, len-4), "b01100".U(5.W))(
    Seq(
      0.U -> "b01100".U(5.W),
      1.U -> "b01110".U(5.W),
      2.U -> "b01111".U(5.W),
      3.U -> "b10000".U(5.W),
      4.U -> "b10010".U(5.W),
      5.U -> "b10100".U(5.W),
      6.U -> "b10110".U(5.W),
      7.U -> "b10110".U(5.W),
    )
  )
  val initCmpPos1 = rSumInitTrunc >= mInitPos1
  val initCmpPos2 = rSumInitTrunc >= mInitPos2
  val qInit = Mux(initCmpPos2, UIntToOH(quot_pos_2, 5), Mux(initCmpPos1, UIntToOH(quot_pos_1, 5), UIntToOH(quot_0, 5)))

  // in pre_1 we also obtain m_i + 16u * d for all u
  // udNeg -> (rud, r2ud) -> (rudPmNeg, r2udPmNeg)
  val dPos = Cat(0.U(1.W), dNormReg) // +d, 0.1xxx, (1, 64)
  val dNeg = -Cat(0.U(1.W), dNormReg) // -d, 1.xxxx, (1, 64)
  // val m = Wire(Vec(4, UInt(7.W))) // we have to sigext them to calculate rqd-m_k

  // index 0 is for q=-2 and 4 is for q=2!!!
  val mNeg = Wire(Vec(4, UInt(12.W))) // selected m, extended to (6, 6) bits
  val rudNeg = Wire(Vec(5, UInt(10.W))) // (4, 6)
  val r2udNeg = Wire(Vec(5, UInt(12.W))) // (6, 6)

  // Selection Block with improved timing
  val rudPmNeg = Wire(Vec(5, Vec(4, UInt(10.W)))) // -(r*u*d+m_k), (5, 5) bits
  val r2ws = Wire(UInt(10.W)) // r^2*ws (5, 5) bits
  val r2wc = Wire(UInt(10.W))
  // calculating exact values of w
  val udNeg = Wire(Vec(5, UInt(itn_len.W))) // (3, 65), 1 signExt'ed Bit
  // val r3udNeg = Wire(Vec(5, UInt(13.W)))

  // Speculative Block
  val r2udPmNeg = Wire(Vec(5, Vec(4, UInt(13.W)))) // -(r^2*d*d+m_k), (7, 6) bits. 1st index for q 2nd for m
  val r3ws = Wire(UInt(13.W)) // r^3*ws, (7, 6) bits
  val r3wc = Wire(UInt(13.W))
  val qSpec = Wire(Vec(5, UInt(5.W))) // 5 speculative results of qNext2
  // output wires
  val qNext = Wire(UInt(5.W))
  val qNext2 = Wire(UInt(5.W))
  val rCarryIter = Wire(UInt(itn_len.W)) // (1, 67)
  val rSumIter = Wire(UInt(itn_len.W))
  // val r3wsIter = Wire(UInt(13.W))
  // val r3wcIter = Wire(UInt(13.W))
  // Input Regs of whole Spec + Sel + sum adder block
  val qPrevReg = RegEnable(Mux(state(s_pre_1), qInit, qNext2), state(s_pre_1) | state(s_iter))
  val rSumReg = RegEnable(Mux(state(s_pre_1), rSumInit, rSumIter), state(s_pre_1) | state(s_iter)) // (1, 67)
  val rCarryReg = RegEnable(Mux(state(s_pre_1), rCarryInit, rCarryIter), state(s_pre_1) | state(s_iter))

  // Give values to the regs and wires above...
  // Three divisor bits that index the -m tables. dPos is {0, dNormReg}, so this slice is the
  // same as dNormReg(len-2, len-4).
  val dForLookup = dPos(len-2, len-4)
  mNeg := VecInit(
    Cat(SignExt(MuxLookup(dForLookup, 0.U(7.W))(mLookUpTable2.minus_m(0)), 11), 0.U(1.W)), // (2, 5) -> (6, 6)
    Cat(SignExt(MuxLookup(dForLookup, 0.U(7.W))(mLookUpTable2.minus_m(1)), 10), 0.U(2.W)), // (3, 4) -> (6, 6)
    Cat(SignExt(MuxLookup(dForLookup, 0.U(7.W))(mLookUpTable2.minus_m(2)), 10), 0.U(2.W)),
    Cat(SignExt(MuxLookup(dForLookup, 0.U(7.W))(mLookUpTable2.minus_m(3)), 11), 0.U(1.W))
  )
  // Every entry is itn_len bits wide: the sign-extension width plus the appended zeros.
  // (itn_len - 2 and itn_len - 1 evaluate to 66 and 67 for len = 64.)
  udNeg := VecInit(
    Cat(SignExt(dPos, itn_len - 2), 0.U(2.W)),
    Cat(SignExt(dPos, itn_len - 1), 0.U(1.W)),
    0.U,
    Cat(SignExt(dNeg, itn_len - 1), 0.U(1.W)),
    Cat(SignExt(dNeg, itn_len - 2), 0.U(2.W))
  )

  rudNeg := VecInit(Seq.tabulate(5){i => udNeg(i)(itn_len-2, itn_len-11)})
  r2udNeg := VecInit(Seq.tabulate(5){i => udNeg(i)(itn_len-2, itn_len-13)})
  // r3udNeg := VecInit(Seq.tabulate(5){i => udNeg(i)(itn_len-2, itn_len-13)})
  rudPmNeg := VecInit(Seq.tabulate(5){i => VecInit(Seq.tabulate(4){ j => SignExt(rudNeg(i)(9, 1), 10) + mNeg(j)(10, 1)})})
  r2udPmNeg := VecInit(Seq.tabulate(5){i => VecInit(Seq.tabulate(4){ j => SignExt(r2udNeg(i), 13) + SignExt(mNeg(j), 13)})})
  r3ws := rSumReg(itn_len-1, itn_len-13)
  r3wc := rCarryReg(itn_len-1, itn_len-13)

  r2ws := rSumReg(itn_len-1, itn_len-10)
  r2wc := rCarryReg(itn_len-1, itn_len-10)

  // Divisor-dependent constants are computed once in s_pre_1 and held for all iterations.
  val udNegReg = RegEnable(udNeg, state(s_pre_1))
//  val rudNegReg = RegEnable(rudNeg, state(s_pre_1))
  val rudPmNegReg = RegEnable(rudPmNeg, state(s_pre_1))
  val r2udPmNegReg = RegEnable(r2udPmNeg, state(s_pre_1))

  /**
    * Converts four comparison sign bits into a 5-bit quotient-digit vector whose bit positions
    * follow the `quot_*` enumeration.
    * NOTE(review): the result is one-hot only for monotonic sign patterns; this is presumably
    * guaranteed by the ordering of the thresholds -- confirm.
    *
    * @param signs the four sign bits; index k comes from the comparison against table k
    * @param name  suggested name for the generated wire
    */
  def DetectSign(signs: UInt, name: String): UInt = {
    val qVec = Wire(Vec(5, Bool())).suggestName(name)
    qVec(quot_neg_2) := signs(0) && signs(1) && signs(2)
    qVec(quot_neg_1) := ~signs(0) && signs(1) && signs(2)
    qVec(quot_0) := signs(2) && ~signs(1)
    qVec(quot_pos_1) := signs(3) && ~signs(2) && ~signs(1)
    qVec(quot_pos_2) := ~signs(3) && ~signs(2) && ~signs(1)
    qVec.asUInt
  }
  // Selection block
  val signs = VecInit(Seq.tabulate(4){ i => {
    val csa = Module(new CSA3_2(10)).suggestName(s"csa_sel_${i}")
    csa.io.in(0) := r2ws
    csa.io.in(1) := r2wc
    csa.io.in(2) := Mux1H(qPrevReg, rudPmNegReg.toSeq)(i) // rudPmNeg(OHToUInt(qPrevReg))(i)

    // Only the MSB (sign) of the 10-bit sum is needed.
    (csa.io.out(0) + (csa.io.out(1)(8, 0) << 1))(9)
  }})
  qNext := DetectSign(signs.asUInt, s"sel_q")
  val csaWide1 = Module(new CSA3_2(itn_len)).suggestName("csa_sel_wide_1")
  val csaWide2 = Module(new CSA3_2(itn_len)).suggestName("csa_sel_wide_2")
  csaWide1.io.in(0) := rSumReg << 2
  csaWide1.io.in(1) := rCarryReg << 2
  csaWide1.io.in(2) := Mux1H(qPrevReg, udNegReg.toSeq) << 2//udNeg(OHToUInt(qPrevReg)) << 2
  csaWide2.io.in(0) := csaWide1.io.out(0) << 2
  csaWide2.io.in(1) := (csaWide1.io.out(1) << 1)(itn_len-1, 0) << 2
  csaWide2.io.in(2) := Mux1H(qNext, udNegReg.toSeq) << 2 // udNeg(OHToUInt(qNext)) << 2
  // In the final iteration with ~oddIter only the first radix-4 step is taken.
  rSumIter := Mux(~oddIter & finalIter, csaWide1.io.out(0), csaWide2.io.out(0))
  rCarryIter := Mux(~oddIter & finalIter, (csaWide1.io.out(1) << 1)(itn_len-1, 0), (csaWide2.io.out(1) << 1)(itn_len-1, 0))
  // r3wsIter := r3udNeg(OHToUInt(qNext))
  // r3wcIter := (csaWide1.io.out(0)(itn_len-3, itn_len-16) + (csaWide1.io.out(1) << 1)(itn_len-3, itn_len-16))(13,1)
  // Speculative block
  qSpec := VecInit(Seq.tabulate(5){ q_spec => {
    val csa1 = Module(new CSA3_2(13)).suggestName(s"csa_spec_${q_spec}")
    csa1.io.in(0) := r3ws
    csa1.io.in(1) := r3wc
    csa1.io.in(2) := SignExt(udNegReg(q_spec)(itn_len-2, itn_len-11), 13) // (4, 6) -> (7, 6)
    val signs2 = VecInit(Seq.tabulate(4){ i => {
      val csa2 = Module(new CSA3_2(13)).suggestName(s"csa_spec_${q_spec}_${i}")
      csa2.io.in(0) := csa1.io.out(0)
      csa2.io.in(1) := (csa1.io.out(1) << 1)(12, 0)
      csa2.io.in(2) := Mux1H(qPrevReg, r2udPmNegReg.toSeq)(i) // r2udPmNeg(OHToUInt(qPrevReg))(i)
      (csa2.io.out(0) + (csa2.io.out(1)(11, 0) << 1))(12)
    }})
    val qVec2 = DetectSign(signs2.asUInt, s"spec_q_${q_spec}")
    qVec2
  }})
  // qNext2 := qSpec(OHToUInt(qNext)) // TODO: Use Mux1H!!

  qNext2 := Mux1H(qNext, qSpec.toSeq)

  // on the fly quotient conversion
  // These wires carry OTFC results, which are len bits wide.
  val quotHalfIter = Wire(UInt(len.W))
  val quotM1HalfIter = Wire(UInt(len.W))
  val quotIterNext = Wire(UInt(len.W))
  val quotM1IterNext = Wire(UInt(len.W))

  /**
    * One radix-4 step of on-the-fly conversion.
    *
    * @param q      5-bit quotient-digit vector indexed by the `quot_*` enumeration
    * @param quot   current partial quotient
    * @param quotM1 second partial quotient, updated with a digit one lower than `quot`
    * @return the updated `(quot, quotM1)` pair, each truncated to `len` bits
    */
  def OTFC(q: UInt, quot: UInt, quotM1: UInt): (UInt, UInt) = {
    val quotNext = Mux1H(Seq(
      q(quot_pos_2) -> (quot << 2 | "b10".U),
      q(quot_pos_1) -> (quot << 2 | "b01".U),
      q(quot_0) -> (quot << 2 | "b00".U),
      q(quot_neg_1) -> (quotM1 << 2 | "b11".U),
      q(quot_neg_2) -> (quotM1 << 2 | "b10".U)
    ))
    val quotM1Next = Mux1H(Seq(
      q(quot_pos_2) -> (quot << 2 | "b01".U),
      q(quot_pos_1) -> (quot << 2 | "b00".U),
      q(quot_0) -> (quotM1 << 2 | "b11".U),
      q(quot_neg_1) -> (quotM1 << 2 | "b10".U),
      q(quot_neg_2) -> (quotM1 << 2 | "b01".U)
    ))
    (quotNext(len-1, 0), quotM1Next(len-1, 0))
  }
  quotHalfIter := OTFC(qPrevReg, quotIterReg, quotM1IterReg)._1
  quotM1HalfIter := OTFC(qPrevReg, quotIterReg, quotM1IterReg)._2
  quotIterNext := Mux(~oddIter && finalIter, quotHalfIter, OTFC(qNext, quotHalfIter, quotM1HalfIter)._1)
  quotM1IterNext := Mux(~oddIter && finalIter, quotM1HalfIter, OTFC(qNext, quotHalfIter, quotM1HalfIter)._2)
  // quotIter := Mux(state(s_pre_1), 0.U(len.W),
  //                 Mux(state(s_iter), quotIterNext,
  //                   Mux(quotSignReg, aInverter, quotIterReg)))
  // quotM1Iter := Mux(state(s_pre_1),
  //                   0.U(len.W), Mux(state(s_iter), quotM1IterNext,
  //                     Mux(quotSignReg, dInverter, quotM1IterReg)))

  // Cleared in s_pre_1, updated in s_iter. The remaining enable of the quotient registers is
  // s_post_0, where the quotients are negated through the shared inverters if quotSignReg is set.
  quotIter := Mux(state(s_iter), quotIterNext,
                Mux(state(s_pre_1), 0.U(len.W),
                  Mux(quotSignReg, aInverter, quotIterReg)))
  quotM1Iter := Mux(state(s_iter), quotM1IterNext,
                  Mux(state(s_pre_1), 0.U(len.W),
                    Mux(quotSignReg, dInverter, quotM1IterReg)))
  // finally, to the recovery stages!

  // rNext is sum + carry and rNextPd is sum + carry + d; both are negated when rSignReg is set
  // (bitwise inversion of each addend plus the number of addends).
  when(rSignReg) {
    rNext := ~rSumReg + ~rCarryReg + 2.U
    rNextPd := ~rSumReg + ~rCarryReg + ~Cat(0.U(1.W), dNormReg, 0.U(3.W)) + 3.U
  } .otherwise {
    rNext := rSumReg + rCarryReg
    rNextPd := rSumReg + rCarryReg + Cat(0.U(1.W), dNormReg, 0.U(3.W))
  }
  val rNextReg = RegEnable(rNext(len + 3, 3), state(s_post_0))
  val rNextPdReg = RegEnable(rNextPd(len + 3, 3), state(s_post_0))
  dontTouch(rNextReg)
  // post_1
  val r = rNextReg
  val rPd = rNextPdReg
  val rIsZero = ~(r.orR) // currently not referenced elsewhere in this module
  val needCorr = Mux(rSignReg, ~r(len) & r.orR, r(len)) // when we get pos rem for a<0 or neg rem for a>0
  val rPreShifted = Mux(needCorr, rPd, r)
  val rightShifter = Module(new RightShifter(len, lzc_width))
  rightShifter.io.in := rPreShifted
  rightShifter.io.shiftNum := dLZCReg
  rightShifter.io.msb := Mux(~(rPreShifted.orR), 0.U, rSignReg)
  val rShifted = rightShifter.io.out
  val rFinal = RegEnable(Mux(specialReg, remSpecialReg, rShifted), state(s_post_1))// right shifted remainder. shift by the number of bits divisor is shifted
  val qFinal = RegEnable(Mux(specialReg, quotSpecialReg, Mux(needCorr, quotM1IterReg, quotIterReg)), state(s_post_1))
  val res = Mux(isHi, rFinal, qFinal)
  io.out_data := Mux(isW,
    SignExt(res(31, 0), len),
    res
  )
  io.in_ready := state(s_idle)
  io.out_valid := state(s_finish)
  io.out_validNext := state(s_post_1)
}

/**
  * Constant tables consumed by [[SRT16DividerDataModule]].
  *
  * `minus_m(k)` is a `MuxLookup` mapping from a 3-bit index (three bits of the normalised
  * divisor) to a 7-bit constant. Per the labels below, the four tables hold -m[-1], -m[0],
  * -m[1] and -m[2].
  */
object mLookUpTable2 {
  // Usage :
  // result := MuxLookup(index, default)(mLookUpTable2.minus_m(k))
  val minus_m = Seq(
    Seq( // -m[-1]
      0.U -> "b00_11010".U(7.W),
      1.U -> "b00_11110".U(7.W),
      2.U -> "b01_00000".U(7.W),
      3.U -> "b01_00100".U(7.W),
      4.U -> "b01_00110".U(7.W),
      5.U -> "b01_01010".U(7.W),
      6.U -> "b01_01100".U(7.W),
      7.U -> "b01_10000".U(7.W)
    ),
    Seq( // -m[0]
      0.U -> "b000_0100".U(7.W),
      1.U -> "b000_0110".U(7.W),
      2.U -> "b000_0110".U(7.W),
      3.U -> "b000_0110".U(7.W),
      4.U -> "b000_1000".U(7.W),
      5.U -> "b000_1000".U(7.W),
      6.U -> "b000_1000".U(7.W),
      7.U -> "b000_1000".U(7.W)
    ),
    Seq( //-m[1]
      0.U -> "b111_1101".U(7.W),
      1.U -> "b111_1100".U(7.W),
      2.U -> "b111_1100".U(7.W),
      3.U -> "b111_1100".U(7.W),
      4.U -> "b111_1011".U(7.W),
      5.U -> "b111_1010".U(7.W),
      6.U -> "b111_1010".U(7.W),
      7.U -> "b111_1010".U(7.W)
    ),
    Seq( //-m[2]
      0.U -> "b11_01000".U(7.W),
      1.U -> "b11_00100".U(7.W),
      2.U -> "b11_00010".U(7.W),
      3.U -> "b10_11110".U(7.W),
      4.U -> "b10_11100".U(7.W),
      5.U -> "b10_11000".U(7.W),
      6.U -> "b10_10110".U(7.W),
      7.U -> "b10_10010".U(7.W)
    ))
}