xref: /XiangShan/src/main/scala/xiangshan/backend/fu/SRT16Divider.scala (revision c49ebec88f6e402aefec681225e3537e2c511430)
/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
*
*
* Acknowledgement
*
* This implementation is inspired by several key papers:
* [1] Elisardo Antelo, Tomas Lang, Paolo Montuschi, and Alberto Nannarelli. "[Digit-recurrence dividers with reduced
* logical depth.](https://doi.org/10.1109/TC.2005.115)" IEEE Transactions on Computers 54.7: 837-851. 2005.
***************************************************************************************/
24a58e3351SLi Qianruo
// This file contains components originally written by Yifei He, see
// https://github.com/OpenXiangShan/XS-Verilog-Library/tree/main/int_div_radix_4_v1
// Email of original author: [email protected]
28a58e3351SLi Qianruo
29a58e3351SLi Qianruopackage xiangshan.backend.fu
30a58e3351SLi Qianruo
318891a219SYinan Xuimport org.chipsalliance.cde.config.Parameters
32a58e3351SLi Qianruoimport chisel3._
33a58e3351SLi Qianruoimport chisel3.util._
341c62c387SYinan Xuimport utils._
353c02ee8fSwakafaimport utility._
361c62c387SYinan Xuimport xiangshan._
37a58e3351SLi Qianruoimport xiangshan.backend.fu.util.CSA3_2
38a58e3351SLi Qianruo
/** Data path and control FSM of an SRT integer divider that retires up to two
  * radix-4 quotient digits per iteration cycle.
  *
  * A request is accepted in `s_idle` and then walks through a one-hot FSM:
  *
  *  - `s_pre_0`  : leading-zero counts of |a| and |d| are taken and both operands are normalised.
  *  - `s_pre_1`  : special cases are detected, the initial remainder / quotient digit and the
  *                 per-divisor constant tables are registered.
  *  - `s_iter`   : carry-save remainder recurrence plus on-the-fly quotient conversion,
  *                 repeated until `finalIter`.
  *  - `s_post_0` : the carry-save remainder is resolved (and `remainder + d` alongside it);
  *                 the quotient pair is negated when `quotSignReg` is set.
  *  - `s_post_1` : remainder correction, de-normalising right shift, result registers written.
  *  - `s_finish` : result held on `out_data` until `out_ready`.
  *
  * Special cases jump from `s_pre_1` directly to `s_post_1`.
  *
  * IO summary (as used in this module):
  *  - `src(0)` / `src(1)` : dividend `a` / divisor `d`.
  *  - `sign`              : treat the operands as signed.
  *  - `kill_w`            : suppresses leaving `s_idle` for the request presented this cycle.
  *  - `kill_r`            : forces the FSM back to `s_idle` from any state.
  *  - `isHi`              : output the remainder instead of the quotient.
  *  - `isW`               : output the low 32 bits of the result, sign-extended to `len`.
  *  - `out_validNext`     : asserted in `s_post_1`, i.e. one cycle before `out_valid`.
  *
  * @param len operand / result width in bits. The `isW` path slices bits 31..0, so `len`
  *            must be at least 32.
  */
class SRT16DividerDataModule(len: Int) extends Module {
  val io = IO(new Bundle() {
    val src = Vec(2, Input(UInt(len.W)))
    val valid, sign, kill_w, kill_r, isHi, isW = Input(Bool())
    val in_ready = Output(Bool())
    val out_valid = Output(Bool())
    val out_validNext = Output(Bool())
    val out_data = Output(UInt(len.W))
    val out_ready = Input(Bool())
  })

  // consts
  val lzc_width = log2Up(len)
  // Width of the internal remainder: 1 + len + 2 + 1 bits.
  val itn_len = 1 + len + 2 + 1

  val (a, d, sign, valid, kill_w, kill_r, isHi, isW) =
    (io.src(0), io.src(1), io.sign, io.valid, io.kill_w, io.kill_r, io.isHi, io.isW)
  val in_fire = valid && io.in_ready
  val out_fire = io.out_ready && io.out_valid
  val newReq = in_fire
  val s_idle :: s_pre_0 :: s_pre_1 :: s_iter :: s_post_0 :: s_post_1 :: s_finish :: Nil = Enum(7)
  // Quotient digits are kept one-hot; index 0 is digit -2 and index 4 is digit +2.
  val quot_neg_2 :: quot_neg_1 :: quot_0 :: quot_pos_1 :: quot_pos_2 :: Nil = Enum(5)

  // One-hot state register, reset to s_idle.
  val state = RegInit((1 << s_idle.litValue.toInt).U(7.W))

  // reused wires
  val quotIter = Wire(UInt(len.W))
  val quotM1Iter = Wire(UInt(len.W))
  val aLZC = Wire(UInt((lzc_width + 1).W))
  val dLZC = Wire(UInt((lzc_width + 1).W))

  val rNext = Wire(UInt(itn_len.W))
  val rNextPd = Wire(UInt(itn_len.W))

  val aInverter = Wire(UInt(len.W)) // results of global inverter
  val dInverter = Wire(UInt(len.W))

  val finalIter = Wire(Bool())
  val special = Wire(Bool())

  // reused regs
  // The quotient pair (Q, Q-1) is cleared in s_pre_1, updated in s_iter and
  // conditionally negated in s_post_0.
  val quotIterReg = RegEnable(quotIter, state(s_pre_1) | state(s_iter) | state(s_post_0))
  val quotM1IterReg = RegEnable(quotM1Iter, state(s_pre_1) | state(s_iter) | state(s_post_0))
  val specialReg = RegEnable(special, state(s_pre_1))
  val aReg = RegEnable(a, in_fire)

  // ---------------------------------------------------------------------------
  // Control FSM (kill_r has the highest priority)
  // ---------------------------------------------------------------------------
  when(kill_r) {
    state := UIntToOH(s_idle, 7)
  } .elsewhen(state(s_idle) && in_fire && !kill_w) {
    state := UIntToOH(s_pre_0, 7)
  } .elsewhen(state(s_pre_0)) { // leading zero detection
    state := UIntToOH(s_pre_1, 7)
  } .elsewhen(state(s_pre_1)) { // shift a/b
    state := Mux(special, UIntToOH(s_post_1, 7), UIntToOH(s_iter, 7))
  } .elsewhen(state(s_iter)) { // (ws[j+1], wc[j+1]) = 4(ws[j],wc[j]) - q(j+1)*d
    state := Mux(finalIter, UIntToOH(s_post_0, 7), UIntToOH(s_iter, 7))
  } .elsewhen(state(s_post_0)) { // if rem < 0, rem = rem + d
    state := UIntToOH(s_post_1, 7)
  } .elsewhen(state(s_post_1)) {
    state := UIntToOH(s_finish, 7)
  } .elsewhen(state(s_finish) && io.out_ready) {
    state := UIntToOH(s_idle, 7)
  } .otherwise {
    state := state
  }

  // ---------------------------------------------------------------------------
  // s_idle: absolute values of the operands
  // ---------------------------------------------------------------------------
  // The two negators are shared: in s_idle they negate the incoming operands,
  // in every other state they negate the quotient pair (used in s_post_0).
  aInverter := -Mux(state(s_idle), a, quotIterReg)
  dInverter := -Mux(state(s_idle), d, quotM1IterReg)

  val aSign = io.sign && a(len - 1)
  val dSign = io.sign && d(len - 1)
  val dSignReg = RegEnable(dSign, newReq)

  val aAbs = Mux(aSign, aInverter, a)
  val dAbs = Mux(dSign, dInverter, d)
  val aAbsReg = RegEnable(aAbs, newReq)
  val dAbsReg = RegEnable(dAbs, newReq)

  // ---------------------------------------------------------------------------
  // s_pre_0: leading-zero count and normalisation
  // ---------------------------------------------------------------------------
  // Left-shift each magnitude by its leading-zero count.
  val aNorm = (aAbsReg(len - 1, 0) << aLZC(lzc_width - 1, 0))(len - 1, 0)
  val dNorm = (dAbsReg(len - 1, 0) << dLZC(lzc_width - 1, 0))(len - 1, 0)

  val aNormReg = RegEnable(aNorm, state(s_pre_0))
  val dNormReg = RegEnable(dNorm, state(s_pre_0))

  // Index of the first set bit when scanning from the MSB, i.e. the leading-zero count.
  // NOTE(review): the PriorityEncoder result appears to be only lzc_width bits wide, so bit
  // lzc_width of aLZC / dLZC would always be zero after zero-extension — confirm.
  aLZC := PriorityEncoder(aAbsReg(len - 1, 0).asBools.reverse)
  dLZC := PriorityEncoder(dAbsReg(len - 1, 0).asBools.reverse)
  val aLZCReg = RegEnable(aLZC, state(s_pre_0))
  val dLZCReg = RegEnable(dLZC, state(s_pre_0))

  // dLZC - aLZC with one extra bit; the top bit is the borrow (|a| has fewer bits than |d|).
  val lzcWireDiff = Cat(0.U(1.W), dLZC(lzc_width - 1, 0)) - Cat(0.U(1.W), aLZC(lzc_width - 1, 0))
  val lzcRegDiff = Cat(0.U(1.W), dLZCReg(lzc_width - 1, 0)) - Cat(0.U(1.W), aLZCReg(lzc_width - 1, 0))

  // special case:
  // divisor is 1 or -1; dividend has less bits than divisor; divisor is zero
  val dIsOne = dLZC(lzc_width - 1, 0).andR
  val dIsZero = ~dNormReg.orR
  // aIsZero is not referenced anywhere else in this module.
  val aIsZero = RegEnable(aLZC(lzc_width), state(s_pre_0))
  val aTooSmall = RegEnable(aLZC(lzc_width) | lzcWireDiff(lzc_width), state(s_pre_0))
  special := dIsOne | dIsZero | aTooSmall

  // Results for the special cases, in priority order:
  //   d == 0     -> quotient all ones, remainder = a
  //   a too small-> quotient 0,        remainder = a
  //   otherwise  -> (|d| == 1) quotient = a or -a depending on the divisor sign, remainder = 0
  val quotSpecial = Mux(dIsZero, VecInit(Seq.fill(len)(true.B)).asUInt,
                            Mux(aTooSmall, 0.U,
                              Mux(dSignReg, -aReg, aReg) //  signed 2^(len-1)
                            ))
  val remSpecial = Mux(dIsZero || aTooSmall, aReg, 0.U)
  val quotSpecialReg = RegEnable(quotSpecial, state(s_pre_1))
  val remSpecialReg = RegEnable(remSpecial, state(s_pre_1))

  // ---------------------------------------------------------------------------
  // s_pre_1: signs, iteration count, initial remainder and quotient digit
  // ---------------------------------------------------------------------------
  // In s_idle the quotient sign is aSign ^ dSign. The only other enabled update of
  // quotSignReg is s_pre_1 with dIsZero, which forces the flag to true.
  // NOTE(review): the original comment was truncated at this point; the intent of forcing
  // the flag for a zero divisor should be confirmed.
  val quotSign = Mux(state(s_idle), aSign ^ dSign, true.B)
  val rSign = aSign
  val quotSignReg = RegEnable(quotSign, in_fire | (state(s_pre_1) & dIsZero))
  val rSignReg = RegEnable(rSign, in_fire)

  val rShift = lzcRegDiff(0)
  // When finalIter is set and oddIter is clear, only the first of the two radix-4
  // steps of the last cycle is committed (see rSumIter / quotIterNext below).
  val oddIter = lzcRegDiff(1) ^ lzcRegDiff(0)
  val iterNum = Wire(UInt((lzc_width - 2).W))
  val iterNumReg = RegEnable(iterNum, state(s_pre_1) | state(s_iter))
  // Loaded in s_pre_1, then counted down once per s_iter cycle.
  iterNum := Mux(state(s_pre_1), (lzcRegDiff + 1.U) >> 2, iterNumReg -% 1.U)
  finalIter := iterNumReg === 0.U

  // Initial remainder: the normalised dividend placed one bit lower when rShift is set.
  val rSumInit = Cat(0.U(3.W), Mux(rShift, Cat(0.U(1.W), aNormReg), Cat(aNormReg, 0.U(1.W)))) //(1, 67), 0.001xxx
  val rCarryInit = 0.U(itn_len.W)

  // The first quotient digit is chosen by comparing a 5-bit truncation of the initial
  // remainder against two thresholds selected by the 3 divisor bits below the MSB.
  val rSumInitTrunc = Cat(0.U(1.W), rSumInit(itn_len - 4, itn_len - 4 - 4 + 1)) // 0.00___
  val mInitPos1 = MuxLookup(dNormReg(len-2, len-4), "b00100".U(5.W))(
    Seq(
      0.U -> "b00100".U(5.W),
      1.U -> "b00100".U(5.W),
      2.U -> "b00100".U(5.W),
      3.U -> "b00110".U(5.W),
      4.U -> "b00110".U(5.W),
      5.U -> "b00110".U(5.W),
      6.U -> "b00110".U(5.W),
      7.U -> "b01000".U(5.W),
    )
  )
  val mInitPos2 = MuxLookup(dNormReg(len-2, len-4), "b01100".U(5.W))(
    Seq(
      0.U -> "b01100".U(5.W),
      1.U -> "b01110".U(5.W),
      2.U -> "b01111".U(5.W),
      3.U -> "b10000".U(5.W),
      4.U -> "b10010".U(5.W),
      5.U -> "b10100".U(5.W),
      6.U -> "b10110".U(5.W),
      7.U -> "b10110".U(5.W),
    )
  )
  val initCmpPos1 = rSumInitTrunc >= mInitPos1
  val initCmpPos2 = rSumInitTrunc >= mInitPos2
  val qInit = Mux(initCmpPos2, UIntToOH(quot_pos_2, 5), Mux(initCmpPos1, UIntToOH(quot_pos_1, 5), UIntToOH(quot_0, 5)))

  // in pre_1 we also obtain m_i + 16u * d for all u
  // udNeg -> (rud, r2ud) -> (rudPmNeg, r2udPmNeg)
  val dPos = Cat(0.U(1.W), dNormReg)  // +d, 0.1xxx, (1, len)
  val dNeg = -Cat(0.U(1.W), dNormReg) // -d, 1.xxxx, (1, len)

  // index 0 is for q=-2 and 4 is for q=2!!!
  val mNeg = Wire(Vec(4, UInt(12.W))) // selected m, extended to (6, 6) bits
  val rudNeg = Wire(Vec(5, UInt(10.W))) // (4, 6)
  val r2udNeg = Wire(Vec(5, UInt(12.W))) // (6, 6)

  // Selection Block with improved timing
  val rudPmNeg = Wire(Vec(5, Vec(4, UInt(10.W)))) // -(r*u*d+m_k), (5, 5) bits
  val r2ws = Wire(UInt(10.W)) // r^2*ws (5, 5) bits
  val r2wc = Wire(UInt(10.W))
  // calculating exact values of w
  val udNeg = Wire(Vec(5, UInt(itn_len.W))) // -(u*d) for u = -2..2, 1 signExt'ed bit

  // Speculative Block
  val r2udPmNeg = Wire(Vec(5, Vec(4, UInt(13.W)))) // -(r^2*d*d+m_k), (7, 6) bits. 1st index for q 2nd for m
  val r3ws = Wire(UInt(13.W)) // r^3*ws, (7, 6) bits
  val r3wc = Wire(UInt(13.W))
  val qSpec = Wire(Vec(5, UInt(5.W))) // 5 speculative results of qNext2
  // output wires
  val qNext = Wire(UInt(5.W))
  val qNext2 = Wire(UInt(5.W))
  val rCarryIter = Wire(UInt(itn_len.W))
  val rSumIter = Wire(UInt(itn_len.W))
  // Input Regs of whole Spec + Sel + sum adder block
  val qPrevReg = RegEnable(Mux(state(s_pre_1), qInit, qNext2), state(s_pre_1) | state(s_iter))
  val rSumReg = RegEnable(Mux(state(s_pre_1), rSumInit, rSumIter), state(s_pre_1) | state(s_iter))
  val rCarryReg = RegEnable(Mux(state(s_pre_1), rCarryInit, rCarryIter), state(s_pre_1) | state(s_iter))

  // Give values to the regs and wires above...
  // dForLookup is not referenced anywhere else in this module.
  val dForLookup = dPos(len-2, len-4)
  // Negated selection constants for the current divisor, aligned to a common (6, 6) format.
  mNeg := VecInit(Cat(SignExt(MuxLookup(dNormReg(len-2, len-4), "b00000000".U(7.W))(mLookUpTable2.minus_m(0)), 11), 0.U(1.W)), // (2, 5) -> (6, 6)
                  Cat(SignExt(MuxLookup(dNormReg(len-2, len-4), "b00000000".U(7.W))(mLookUpTable2.minus_m(1)), 10) ,0.U(2.W)), // (3, 4) -> (6, 6)
                  Cat(SignExt(MuxLookup(dNormReg(len-2, len-4), "b00000000".U(7.W))(mLookUpTable2.minus_m(2)), 10) ,0.U(2.W)),
                  Cat(SignExt(MuxLookup(dNormReg(len-2, len-4), "b00000000".U(7.W))(mLookUpTable2.minus_m(3)), 11) ,0.U(1.W))
  )
  // -(u*d) for u = -2, -1, 0, 1, 2. Each entry is itn_len bits wide; the extension widths
  // are derived from itn_len (66 / 67 for len = 64) rather than being hard-coded.
  udNeg := VecInit( Cat(SignExt(dPos, itn_len - 2), 0.U(2.W)),
                    Cat(SignExt(dPos, itn_len - 1), 0.U(1.W)),
                    0.U,
                    Cat(SignExt(dNeg, itn_len - 1), 0.U(1.W)),
                    Cat(SignExt(dNeg, itn_len - 2), 0.U(2.W))
  )

  // Truncated copies of udNeg, pre-added to each -m_k so that the iteration only needs
  // one carry-save level plus a short adder per comparison.
  rudNeg := VecInit(Seq.tabulate(5){i => udNeg(i)(itn_len-2, itn_len-11)})
  r2udNeg := VecInit(Seq.tabulate(5){i => udNeg(i)(itn_len-2, itn_len-13)})
  rudPmNeg := VecInit(Seq.tabulate(5){i => VecInit(Seq.tabulate(4){ j => SignExt(rudNeg(i)(9, 1), 10) + mNeg(j)(10, 1)})})
  r2udPmNeg := VecInit(Seq.tabulate(5){i => VecInit(Seq.tabulate(4){ j => SignExt(r2udNeg(i), 13) + SignExt(mNeg(j), 13)})})
  r3ws := rSumReg(itn_len-1, itn_len-13)
  r3wc := rCarryReg(itn_len-1, itn_len-13)

  r2ws := rSumReg(itn_len-1, itn_len-10)
  r2wc := rCarryReg(itn_len-1, itn_len-10)

  val udNegReg = RegEnable(udNeg, state(s_pre_1))
  val rudPmNegReg = RegEnable(rudPmNeg, state(s_pre_1))
  val r2udPmNegReg = RegEnable(r2udPmNeg, state(s_pre_1))

  /** Turns the four comparison sign bits into a one-hot quotient digit.
    *
    * @param signs bit k is the sign bit of the k-th "remainder estimate minus m_k" sum
    * @param name  name suggested for the generated 5-bit vector
    * @return one-hot digit, index 0 = -2 ... index 4 = +2
    */
  def DetectSign(signs: UInt, name: String): UInt = {
    val qVec = Wire(Vec(5, Bool())).suggestName(name)
    qVec(quot_neg_2) := signs(0) && signs(1) && signs(2)
    qVec(quot_neg_1) := ~signs(0) && signs(1) && signs(2)
    qVec(quot_0) := signs(2) && ~signs(1)
    qVec(quot_pos_1) := signs(3) && ~signs(2) && ~signs(1)
    qVec(quot_pos_2) := ~signs(3) && ~signs(2) && ~signs(1)
    qVec.asUInt
  }

  // ---------------------------------------------------------------------------
  // s_iter: selection block — first digit of the cycle (qNext)
  // ---------------------------------------------------------------------------
  val signs = VecInit(Seq.tabulate(4){ i => {
    val csa = Module(new CSA3_2(10)).suggestName(s"csa_sel_${i}")
    csa.io.in(0) := r2ws
    csa.io.in(1) := r2wc
    csa.io.in(2) := Mux1H(qPrevReg, rudPmNegReg.toSeq)(i) // rudPmNeg(OHToUInt(qPrevReg))(i)

      // Only the MSB (sign) of the resolved sum is needed.
      (csa.io.out(0) + (csa.io.out(1)(8, 0) << 1))(9)
    }})
  qNext := DetectSign(signs.asUInt, s"sel_q")

  // Full-width remainder recurrence: two chained carry-save steps, one per digit.
  val csaWide1 = Module(new CSA3_2(itn_len)).suggestName("csa_sel_wide_1")
  val csaWide2 = Module(new CSA3_2(itn_len)).suggestName("csa_sel_wide_2")
  csaWide1.io.in(0) := rSumReg << 2
  csaWide1.io.in(1) := rCarryReg << 2
  csaWide1.io.in(2) := Mux1H(qPrevReg, udNegReg.toSeq) << 2//udNeg(OHToUInt(qPrevReg)) << 2
  csaWide2.io.in(0) := csaWide1.io.out(0) << 2
  csaWide2.io.in(1) := (csaWide1.io.out(1) << 1)(itn_len-1, 0) << 2
  csaWide2.io.in(2) := Mux1H(qNext, udNegReg.toSeq) << 2 // udNeg(OHToUInt(qNext)) << 2
  // On the last cycle of an even-length run only the first step is committed.
  rSumIter := Mux(~oddIter & finalIter, csaWide1.io.out(0), csaWide2.io.out(0))
  rCarryIter := Mux(~oddIter & finalIter, (csaWide1.io.out(1) << 1)(itn_len-1, 0), (csaWide2.io.out(1) << 1)(itn_len-1, 0))

  // ---------------------------------------------------------------------------
  // s_iter: speculative block — second digit (qNext2) for every possible qNext
  // ---------------------------------------------------------------------------
  qSpec := VecInit(Seq.tabulate(5){ q_spec => {
      val csa1 = Module(new CSA3_2(13)).suggestName(s"csa_spec_${q_spec}")
      csa1.io.in(0) := r3ws
      csa1.io.in(1) := r3wc
      csa1.io.in(2) := SignExt(udNegReg(q_spec)(itn_len-2, itn_len-11), 13) // (4, 6) -> (7, 6)
      val signs2 = VecInit(Seq.tabulate(4){ i => {
        val csa2 = Module(new CSA3_2(13)).suggestName(s"csa_spec_${q_spec}_${i}")
        csa2.io.in(0) := csa1.io.out(0)
        csa2.io.in(1) := (csa1.io.out(1) << 1)(12, 0)
        csa2.io.in(2) := Mux1H(qPrevReg, r2udPmNegReg.toSeq)(i) // r2udPmNeg(OHToUInt(qPrevReg))(i)
        (csa2.io.out(0) + (csa2.io.out(1)(11, 0) << 1))(12)
      }})
      val qVec2 = DetectSign(signs2.asUInt, s"spec_q_${q_spec}")
      qVec2
  }})

  // Pick the speculative result that matches the digit actually selected.
  qNext2 := Mux1H(qNext, qSpec.toSeq)

  // ---------------------------------------------------------------------------
  // On-the-fly quotient conversion
  // ---------------------------------------------------------------------------
  // These wires follow the operand width `len` (previously hard-coded to 64 bits).
  val quotHalfIter = Wire(UInt(len.W))
  val quotM1HalfIter = Wire(UInt(len.W))
  val quotIterNext = Wire(UInt(len.W))
  val quotM1IterNext = Wire(UInt(len.W))

  /** One radix-4 on-the-fly conversion step.
    *
    * @param q      one-hot quotient digit (index 0 = -2 ... index 4 = +2)
    * @param quot   current quotient Q
    * @param quotM1 current Q - 1
    * @return the updated `(Q, Q - 1)` pair, each truncated to `len` bits
    */
  def OTFC(q: UInt, quot: UInt, quotM1: UInt): (UInt, UInt) = {
    val quotNext = Mux1H(Seq(
    q(quot_pos_2) -> (quot << 2 | "b10".U),
    q(quot_pos_1) -> (quot << 2 | "b01".U),
    q(quot_0)     -> (quot << 2 | "b00".U),
    q(quot_neg_1) -> (quotM1 << 2 | "b11".U),
    q(quot_neg_2) -> (quotM1 << 2 | "b10".U)
    ))
    val quotM1Next = Mux1H(Seq(
    q(quot_pos_2) -> (quot << 2 | "b01".U),
    q(quot_pos_1) -> (quot << 2 | "b00".U),
    q(quot_0)     -> (quotM1 << 2 | "b11".U),
    q(quot_neg_1) -> (quotM1 << 2 | "b10".U),
    q(quot_neg_2) -> (quotM1 << 2 | "b01".U)
    ))
    (quotNext(len-1, 0), quotM1Next(len-1, 0))
  }
  quotHalfIter := OTFC(qPrevReg, quotIterReg, quotM1IterReg)._1
  quotM1HalfIter := OTFC(qPrevReg, quotIterReg, quotM1IterReg)._2
  quotIterNext := Mux(~oddIter && finalIter, quotHalfIter, OTFC(qNext, quotHalfIter, quotM1HalfIter)._1)
  quotM1IterNext := Mux(~oddIter && finalIter, quotM1HalfIter, OTFC(qNext, quotHalfIter, quotM1HalfIter)._2)

  // s_iter: next quotient pair; s_pre_1: clear; otherwise (latched only in s_post_0):
  // negate the pair through the shared inverters when the quotient is negative.
  quotIter := Mux(state(s_iter), quotIterNext,
                    Mux(state(s_pre_1), 0.U(len.W),
                      Mux(quotSignReg, aInverter, quotIterReg)))
  quotM1Iter := Mux(state(s_iter), quotM1IterNext,
                      Mux(state(s_pre_1), 0.U(len.W),
                        Mux(quotSignReg, dInverter, quotM1IterReg)))

  // ---------------------------------------------------------------------------
  // s_post_0: resolve the carry-save remainder
  // ---------------------------------------------------------------------------
  // For a negative dividend the remainder is negated: ~x + 1 == -x for each term, hence
  // the constants 2 and 3. rNextPd is the same value with the (aligned) divisor included.
  when(rSignReg) {
    rNext := ~rSumReg + ~rCarryReg + 2.U
    rNextPd := ~rSumReg + ~rCarryReg + ~Cat(0.U(1.W), dNormReg, 0.U(3.W)) + 3.U
  } .otherwise {
    rNext := rSumReg + rCarryReg
    rNextPd := rSumReg + rCarryReg + Cat(0.U(1.W), dNormReg, 0.U(3.W))
  }
  val rNextReg = RegEnable(rNext(len + 3, 3), state(s_post_0))
  val rNextPdReg = RegEnable(rNextPd(len + 3, 3), state(s_post_0))
  dontTouch(rNextReg)

  // ---------------------------------------------------------------------------
  // s_post_1: correction, de-normalisation and result registers
  // ---------------------------------------------------------------------------
  val r = rNextReg
  val rPd = rNextPdReg
  // rIsZero is not referenced anywhere else in this module.
  val rIsZero = ~(r.orR)
  val needCorr = Mux(rSignReg, ~r(len) & r.orR, r(len)) // when we get pos rem for a<0 or neg rem for a>0
  val rPreShifted = Mux(needCorr, rPd, r)
  // Undo the normalisation: shift right by the number of bits the divisor was shifted left.
  val rightShifter = Module(new RightShifter(len, lzc_width))
  rightShifter.io.in := rPreShifted
  rightShifter.io.shiftNum := dLZCReg
  rightShifter.io.msb := Mux(~(rPreShifted.orR), 0.U, rSignReg)
  val rShifted = rightShifter.io.out
  val rFinal = RegEnable(Mux(specialReg, remSpecialReg, rShifted), state(s_post_1))
  // A corrected remainder pairs with Q - 1 instead of Q.
  val qFinal = RegEnable(Mux(specialReg, quotSpecialReg, Mux(needCorr, quotM1IterReg, quotIterReg)), state(s_post_1))
  val res = Mux(isHi, rFinal, qFinal)
  io.out_data := Mux(isW,
    SignExt(res(31, 0), len),
    res
  )
  io.in_ready := state(s_idle)
  io.out_valid := state(s_finish)
  io.out_validNext := state(s_post_1)
}
404a58e3351SLi Qianruo
/** Negated quotient-digit selection constants, indexed by three divisor bits.
  *
  * `minus_m(k)` is a sequence of `index -> value` pairs suitable as the mapping argument
  * of `MuxLookup`, e.g.
  * {{{
  * MuxLookup(index, default)(mLookUpTable2.minus_m(k))
  * }}}
  * where `index` is a 3-bit value (0 to 7). Every value is a 7-bit literal; the underscore
  * in each literal marks the position of the binary point used by the consumer:
  * tables 0 and 3 are written as 2 integer + 5 fraction bits, tables 1 and 2 as
  * 3 integer + 4 fraction bits.
  */
object mLookUpTable2 {
  val minus_m = Seq(
    Seq( // -m[-1]
      0.U -> "b00_11010".U(7.W),
      1.U -> "b00_11110".U(7.W),
      2.U -> "b01_00000".U(7.W),
      3.U -> "b01_00100".U(7.W),
      4.U -> "b01_00110".U(7.W),
      5.U -> "b01_01010".U(7.W),
      6.U -> "b01_01100".U(7.W),
      7.U -> "b01_10000".U(7.W)
    ),
    Seq( // -m[0]
      0.U -> "b000_0100".U(7.W),
      1.U -> "b000_0110".U(7.W),
      2.U -> "b000_0110".U(7.W),
      3.U -> "b000_0110".U(7.W),
      4.U -> "b000_1000".U(7.W),
      5.U -> "b000_1000".U(7.W),
      6.U -> "b000_1000".U(7.W),
      7.U -> "b000_1000".U(7.W)
    ),
    Seq( // -m[1]
      0.U -> "b111_1101".U(7.W),
      1.U -> "b111_1100".U(7.W),
      2.U -> "b111_1100".U(7.W),
      3.U -> "b111_1100".U(7.W),
      4.U -> "b111_1011".U(7.W),
      5.U -> "b111_1010".U(7.W),
      6.U -> "b111_1010".U(7.W),
      7.U -> "b111_1010".U(7.W)
    ),
    Seq( // -m[2]
      0.U -> "b11_01000".U(7.W),
      1.U -> "b11_00100".U(7.W),
      2.U -> "b11_00010".U(7.W),
      3.U -> "b10_11110".U(7.W),
      4.U -> "b10_11100".U(7.W),
      5.U -> "b10_11000".U(7.W),
      6.U -> "b10_10110".U(7.W),
      7.U -> "b10_10010".U(7.W)
    ))
}
450