xref: /XiangShan/src/main/scala/xiangshan/backend/VecExcpDataMergeModule.scala (revision 36c15e45ac725cd4c1e00ba022d3de24e9c10506)
1package xiangshan.backend
2
3import chisel3.util._
4import chisel3._
5import org.chipsalliance.cde.config.Parameters
6import utility._
7import xiangshan._
8import xiangshan.backend.fu.vector.Bundles._
9
/**
 * Repairs vector destination registers after a vector exception is reported on
 * `i.fromExceptionGen`.
 *
 * The module is a small state machine (see `State` below):
 *  - noExcp : idle; on a valid exception record it latches the derived bounds and moves on.
 *  - waitRab: collects (lreg, newPreg) maps from `i.fromRab` and old pregs from `i.fromRat`
 *             into `regMaps` until every needed destination index has both.
 *  - mergeVd: reads old and new vd from the VPRF and merges them byte by byte
 *             (bytes at or above `sWaitRab_e8offset` come from the old vd, the rest from the new vd).
 *  - mvOldVd: reads the old vd only and writes its data unchanged to the new preg.
 *  - finish : clears all bookkeeping and returns to noExcp.
 *
 * `o.status.busy` is the one-cycle-delayed indication that the state is waitRab, mergeVd or mvOldVd.
 */
class VecExcpDataMergeModule(implicit p: Parameters) extends XSModule {
  private val MaxLMUL = 8
  private val VdIdxInGroupWidth = log2Ceil(MaxLMUL) // hold 0~7
  private val minElemLen = 8 // 8 bits
  private val maxElemNumPerVreg = VLEN / minElemLen
  private val tailZeroBit = log2Ceil(maxElemNumPerVreg) // 16 -> 4

  val i = IO(Input(new Bundle {
    val fromExceptionGen = ValidIO(new VecExcpInfo)
    val fromRab = new RabToVecExcpMod
    val fromRat = new RatToVecExcpMod
    val fromVprf = new VprfToExcpMod(maxMergeNumPerCycle * 2)
  }))
  val o = IO(Output(new Bundle {
    val toVPRF = new ExcpModToVprf(maxMergeNumPerCycle * 2, maxMergeNumPerCycle)
    val status = new Bundle {
      val busy = Bool()
    }
  }))

  // Per commit slot, fold the two RAT sources (vec / v0) into a single valid + old preg.
  // Mux1H is used, so the two valids are expected to be mutually exclusive.
  private val oldPregVecFromRat: Vec[ValidIO[UInt]] = Wire(Vec(RabCommitWidth, ValidIO(UInt(VfPhyRegIdxWidth.W))))
  oldPregVecFromRat.zipWithIndex.foreach { case (oldPreg: ValidIO[UInt], idx) =>
    val vecOldVd = i.fromRat.vecOldVdPdest(idx)
    val v0OldVd  = i.fromRat.v0OldVdPdest(idx)
    oldPreg.valid := (vecOldVd.valid || v0OldVd.valid)
    oldPreg.bits := Mux1H(Seq(
      vecOldVd.valid -> vecOldVd.bits,
      v0OldVd.valid -> v0OldVd.bits,
    ))
  }

  private val lregNewPregVecFromRab = WireInit(i.fromRab.logicPhyRegMap)

  // VPRF read data: the first maxMergeNumPerCycle ports carry old vd, the rest carry new vd
  // (matching the read-port assignment at the bottom of this module).
  private val preMergedOldVd = WireInit(VecInit(i.fromVprf.rdata.take(maxMergeNumPerCycle).map(_.bits.asTypeOf(new VecElemData(VLEN)))))
  private val preMergedNewVd = WireInit(VecInit(i.fromVprf.rdata.drop(maxMergeNumPerCycle).map(_.bits.asTypeOf(new VecElemData(VLEN)))))
  private val preMoveOldVd   = WireInit(VecInit(i.fromVprf.rdata.map(_.bits.asTypeOf(new VecElemData(VLEN)))))

  private val sNoExcp_vecExcpInfo = WireInit(i.fromExceptionGen)
  private val sNoExcp_vemul = sNoExcp_vecExcpInfo.bits.vlmul + sNoExcp_vecExcpInfo.bits.veew - sNoExcp_vecExcpInfo.bits.vsew
  // data vemul
  private val sNoExcp_dvemul = Mux(
    sNoExcp_vecExcpInfo.bits.isIndexed,
    sNoExcp_vecExcpInfo.bits.vlmul,
    sNoExcp_vemul,
  )
  // index vemul
  private val sNoExcp_ivemul = WireInit(VLmul(), sNoExcp_vemul)
  dontTouch(sNoExcp_vemul)
  dontTouch(sNoExcp_dvemul)
  dontTouch(sNoExcp_ivemul)
  private val sNoExcp_dvemulNoLessThanM1 = VLmul.makeNoLessThanM1(sNoExcp_dvemul).take(2)
  private val sNoExcp_ivemulNoLessThanM1 = VLmul.makeNoLessThanM1(sNoExcp_ivemul).take(2)

  // if ivemul - dvemul = idx
  // (entry 0 is also set when ivemul < dvemul)
  private val sNoExcp_vemul_i_d = VecInit.tabulate(4)(idx =>
    sNoExcp_ivemulNoLessThanM1 === (sNoExcp_dvemulNoLessThanM1 +& idx.U) ||
    (idx == 0).B && (sNoExcp_ivemulNoLessThanM1 < sNoExcp_dvemulNoLessThanM1)
  )
  private val sNoExcp_nonSegIndexed = sNoExcp_vecExcpInfo.bits.isIndexed && sNoExcp_vecExcpInfo.bits.nf === 0.U

  // Per destination index bookkeeping, cleared in State.finish.
  private val commitNeeded = RegInit(VecInit.fill(MaxLMUL)(false.B))
  private val rabCommitted = RegInit(VecInit.fill(MaxLMUL)(false.B))
  private val ratCommitted = RegInit(VecInit.fill(MaxLMUL)(false.B))
  private val hasReadRf    = RegInit(VecInit.fill(MaxLMUL)(false.B))

  private val regMaps = Reg(Vec(MaxLMUL, new LogicPhyRegMap))

  // Index of the first destination register handled this cycle; holds 0~8.
  private val currentIdx = RegInit(0.U(log2Up(8 + 1).W))
  private val currentIdxVec = (0 until maxMergeNumPerCycle).map(idx => currentIdx + idx.U)

  private val mergedVd = Reg(Vec(maxMergeNumPerCycle, new VecElemData(VLEN)))

  private val sNoExcp_eewOH = SewOH.convertFromVSew(sNoExcp_vecExcpInfo.bits.veew)
  private val sNoExcp_sewOH = SewOH.convertFromVSew(sNoExcp_vecExcpInfo.bits.vsew)
  // data element width: sew for indexed accesses, eew otherwise
  private val sNoExcp_deewOH = Mux(
    sNoExcp_vecExcpInfo.bits.isIndexed,
    sNoExcp_sewOH,
    sNoExcp_eewOH,
  )
  private val sNoExcp_voffset = Module(new GetE8OffsetInVreg(VLEN))(sNoExcp_deewOH, sNoExcp_vecExcpInfo.bits.vstart)
  private val sNoExcp_idxRangeVec: Vec[HWRange] = Module(new NfMappedElemIdx(VLEN))(sNoExcp_vecExcpInfo.bits.nf, sNoExcp_deewOH)
  private val sNoExcp_vstartIsAligned: Bool = Mux(!sNoExcp_vecExcpInfo.bits.isVlm, sNoExcp_voffset === 0.U, false.B)

  // inRangeVec(idx): vstart falls inside the element range of destination slot idx.
  // For isVlm only slot 0 can be selected, and it always is.
  private val sNoExcp_inRangeVec: Vec[Bool] = VecInit((0 until 8).map(idx =>
    if (idx == 0) {
      sNoExcp_vecExcpInfo.bits.isVlm ||
      sNoExcp_idxRangeVec(idx).inRange (sNoExcp_vecExcpInfo.bits.vstart)
    } else {
      !sNoExcp_vecExcpInfo.bits.isVlm &&
      sNoExcp_idxRangeVec(idx).inRange (sNoExcp_vecExcpInfo.bits.vstart)
    }
  ))
  // The last no exception vdIdx, hold 0~7.
  // No need to hold 8, since if all vd are new, no exception has occurred.
  private val sNoExcp_useNewVdUntil: UInt = PriorityEncoder(sNoExcp_inRangeVec)
  // The last exception vdIdx, hold 0~8.
  // Need to hold 8.
  private val sNoExcp_needMergeUntil: UInt = sNoExcp_useNewVdUntil +
    Mux(!sNoExcp_vecExcpInfo.bits.isWhole, sNoExcp_vecExcpInfo.bits.nf, 0.U) +&
    1.U
  // the max vd idx need to write
  private val sNoExcp_maxVdIdx = Mux(
    sNoExcp_vecExcpInfo.valid,
    MuxCase(
      default = ((sNoExcp_vecExcpInfo.bits.nf +& 1.U) << sNoExcp_dvemulNoLessThanM1).asUInt,
      Seq(
        sNoExcp_vecExcpInfo.bits.isVlm -> 1.U,
        sNoExcp_vecExcpInfo.bits.isWhole -> (sNoExcp_vecExcpInfo.bits.nf +& 1.U),
      )
    ),
    0.U
  )

  private val sNoExcp_handleUntil = sNoExcp_maxVdIdx(3, 0) // [1, 8]
  // strided vector load need 2 uop to move data, so skip these reg maps
  private val sNoExcp_writeOffset = Mux(sNoExcp_vecExcpInfo.bits.isStride, 2.U, 1.U)

  private val sWaitRab_vecExcpInfo     = RegNextWithEnable(sNoExcp_vecExcpInfo)

  // At the beginning of waitRab,
  // when the offset is not aligned, currentIdx = useNewVdUntil <= needMergeUntil <= handleUntil
  // otherwise, currentIdx = needMergeUntil <= handleUntil
  // NOTE(review): the waitRab -> mergeVd transition below always loads useNewVdUntil into
  // currentIdx and sWaitRab_vstartIsAligned is not read in this module -- confirm the aligned case.
  private val sWaitRab_useNewVdUntil   = RegEnable(sNoExcp_useNewVdUntil, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_needMergeUntil  = RegEnable(sNoExcp_needMergeUntil, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_e8offset        = RegEnable(
    Mux1H((0 until 4).map(idx => sNoExcp_deewOH(idx) -> ZeroExt(sNoExcp_voffset(tailZeroBit - 1, 0), tailZeroBit))),
    sNoExcp_vecExcpInfo.valid
  )
  private val sWaitRab_idxRangeVec     = RegEnable(sNoExcp_idxRangeVec, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_vstartIsAligned = RegEnable(sNoExcp_vstartIsAligned, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_handleUntil     = RegEnable(sNoExcp_handleUntil, sNoExcp_vecExcpInfo.valid)

  private val sWaitRab_nonSegIndexed   = RegEnable(sNoExcp_nonSegIndexed, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_vemul_i_d       = RegEnable(sNoExcp_vemul_i_d, sNoExcp_vecExcpInfo.valid)
  private val sWaitRab_dvemulNoLessThanM1 = RegEnable(sNoExcp_dvemulNoLessThanM1, sNoExcp_vecExcpInfo.valid)

  // Only compared against zero below, to tell the first received batch from the second.
  private val sWaitRab_rabWriteOffset = Reg(UInt(4.W)) // [1,10]
  private val sWaitRab_ratWriteOffset = Reg(UInt(4.W)) // [1,10]

  // segShuffledRegIdxTable(nf)(dvemul)(vdIdx)
  private val segShuffledRegIdxTable: Seq[Seq[Seq[Int]]] = Seq.tabulate(8, 4) {
    case (nf, dvemul) =>
      val nField = nf + 1     // 1~8
      val dEMUL = 1 << dvemul // 1, 2, 4, 8
      if (nField == 2 && dEMUL == 2) {
        Seq(0, 2, 1, 3, 0, 0, 0, 0)
      }
      else if (nField == 2 && dEMUL == 4) {
        Seq(0, 4, 1, 5, 2, 6, 3, 7)
      }
      else if (nField == 3 && dEMUL == 2) {
        Seq(0, 2, 4, 1, 3, 5, 0, 0)
      }
      else if (nField == 4 && dEMUL == 2) {
        Seq(0, 2, 4, 6, 1, 3, 5, 7)
      }
      else {
        Seq(0, 1, 2, 3, 4, 5, 6, 7)
      }
  }
  private val segRegTableHW: Vec[Vec[Vec[UInt]]] = WireInit(VecInit.tabulate(8, 4) {
    case (nf, dvemul) => VecInit(segShuffledRegIdxTable(nf)(dvemul).map(_.U(VdIdxInGroupWidth.W)))
  })

  // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (0, 4, ...)
  private val oldVdLocVec: Vec[UInt] = VecInit(currentIdxVec.map(idx =>
    Mux(
      sWaitRab_nonSegIndexed,
      Mux1H(sWaitRab_vemul_i_d.zipWithIndex.map { case (i_d_n, ii) => i_d_n -> (idx << ii).asUInt }),
      Mux(
        sWaitRab_vecExcpInfo.bits.isWhole,
        idx,
        segRegTableHW(sWaitRab_vecExcpInfo.bits.nf)(sWaitRab_dvemulNoLessThanM1)(idx),
      )
    ).take(VdIdxInGroupWidth)
  ))

  // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (3, 7, ...)
  private val newVdLocVec = VecInit(currentIdxVec.map(idx =>
    Mux(
      sWaitRab_nonSegIndexed,
      Mux1H(sWaitRab_vemul_i_d.zipWithIndex.map { case (i_d_n, ii) => i_d_n -> ((idx << ii).asUInt | ((1 << ii) - 1).U) }),
      Mux(
        sWaitRab_vecExcpInfo.bits.isWhole,
        idx,
        segRegTableHW(sWaitRab_vecExcpInfo.bits.nf)(sWaitRab_dvemulNoLessThanM1)(idx),
      )
    ).take(VdIdxInGroupWidth)
  ))

  dontTouch(oldVdLocVec)
  dontTouch(newVdLocVec)

  private object State extends ChiselEnum {
    val noExcp  = Value
    val waitRab = Value
    val mergeVd = Value
    val mvOldVd = Value
    val finish  = Value
  }

  private val state: State.Type = RegInit(State.noExcp)
  private val stateNext = WireInit(state)
  state := stateNext

  private val collectedAllRegMap = Wire(Bool())
  private val mergeFinished = currentIdx >= sWaitRab_needMergeUntil
  private val mvFinished = currentIdx >= sWaitRab_handleUntil

  // get lreg and new preg, the last mapped newPdest
  private val filteredRabCommitedVec: Vec[Vec[Bool]] = WireInit(VecInit.tabulate(4, MaxLMUL) { case (i_d_n, vdIdx) =>
    val vdLoc = ((vdIdx + 1) << i_d_n) - 1
    rabCommitted(if (vdLoc >= MaxLMUL) 0 else vdLoc)
  })
  // get old preg, the first mapped oldPdest
  private val filteredRatCommitedVec: Vec[Vec[Bool]] = WireInit(VecInit.tabulate(4, MaxLMUL) { case (i_d_n, vdIdx) =>
    val vdLoc = vdIdx << i_d_n
    ratCommitted(if (vdLoc >= MaxLMUL) 0 else vdLoc)
  })

  private val filteredRabCommited = Wire(Vec(MaxLMUL, Bool()))
  private val filteredRatCommited = Wire(Vec(MaxLMUL, Bool()))
  when (sWaitRab_nonSegIndexed) {
    filteredRabCommited := Mux1H(sWaitRab_vemul_i_d, filteredRabCommitedVec)
    filteredRatCommited := Mux1H(sWaitRab_vemul_i_d, filteredRatCommitedVec)
  }.otherwise {
    // No need to shuffle, since the vdIdx always compressed towards zero and left tail unused.
    filteredRabCommited := rabCommitted
    filteredRatCommited := ratCommitted
  }

  // 1. no need commit
  // 2. need commit and both rab and rat committed
  collectedAllRegMap := ((~commitNeeded.asUInt).asUInt | (commitNeeded.asUInt & filteredRabCommited.asUInt & filteredRatCommited.asUInt)).andR

  // State transitions and currentIdx stepping.
  switch(state) {
    is(State.noExcp) {
      when (i.fromExceptionGen.valid) {
        stateNext := State.waitRab
      }
    }
    is(State.waitRab) {
      when (collectedAllRegMap) {
        stateNext := State.mergeVd
        currentIdx := sWaitRab_useNewVdUntil
      }
    }
    is(State.mergeVd) {
      when (mvFinished) {
        stateNext := State.finish
      }.elsewhen (mergeFinished) {
        stateNext := State.mvOldVd
      }
      // advance by the number of registers written back this cycle
      when(o.toVPRF.w.head.valid) {
        currentIdx := currentIdx + PopCount(o.toVPRF.w.map(_.valid))
      }
    }
    is(State.mvOldVd) {
      when (mvFinished) {
        stateNext := State.finish
      }
      when(o.toVPRF.w.head.valid) {
        currentIdx := currentIdx + PopCount(o.toVPRF.w.map(_.valid))
      }
    }
    is(State.finish) {
      stateNext := State.noExcp
      currentIdx := 0.U
    }
  }

  private val regWriteFromRabVec: Vec[ValidIO[RegWriteFromRab]] = i.fromRab.logicPhyRegMap
  private val regWriteFromRatVec: Vec[ValidIO[UInt]] = oldPregVecFromRat

  // Write-back data per merge slot: in mergeVd, bytes at index >= e8offset are taken from the
  // old vd and the lower bytes from the new vd; in any other state the old vd is passed through.
  val mergedVdWData: Vec[VecE8Vec] = Wire(Vec(maxMergeNumPerCycle, new VecE8Vec(VLEN)))
  mergedVdWData.zipWithIndex.foreach { case (vd, vIdx) =>
    vd.data.zipWithIndex.foreach { case (vde, eIdx) =>
      vde := Mux(
        state === State.mergeVd,
        Mux(
          eIdx.U >= sWaitRab_e8offset,
          preMergedOldVd(vIdx).e8Vec(eIdx),
          preMergedNewVd(vIdx).e8Vec(eIdx),
        ),
        preMoveOldVd(vIdx).e8Vec(eIdx),
      )
    }
  }

  private val hasRabWrite = regWriteFromRabVec.head.valid
  private val hasRatWrite = regWriteFromRatVec.head.valid
  require(
    2 * RabCommitWidth >= (MaxLMUL + 2),
    "Cannot receive all 10 reg maps from RAB and RAT in two cycles. " +
      "This module should be rewrited to support more than 2 cycles receiving"
  )

  switch (state) {
    is (State.noExcp) {
      when (stateNext === State.waitRab) {
        sWaitRab_rabWriteOffset := 0.U
        sWaitRab_ratWriteOffset := 0.U
        commitNeeded.zipWithIndex.foreach { case (needed, idx) =>
          needed := sNoExcp_maxVdIdx > idx.U
        }
      }
    }
    is (State.waitRab) {
      // Accumulate (index of the highest valid entry + 1) for this cycle's batch.
      // The count is derived from RabCommitWidth rather than a literal so that it stays
      // correct (and elaborates) for any commit width accepted by the require above.
      when (hasRabWrite) {
        sWaitRab_rabWriteOffset := sWaitRab_rabWriteOffset +
          PriorityMux((0 until RabCommitWidth).map(
            idx => i.fromRab.logicPhyRegMap.reverse(idx).valid -> (RabCommitWidth - idx).U
          ))
      }
      when (hasRatWrite) {
        sWaitRab_ratWriteOffset := sWaitRab_ratWriteOffset +
          PriorityMux((0 until RabCommitWidth).map(
            idx => regWriteFromRatVec.reverse(idx).valid -> (RabCommitWidth - idx).U
          ))
      }

      when(sWaitRab_rabWriteOffset === 0.U) {
        // the first batch of RAB commit considers the offset
        // (the leading 2 entries are skipped for strided accesses, 1 otherwise)
        when(sWaitRab_vecExcpInfo.bits.isStride) {
          (2 until RabCommitWidth).map { idx =>
            val vdIdx = idx - 2
            when(regWriteFromRabVec(idx).valid) {
              regMaps(vdIdx).lreg := regWriteFromRabVec(idx).bits.lreg
              regMaps(vdIdx).newPreg := regWriteFromRabVec(idx).bits.preg
              rabCommitted(vdIdx) := true.B
            }
          }
        }.otherwise {
          (1 until RabCommitWidth).map { idx =>
            val vdIdx = idx - 1
            when(regWriteFromRabVec(idx).valid) {
              regMaps(vdIdx).lreg := regWriteFromRabVec(idx).bits.lreg
              regMaps(vdIdx).newPreg := regWriteFromRabVec(idx).bits.preg
              rabCommitted(vdIdx) := true.B
            }
          }
        }
      }.otherwise {
        // the second batch of RAB/RAT commit needs no offset
        when(sWaitRab_vecExcpInfo.bits.isStride) {
          (0 until (MaxLMUL + 2 - RabCommitWidth)).map { idx =>
            val vdIdx = idx - 2 + RabCommitWidth
            when(regWriteFromRabVec(idx).valid) {
              regMaps(vdIdx).lreg := regWriteFromRabVec(idx).bits.lreg
              regMaps(vdIdx).newPreg := regWriteFromRabVec(idx).bits.preg
              rabCommitted(vdIdx) := true.B
            }
          }
        }.otherwise {
          (0 until MaxLMUL + 1 - RabCommitWidth).map { idx =>
            val vdIdx = idx - 1 + RabCommitWidth
            when(regWriteFromRabVec(idx).valid) {
              regMaps(vdIdx).lreg := regWriteFromRabVec(idx).bits.lreg
              regMaps(vdIdx).newPreg := regWriteFromRabVec(idx).bits.preg
              rabCommitted(vdIdx) := true.B
            }
          }
        }
      }

      when (sWaitRab_ratWriteOffset === 0.U) {
        // the first batch of RAT commit considers the offset
        when(sWaitRab_vecExcpInfo.bits.isStride) {
          (2 until RabCommitWidth).map { idx =>
            val vdIdx = idx - 2
            when(regWriteFromRatVec(idx).valid) {
              regMaps(vdIdx).oldPreg := regWriteFromRatVec(idx).bits
              ratCommitted(vdIdx) := true.B
            }
          }
        }.otherwise {
          (1 until RabCommitWidth).map { idx =>
            val vdIdx = idx - 1
            when(regWriteFromRatVec(idx).valid) {
              regMaps(vdIdx).oldPreg := regWriteFromRatVec(idx).bits
              ratCommitted(vdIdx) := true.B
            }
          }
        }
      }.otherwise {
        // the second batch of RAT commit needs no offset
        when(sWaitRab_vecExcpInfo.bits.isStride) {
          (0 until (MaxLMUL + 2 - RabCommitWidth)).map { idx =>
            val vdIdx = idx - 2 + RabCommitWidth
            when(regWriteFromRatVec(idx).valid) {
              regMaps(vdIdx).oldPreg := regWriteFromRatVec(idx).bits
              ratCommitted(vdIdx) := true.B
            }
          }
        }.otherwise {
          (0 until MaxLMUL + 1 - RabCommitWidth).map { idx =>
            val vdIdx = idx - 1 + RabCommitWidth
            when(regWriteFromRatVec(idx).valid) {
              regMaps(vdIdx).oldPreg := regWriteFromRatVec(idx).bits
              ratCommitted(vdIdx) := true.B
            }
          }
        }
      }
    }
    is (State.finish) {
      commitNeeded.foreach(_ := false.B)
      rabCommitted.foreach(_ := false.B)
      ratCommitted.foreach(_ := false.B)
      hasReadRf   .foreach(_ := false.B)
      sWaitRab_rabWriteOffset := 0.U
      sWaitRab_ratWriteOffset := 0.U
      sWaitRab_vecExcpInfo.valid := false.B
    }
  }

  // Capture the merged / moved data whenever the VPRF returns valid read data.
  switch (state) {
    is (State.mergeVd, State.mvOldVd) {
      (0 until maxMergeNumPerCycle).map(vIdx =>
        when(i.fromVprf.rdata(vIdx).valid) {
          mergedVd(vIdx) := mergedVdWData(vIdx).asTypeOf(new VecElemData(VLEN))
        }
      )
    }
  }

  // VPRF read requests: ports [0, maxMergeNumPerCycle) read the old vd,
  // ports [maxMergeNumPerCycle, 2 * maxMergeNumPerCycle) read the new vd (mergeVd only).
  when (state === State.mergeVd) {
    (0 until maxMergeNumPerCycle).foreach { case (idx) =>
      val vdIdx = currentIdxVec(idx)
      // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (0, 4, ...)
      val oldVdLoc = oldVdLocVec(idx)
      // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (3, 7, ...)
      val newVdLoc = newVdLocVec(idx)
      o.toVPRF.r(idx).valid := commitNeeded(vdIdx) && !hasReadRf(vdIdx) && vdIdx < sWaitRab_needMergeUntil
      o.toVPRF.r(idx).bits.addr := regMaps(oldVdLoc).oldPreg
      o.toVPRF.r(idx).bits.isV0 := (regMaps(oldVdLoc).lreg === 0.U) && (idx == 0).B
      o.toVPRF.r(idx + maxMergeNumPerCycle).valid := commitNeeded(vdIdx) && !hasReadRf(vdIdx) && vdIdx < sWaitRab_needMergeUntil
      o.toVPRF.r(idx + maxMergeNumPerCycle).bits.addr := regMaps(newVdLoc).newPreg
      o.toVPRF.r(idx + maxMergeNumPerCycle).bits.isV0 := (regMaps(newVdLoc).lreg === 0.U) && (idx == 0).B
      hasReadRf(vdIdx) := true.B && vdIdx < sWaitRab_needMergeUntil
    }
  }.elsewhen (state === State.mvOldVd) {
    (0 until maxMergeNumPerCycle).foreach { case (idx) =>
      val vdIdx = currentIdxVec(idx)
      // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (0, 4, ...)
      val oldVdLoc = oldVdLocVec(idx)
      // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (3, 7, ...)
      val newVdLoc = newVdLocVec(idx)
      o.toVPRF.r(idx).valid := commitNeeded(vdIdx) && !hasReadRf(vdIdx) && vdIdx < sWaitRab_handleUntil
      o.toVPRF.r(idx).bits.addr := regMaps(oldVdLoc).oldPreg
      o.toVPRF.r(idx).bits.isV0 := (regMaps(oldVdLoc).lreg === 0.U) && (idx == 0).B
      // the new-vd read ports are idle while only moving old vd
      o.toVPRF.r(idx + maxMergeNumPerCycle).valid := false.B
      o.toVPRF.r(idx + maxMergeNumPerCycle).bits.addr := 0.U
      o.toVPRF.r(idx + maxMergeNumPerCycle).bits.isV0 := false.B
      hasReadRf(vdIdx) := true.B && vdIdx < sWaitRab_handleUntil
    }
  }.otherwise {
    o.toVPRF.r := 0.U.asTypeOf(chiselTypeOf(o.toVPRF.r))
  }

  // VPRF write-back: issued one cycle after valid read data, targeting the new preg.
  o.toVPRF.w.zipWithIndex.foreach { case (w, idx) =>
    val vdIdx = currentIdxVec(idx)
    // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (0, 4, ...)
    val oldVdLoc = oldVdLocVec(idx)
    // when nonSegIndexed load, iemul/demul = 1 << 2, vdLoc will be mapped as (0, 1, 2, 3, ...) -> (3, 7, ...)
    val newVdLoc = newVdLocVec(idx)
    w.valid          := RegNext(i.fromVprf.rdata(idx).valid)
    w.bits.isV0      := (regMaps(newVdLoc).lreg === 0.U) && (idx == 0).B
    w.bits.newVdAddr := regMaps(newVdLoc).newPreg
    w.bits.newVdData := mergedVd(idx.U).asUInt
  }

  o.status.busy := DelayN(state.isOneOf(State.waitRab, State.mergeVd, State.mvOldVd), 1)
}
484
/**
 * One register-map entry kept by [[VecExcpDataMergeModule]] for each destination register index.
 */
class LogicPhyRegMap(implicit p: Parameters) extends XSBundle {
  // logical register index; the merge module compares it with 0 to derive its isV0 flags
  val lreg = UInt(LogicRegsWidth.W)
  // physical register filled from the RAB commit path; used as the write-back address
  val newPreg = UInt(VfPhyRegIdxWidth.W)
  // physical register filled from the RAT old-vd path; used as the old-vd read address
  val oldPreg = UInt(VfPhyRegIdxWidth.W)
}
490
/**
 * A (logical register, physical register) pair delivered by the RAB,
 * carried as the payload of [[RabToVecExcpMod]].
 */
class RegWriteFromRab(implicit p: Parameters) extends XSBundle {
  // logical register index
  val lreg = UInt(LogicRegsWidth.W)
  // physical register index mapped to lreg
  val preg = UInt(VfPhyRegIdxWidth.W)
}
496
/** RAB -> vector exception module interface: one optional register map per RAB commit slot. */
class RabToVecExcpMod(implicit p: Parameters) extends XSBundle {
  // RabCommitWidth entries, each valid when that slot carries a (lreg, preg) map
  val logicPhyRegMap = Vec(RabCommitWidth, ValidIO(new RegWriteFromRab))
}
500
/**
 * Description of the excepting vector memory instruction, as consumed by
 * [[VecExcpDataMergeModule]].
 */
class VecExcpInfo(implicit p: Parameters) extends XSBundle {
  // element index used to locate the first destination register that needs merging
  val vstart = Vstart()
  val vsew = VSew()
  val veew = VSew()
  val vlmul = VLmul()
  // field count minus one (the merge module uses nf + 1 as the number of fields)
  val nf = Nf()
  // strided access: the merge module skips two leading reg maps instead of one
  val isStride = Bool()
  // indexed access: data width / multiplier come from vsew / vlmul instead of veew / emul
  val isIndexed = Bool()
  // whole-register access: nf + 1 destination registers are handled, without segment shuffling
  val isWhole = Bool()
  // mask access: exactly one destination register is handled
  val isVlm = Bool()
}
512
/**
 * RAT -> vector exception module interface: old destination physical registers,
 * one optional entry per RAB commit slot, from two sources (vec and v0).
 */
class RatToVecExcpMod(implicit p: Parameters) extends XSBundle {
  // old vd physical register coming from the vec source
  val vecOldVdPdest = Vec(RabCommitWidth, ValidIO(UInt(VfPhyRegIdxWidth.W)))
  // old vd physical register coming from the v0 source
  val v0OldVdPdest = Vec(RabCommitWidth, ValidIO(UInt(VfPhyRegIdxWidth.W)))
}
517
/**
 * VPRF -> vector exception module interface carrying read data.
 *
 * @param numPort number of read-data ports
 */
class VprfToExcpMod(numPort: Int)(implicit p: Parameters) extends XSBundle {
  // one VLEN-bit read result per port, qualified by valid
  val rdata = Vec(numPort, ValidIO(UInt(VLEN.W)))
}
521
/**
 * Vector exception module -> VPRF interface: read requests and write-backs.
 *
 * @param numReadPort  number of read-request ports
 * @param numWritePort number of write ports
 */
class ExcpModToVprf(numReadPort: Int, numWritePort: Int)(implicit p: Parameters) extends XSBundle {
  // read requests: physical register address plus a flag marking a v0 access
  val r = Vec(numReadPort, ValidIO(new Bundle {
    val isV0 = Bool()
    val addr = UInt(VfPhyRegIdxWidth.W)
  }))
  // write-backs: destination physical register, v0 flag and the full VLEN-bit data
  val w = Vec(numWritePort, ValidIO(new Bundle {
    val isV0      = Bool()
    val newVdAddr = UInt(VfPhyRegIdxWidth.W)
    val newVdData = UInt(VLEN.W)
  }))
}
533
/**
 * Combinational lookup of the element-index range `[from, until)` covered by each of the
 * 8 destination register slots, for a given `nf` and one-hot element width.
 *
 * For slot `vdIdx` under `nf`, the 8-bit-element range is
 * `[maxElemNumPerVreg * g, maxElemNumPerVreg * (g + 1))` with `g = vdIdx / (nf + 1)`.
 * Both bounds are then right-shifted by the position of the bit set in `eewOH`.
 *
 * @param vlen vector register length in bits; must be a power of two
 */
class NfMappedElemIdx(vlen: Int) extends Module {
  require(isPow2(vlen))

  private val minElemLen = 8
  private val maxElemNumPerVreg = vlen / minElemLen

  // vlen = 128, idxWidth = 8, hold 0~128
  val idxWidth = log2Up(vlen + 1)

  val in = IO(Input(new Bundle {
    val nf = Nf()
    val eewOH = SewOH()
  }))
  val out = IO(Output(new Bundle {
    val idxRangeVec = Vec(8, new HWRange(idxWidth))
  }))

  // rangeTable(nf)(vdIdx): range in units of 8-bit elements
  private val rangeTable: Vec[Vec[HWRange]] = VecInit.tabulate(8, 8) { (nfIdx, vdIdx) =>
    // slots are grouped (nf + 1) at a time; every slot of a group shares one range
    val groupIdx = vdIdx / (nfIdx + 1)
    val lower = maxElemNumPerVreg * groupIdx
    val upper = maxElemNumPerVreg * (groupIdx + 1)
    HWRange(idxWidth)(lower.U, upper.U)
  }

  // Rescale one row of ranges to the element width selected by bit `shift` of eewOH.
  private def rescale(row: Vec[HWRange], shift: Int): Vec[HWRange] =
    VecInit(row.map(r => HWRange(idxWidth)(r.from >> shift, r.until >> shift)))

  // For every nf: pick the rescaled row matching eewOH, then select the row for in.nf.
  out.idxRangeVec := VecInit(rangeTable.map { row =>
    Mux1H((0 until 4).map(shift => in.eewOH(shift) -> rescale(row, shift)))
  })(in.nf)

  dontTouch(out.idxRangeVec)

  /** Drives both inputs and returns the range vector output. */
  def apply(nf: UInt, eewOH: UInt): Vec[HWRange] = {
    in.nf := nf
    in.eewOH := eewOH
    out.idxRangeVec
  }
}
575
/**
 * Converts an element index into an offset counted in 8-bit elements within one vector register.
 *
 * The index is left-shifted by the position of the bit set in `eewOH`; the result is
 * truncated to `log2Ceil(vlen / 8)` bits by the output port width.
 *
 * @param vlen vector register length in bits; must be a power of two
 */
class GetE8OffsetInVreg(vlen: Int) extends Module {
  require(isPow2(vlen))
  private val minElemLen = 8
  private val maxElemNumPerVreg = vlen / minElemLen
  private val tailZeroBit = log2Ceil(maxElemNumPerVreg) // 16 -> 4

  val in = IO(Input(new Bundle {
    val eewOH = SewOH()
    val idx = UInt(log2Up(vlen).W)
  }))
  val out = IO(Output(new Bundle {
    val offset = UInt(tailZeroBit.W)
  }))

  // eew=32(0b0100), idx=1, get offset=4
  private val offsetPerEew = Seq.tabulate(4) { shift =>
    in.eewOH(shift) -> (in.idx << shift)
  }
  out.offset := Mux1H(offsetPerEew)

  /** Drives both inputs and returns the offset output. */
  def apply(eewOH: UInt, idx: UInt): UInt = {
    in.eewOH := eewOH
    in.idx := idx
    out.offset
  }
}
603
/**
 * Raw contents of one vector register, with typed views split into
 * 8/16/32/64-bit elements.
 *
 * @param vlen vector register length in bits
 */
class VecElemData(vlen: Int) extends Bundle {
  val rawData = UInt(vlen.W)

  // Each view reinterprets the same raw bits; a new view is built on every call.
  def e8Vec: VecE8Vec   = rawData.asTypeOf(new VecE8Vec(vlen))
  def e16Vec: VecE16Vec = rawData.asTypeOf(new VecE16Vec(vlen))
  def e32Vec: VecE32Vec = rawData.asTypeOf(new VecE32Vec(vlen))
  def e64Vec: VecE64Vec = rawData.asTypeOf(new VecE64Vec(vlen))
}
612
/** A vector register viewed as `vlen / 8` elements of 8 bits. */
class VecE8Vec(vlen: Int) extends Bundle {
  val data = Vec(vlen / 8, UInt(8.W))

  /** Element at the static index `idx`. */
  def apply(idx: Int): UInt = data(idx)
}
618
/** A vector register viewed as `vlen / 16` elements of 16 bits. */
class VecE16Vec(vlen: Int) extends Bundle {
  val data = Vec(vlen / 16, UInt(16.W))

  /** Element at the static index `idx`. */
  def apply(idx: Int): UInt = data(idx)
}
624
/** A vector register viewed as `vlen / 32` elements of 32 bits. */
class VecE32Vec(vlen: Int) extends Bundle {
  val data = Vec(vlen / 32, UInt(32.W))

  /** Element at the static index `idx`. */
  def apply(idx: Int): UInt = data(idx)
}
630
/** A vector register viewed as `vlen / 64` elements of 64 bits. */
class VecE64Vec(vlen: Int) extends Bundle {
  val data = Vec(vlen / 64, UInt(64.W))

  /** Element at the static index `idx`. */
  def apply(idx: Int): UInt = data(idx)
}
636
/**
 * Hardware half-open interval `[from, until)`.
 *
 * @param w bit width of both bounds
 */
class HWRange(w: Int) extends Bundle {
  val from  = UInt(w.W)
  val until = UInt(w.W)

  /** True when `from <= uint < until`. */
  def inRange(uint: UInt) = {
    val notBelowFrom = uint >= from
    val belowUntil   = uint < until
    notBelowFrom && belowUntil
  }

  /** Connects both bounds and returns this range, so construction and wiring can be chained. */
  def apply(_from: Bits, _until: Bits): this.type = {
    from := _from
    until := _until
    this
  }
}
651
object HWRange {
  /** Creates a `w`-bit wide [[HWRange]] wire and drives its bounds with `_from` and `_until`. */
  def apply(w: Int)(_from: Bits, _until: Bits): HWRange = {
    val range = Wire(new HWRange(w))
    range(_from, _until)
  }
}
655
656