// Source: XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadMisalignBuffer.scala (revision 9abad712596b38217be5d18dd9d58f699066b33e)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
1641d8d239Shappy-lx
1741d8d239Shappy-lxpackage xiangshan.mem
1841d8d239Shappy-lx
1941d8d239Shappy-lximport org.chipsalliance.cde.config.Parameters
2041d8d239Shappy-lximport chisel3._
2141d8d239Shappy-lximport chisel3.util._
2241d8d239Shappy-lximport utils._
2341d8d239Shappy-lximport utility._
2441d8d239Shappy-lximport xiangshan._
2541d8d239Shappy-lximport xiangshan.backend.fu.FuConfig._
2641d8d239Shappy-lximport xiangshan.backend.fu.fpu.FPU
2741d8d239Shappy-lximport xiangshan.backend.rob.RobLsqIO
2841d8d239Shappy-lximport xiangshan.cache._
2941d8d239Shappy-lximport xiangshan.frontend.FtqPtr
3041d8d239Shappy-lximport xiangshan.ExceptionNO._
3141d8d239Shappy-lximport xiangshan.cache.wpu.ReplayCarry
3241d8d239Shappy-lximport xiangshan.backend.rob.RobPtr
3341d8d239Shappy-lximport xiangshan.backend.Bundles.{MemExuOutput, DynInst}
3441d8d239Shappy-lx
/**
  * Single-entry buffer that executes a misaligned load on behalf of the load pipelines.
  *
  * A load arriving on `io.req` with `loadAddrMisaligned` set (and no breakpoint / debug-mode
  * trigger) is captured into `req`. Once the ROB reports it as the pending load, the buffer:
  *
  *   1. `s_split` - turns it into one 128-bit load when it stays inside a 16-byte region,
  *                  otherwise into two naturally aligned loads (low / high address part);
  *   2. `s_req` / `s_resp` - sends the pieces one by one through `io.splitLoadReq` and collects
  *                  the answers from `io.splitLoadResp`, re-sending a piece that asks for replay;
  *   3. `s_comb`  - shifts, truncates and concatenates the returned data;
  *   4. `s_wb`    - writes the result (or the exception) back through `io.writeBack`;
  *   5. `s_wait`  - waits for a load commit or a flushing redirect, then returns to `s_idle`.
  *
  * If any piece hits MMIO, the load is written back with `loadAddrMisaligned` so that software
  * handles it; if any piece raises an exception, that exception is written back instead.
  */
class LoadMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
  with HasLoadHelper
{
  private val enqPortNum = LoadPipelineWidth
  private val maxSplitNum = 2

  // the split tables below only know how to cut a load into a low and a high part
  require(maxSplitNum == 2)

  // low two bits of fuOpType: access size of the load
  private val LB = "b00".U(2.W)
  private val LH = "b01".U(2.W)
  private val LW = "b10".U(2.W)
  private val LD = "b11".U(2.W)

  // encode of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  /**
    * Byte-enable pattern (anchored at byte 0) for an access of the given size.
    *
    * @param sizeEncode size encoding, one of LB / LH / LW / LD
    * @return 0x1, 0x3, 0xf or 0xff respectively
    */
  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    LB -> 0x1.U, // lb
    LH -> 0x3.U, // lh
    LW -> 0xf.U, // lw
    LD -> 0xff.U  // ld
  ))

  /**
    * Drop `shiftEncode` low bytes of `data(63, 0)` and keep the next `truncateEncode` bytes.
    *
    * @param shiftEncode    number of low-order bytes to discard (BYTE0 .. BYTE7)
    * @param truncateEncode number of bytes to keep (BYTE1 .. BYTE7); BYTE0 yields zero
    * @param data           raw load result, only bits 63..0 are used
    * @return the selected bytes, right-aligned, as the low XLEN bits of the lookup result
    */
  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
    val shiftData = LookupTree(shiftEncode, List(
      BYTE0 -> data(63,    0),
      BYTE1 -> data(63,    8),
      BYTE2 -> data(63,   16),
      BYTE3 -> data(63,   24),
      BYTE4 -> data(63,   32),
      BYTE5 -> data(63,   40),
      BYTE6 -> data(63,   48),
      BYTE7 -> data(63,   56)
    ))
    val truncateData = LookupTree(truncateEncode, List(
      BYTE0 -> 0.U(XLEN.W), // can not truncate with 0 byte width
      BYTE1 -> shiftData(7,    0),
      BYTE2 -> shiftData(15,   0),
      BYTE3 -> shiftData(23,   0),
      BYTE4 -> shiftData(31,   0),
      BYTE5 -> shiftData(39,   0),
      BYTE6 -> shiftData(47,   0),
      BYTE7 -> shiftData(55,   0)
    ))
    truncateData(XLEN - 1, 0)
  }

  /**
    * True when `a` is younger than `b`: its robIdx is after `b`'s, or the robIdx is not before
    * `b`'s and its uopIdx is larger.
    */
  private def reqIsYounger(a: LqWriteBundle, b: LqWriteBundle): Bool =
    isAfter(a.uop.robIdx, b.uop.robIdx) ||
      (isNotBefore(a.uop.robIdx, b.uop.robIdx) && a.uop.uopIdx > b.uop.uopIdx)

  /**
    * Reduce a list of (valid, request) pairs to the oldest valid request.
    *
    * Lists of length 0 or 1 are returned unchanged; a pair is resolved with a mux; longer lists
    * are halved recursively and the two winners compared.
    *
    * @param valid per-entry valid flags
    * @param bits  per-entry requests, same length as `valid`
    * @return a (valid, bits) pair of sequences holding at most one element
    */
  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    // elaboration-time precondition, not a hardware assertion
    require(valid.length == bits.length)
    valid.length match {
      case 0 | 1 =>
        (valid, bits)

      case 2 =>
        val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
        for (i <- res.indices) {
          res(i).valid := valid(i)
          res(i).bits := bits(i)
        }
        val oldest = Mux(valid(0) && valid(1),
          // both valid: entry 1 wins only when entry 0 is the younger one
          Mux(reqIsYounger(bits(0), bits(1)), res(1), res(0)),
          Mux(valid(0) && !valid(1), res(0), res(1)))
        (Seq(oldest.valid), Seq(oldest.bits))

      case n =>
        val (leftValid, rightValid) = valid.splitAt(n / 2)
        val (leftBits, rightBits) = bits.splitAt(n / 2)
        val left = selectOldest(leftValid, leftBits)
        val right = selectOldest(rightValid, rightBits)
        selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val io = IO(new Bundle() {
    // redirect used to drop in-flight / buffered requests (checked with needFlush)
    val redirect        = Flipped(Valid(new Redirect))
    // one enqueue port per load pipeline; only loads flagged loadAddrMisaligned are captured
    val req             = Vec(enqPortNum, Flipped(Valid(new LqWriteBundle)))
    // ROB interface: pendingld / pendingPtr start the split, lcommit ends the wait state
    val rob             = Flipped(new RobLsqIO)
    // aligned piece currently being issued
    val splitLoadReq    = Decoupled(new LsPipelineBundle)
    // result of the piece issued through splitLoadReq
    val splitLoadResp   = Flipped(Valid(new LqWriteBundle))
    // merged result (or exception) of the misaligned load
    val writeBack       = Decoupled(new MemExuOutput)
    // exception information of the faulting piece, meant to overwrite the load exception buffer
    val overwriteExpBuf = Output(new XSBundle {
      val valid  = Bool()
      val vaddr  = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    // asserted one cycle after a write-back that carries neither exception nor mmio
    val flushLdExpBuff  = Output(Bool())
  })

  // this module never reports mmio / uop information to the ROB
  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop  := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  // the single buffered misaligned load
  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  // enqueue
  // s1:
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(x => x.valid))

  // s2: delay 1 cycle
  val s2_req = RegNext(s1_req)
  // a request survives s2 only if neither last cycle's nor this cycle's redirect flushes it
  val s2_valid = (0 until enqPortNum).map(i =>
    RegNext(s1_valid(i)) &&
    !s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
    !s2_req(i).uop.robIdx.needFlush(io.redirect)
  )
  val s2_miss_aligned = s2_req.map(x =>
    x.uop.exceptionVec(loadAddrMisaligned) && !x.uop.exceptionVec(breakPoint) && !TriggerAction.isDmode(x.uop.trigger)
  )

  val s2_enqueue = Wire(Vec(enqPortNum, Bool()))
  for (w <- 0 until enqPortNum) {
    s2_enqueue(w) := s2_valid(w) && s2_miss_aligned(w)
  }

  when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
    // buffered load is flushed: stay valid only if a new one arrives in the same cycle
    req_valid := s2_enqueue.asUInt.orR
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req_valid := true.B
  }

  val reqSel = selectOldest(s2_enqueue, s2_req)

  when (req_valid) {
    // keep the older of the buffered load and the oldest incoming candidate
    req := Mux(
      reqSel._1(0) && reqIsYounger(req, reqSel._2(0)),
      reqSel._2(0),
      req)
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req := reqSel._2(0)
  }

  // the buffered load is the one the ROB is currently waiting for
  val robMatch = req_valid && io.rob.pendingld && (io.rob.pendingPtr === req.uop.robIdx)

  // buffer control:
  //  - split miss-aligned load into aligned loads
  //  - send split load to ldu and get result from ldu
  //  - merge them and write back to rob
  val s_idle :: s_split :: s_req :: s_resp :: s_comb :: s_wb :: s_wait :: Nil = Enum(7)
  val bufferState = RegInit(s_idle)
  val splitLoadReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitLoadResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
  // one bit per piece that has not been completed yet
  val unSentLoads = RegInit(0.U(maxSplitNum.W))
  // index of the piece currently being issued / answered
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))

  // if there is exception or mmio in split load
  val globalException = RegInit(false.B)
  val globalMMIO = RegInit(false.B)

  val hasException = ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR
  val isMMIO = io.splitLoadResp.bits.mmio

  /** Return the buffer to its empty state; emits connections at the call site. */
  private def clearBufferState(): Unit = {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentLoads := 0.U
    globalException := false.B
    globalMMIO := false.B
  }

  switch(bufferState) {
    is (s_idle) {
      when (robMatch) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitLoadReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitLoadResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isMMIO) {
          // commit directly when exception occurs
          // if any split load reaches mmio space, delegate to software loadAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalMMIO := isMMIO
        } .elsewhen(io.splitLoadResp.bits.rep_info.need_rep || (unSentLoads & ~clearOh).orR) {
          // need replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // merge the split load results
          bufferState := s_comb
        }
      }
    }

    is (s_comb) {
      bufferState := s_wb
    }

    is (s_wb) {
      when(io.writeBack.fire) {
        bufferState := s_wait
      }
    }

    is (s_wait) {
      when(io.rob.lcommit =/= 0.U || req.uop.robIdx.needFlush(io.redirect)) {
        // rob commits the unaligned load or handled the exception, reset all state
        clearBufferState()
      }
    }
  }

  // address of the last byte touched, computed on the low 5 address bits only
  val highAddress = LookupTree(req.uop.fuOpType(1, 0), List(
    LB -> 0.U,
    LH -> 1.U,
    LW -> 3.U,
    LD -> 7.U
  )) + req.vaddr(4, 0)
  // to see if (vaddr + opSize - 1) and vaddr are in the same 16 bytes region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr   = (req.vaddr >> 4) << 4// req.vaddr & ~("b1111".U)
  val aligned16BytesSel    = req.vaddr(3, 0)

  // meta of 128 bit load
  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of split loads
  val lowAddrLoad  = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val lowResultShift = RegInit(0.U(3.W)) // how many bytes should we shift right when got result
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes should we take from result
  val highResultShift = RegInit(0.U(3.W))
  val highResultWidth = RegInit(0.U(3.W))

  /**
    * Describe one way of cutting the buffered load into a low and a high aligned access.
    *
    * The low access starts `lowSkip` bytes below `req.vaddr` and contributes `lowBytes` bytes
    * after those `lowSkip` bytes are shifted out. The high access starts right after the bytes
    * provided by the low access (`req.vaddr + lowBytes`) and contributes `highBytes` bytes
    * starting at its byte 0. Emits connections at the call site.
    *
    * @param lowOp     fuOpType of the low access (LB / LH / LW / LD)
    * @param lowMask   byte-enable pattern of the low access before shifting to its address
    * @param lowSkip   bytes between the low access address and `req.vaddr`
    * @param lowBytes  bytes taken from the low access result
    * @param highOp    fuOpType of the high access
    * @param highMask  byte-enable pattern of the high access before shifting to its address
    * @param highBytes bytes taken from the high access result
    */
  private def setupSplitPair(
    lowOp: UInt, lowMask: Int, lowSkip: Int, lowBytes: Int,
    highOp: UInt, highMask: Int, highBytes: Int
  ): Unit = {
    lowAddrLoad.uop.fuOpType := lowOp
    lowAddrLoad.vaddr := (if (lowSkip == 0) req.vaddr else req.vaddr - lowSkip.U)
    lowAddrLoad.mask  := lowMask.U << lowAddrLoad.vaddr(3, 0)
    lowResultShift    := lowSkip.U(3.W)
    lowResultWidth    := lowBytes.U(3.W)

    highAddrLoad.uop.fuOpType := highOp
    highAddrLoad.vaddr := req.vaddr + lowBytes.U
    highAddrLoad.mask  := highMask.U << highAddrLoad.vaddr(3, 0)
    highResultShift    := BYTE0
    highResultWidth    := highBytes.U(3.W)
  }

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      // change this unaligned load into a 128 bits load
      unSentLoads := 1.U
      curPtr := 0.U
      new128Load.vaddr := aligned16BytesAddr
      new128Load.fullva := req.fullva
      // new128Load.mask  := (getMask(req.uop.fuOpType(1, 0)) << aligned16BytesSel).asUInt
      new128Load.mask  := 0xffff.U
      new128Load.uop   := req.uop
      new128Load.uop.exceptionVec(loadAddrMisaligned) := false.B
      new128Load.is128bit := true.B
      splitLoadReqs(0) := new128Load
    } .otherwise {
      // split this unaligned load into `maxSplitNum` aligned loads
      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrLoad.uop := req.uop
      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      lowAddrLoad.fullva := req.fullva
      highAddrLoad.uop := req.uop
      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      highAddrLoad.fullva := req.fullva

      // arguments: lowOp, lowMask, lowSkip, lowBytes, highOp, highMask, highBytes
      switch (req.uop.fuOpType(1, 0)) {
        is (LB) {
          assert(false.B, "lb should not trigger miss align")
        }

        is (LH) {
          setupSplitPair(LB, 0x1, 0, 1, LB, 0x1, 1)
        }

        is (LW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger miss align")
            }
            is ("b01".U) { setupSplitPair(LW, 0xf, 1, 3, LB, 0x1, 1) }
            is ("b10".U) { setupSplitPair(LH, 0x3, 0, 2, LH, 0x3, 2) }
            is ("b11".U) { setupSplitPair(LB, 0x1, 0, 1, LW, 0xf, 3) }
          }
        }

        is (LD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger miss align")
            }
            is ("b001".U) { setupSplitPair(LD, 0xff, 1, 7, LB, 0x1,  1) }
            is ("b010".U) { setupSplitPair(LD, 0xff, 2, 6, LH, 0x3,  2) }
            is ("b011".U) { setupSplitPair(LD, 0xff, 3, 5, LW, 0xf,  3) }
            is ("b100".U) { setupSplitPair(LW, 0xf,  0, 4, LW, 0xf,  4) }
            is ("b101".U) { setupSplitPair(LW, 0xf,  1, 3, LD, 0xff, 5) }
            is ("b110".U) { setupSplitPair(LH, 0x3,  0, 2, LD, 0xff, 6) }
            is ("b111".U) { setupSplitPair(LB, 0x1,  0, 1, LD, 0xff, 7) }
          }
        }
      }

      splitLoadReqs(0) := lowAddrLoad
      splitLoadReqs(1) := highAddrLoad
    }
  }

  io.splitLoadReq.valid := req_valid && (bufferState === s_req)
  io.splitLoadReq.bits  := splitLoadReqs(curPtr)

  when (io.splitLoadResp.valid) {
    splitLoadResp(curPtr) := io.splitLoadResp.bits
    when (isMMIO) {
      unSentLoads := 0.U
      splitLoadResp(curPtr).uop.exceptionVec := 0.U.asTypeOf(ExceptionVec())
      // delegate to software
      splitLoadResp(curPtr).uop.exceptionVec(loadAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unSentLoads := 0.U
    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
      // piece completed: mark it done and move on to the next one
      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
    }
  }

  val combinedData = RegInit(0.U(XLEN.W))

  when (bufferState === s_comb) {
    when (!cross16BytesBoundary) {
      // single 128-bit load: pick the 8 bytes starting at the original offset
      val shiftData = LookupTree(aligned16BytesSel, List(
        "b0000".U -> splitLoadResp(0).data(63,     0),
        "b0001".U -> splitLoadResp(0).data(71,     8),
        "b0010".U -> splitLoadResp(0).data(79,    16),
        "b0011".U -> splitLoadResp(0).data(87,    24),
        "b0100".U -> splitLoadResp(0).data(95,    32),
        "b0101".U -> splitLoadResp(0).data(103,   40),
        "b0110".U -> splitLoadResp(0).data(111,   48),
        "b0111".U -> splitLoadResp(0).data(119,   56),
        "b1000".U -> splitLoadResp(0).data(127,   64),
        "b1001".U -> splitLoadResp(0).data(127,   72),
        "b1010".U -> splitLoadResp(0).data(127,   80),
        "b1011".U -> splitLoadResp(0).data(127,   88),
        "b1100".U -> splitLoadResp(0).data(127,   96),
        "b1101".U -> splitLoadResp(0).data(127,  104),
        "b1110".U -> splitLoadResp(0).data(127,  112),
        "b1111".U -> splitLoadResp(0).data(127,  120)
      ))
      val truncateData = LookupTree(req.uop.fuOpType(1, 0), List(
        LB -> shiftData(7,  0), // lb
        LH -> shiftData(15, 0), // lh
        LW -> shiftData(31, 0), // lw
        LD -> shiftData(63, 0)  // ld
      ))
      combinedData := rdataHelper(req.uop, truncateData(XLEN - 1, 0))
    } .otherwise {
      val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
                            .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
                            .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
      // bytes below lowResultWidth come from the low part, the rest from the high part
      for (i <- 0 until XLEN / 8) {
        when (i.U < lowResultWidth) {
          catResult(i) := lowAddrResult(i)
        } .otherwise {
          catResult(i) := highAddrResult(i.U - lowResultWidth)
        }
      }
      combinedData := rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0))
    }
  }

  io.writeBack.valid := req_valid && (bufferState === s_wb)
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := Mux(
    globalMMIO || globalException,
    splitLoadResp(curPtr).uop.exceptionVec,
    0.U.asTypeOf(ExceptionVec()) // TODO: is this ok?
  )
  // flushPipe is requested only for a successful (no mmio, no exception) misaligned load
  io.writeBack.bits.uop.flushPipe := !(globalMMIO || globalException)
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := combinedData
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  // placed after the state machine so that a flush overrides any transition made above
  when (flush && (bufferState =/= s_idle)) {
    clearBufferState()
  }

  // NOTE: special case (unaligned load cross page, page fault happens in next page)
  // if exception happens in the higher page address part, overwrite the loadExceptionBuffer vaddr
  val overwriteExpBuf = GatedValidRegNext(req_valid && globalException)
  val overwriteVaddr = GatedRegNext(Mux(
    cross16BytesBoundary && (curPtr === 1.U),
    splitLoadResp(curPtr).vaddr,
    splitLoadResp(curPtr).fullva))
  val overwriteIsHyper = GatedRegNext(splitLoadResp(curPtr).isHyper)
  val overwriteGpaddr = GatedRegNext(splitLoadResp(curPtr).gpaddr)
  val overwriteIsForVSnonLeafPTE = GatedRegNext(splitLoadResp(curPtr).isForVSnonLeafPTE)

  io.overwriteExpBuf.valid := overwriteExpBuf
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  // when no exception or mmio, flush loadExceptionBuffer at s_wb
  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !(globalMMIO || globalException))
  io.flushLdExpBuff := flushLdExpBuff

  XSPerfAccumulate("alloc",                  RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush",                  flush)
  XSPerfAccumulate("flush_idle",             flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle",         flush && (bufferState =/= s_idle))
}