// xref: /XiangShan/src/main/scala/xiangshan/mem/lsqueue/LoadMisalignBuffer.scala (revision 35bb77967d8f8147bbe08e4cf9ecb32a8f912c9d)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
1641d8d239Shappy-lx
1741d8d239Shappy-lxpackage xiangshan.mem
1841d8d239Shappy-lx
1941d8d239Shappy-lximport org.chipsalliance.cde.config.Parameters
2041d8d239Shappy-lximport chisel3._
2141d8d239Shappy-lximport chisel3.util._
2241d8d239Shappy-lximport utils._
2341d8d239Shappy-lximport utility._
2441d8d239Shappy-lximport xiangshan._
259e12e8edScz4eimport xiangshan.ExceptionNO._
269e12e8edScz4eimport xiangshan.frontend.FtqPtr
2741d8d239Shappy-lximport xiangshan.backend.fu.FuConfig._
28e7ab4635SHuijin Liimport xiangshan.backend.fu.FuType
2941d8d239Shappy-lximport xiangshan.backend.fu.fpu.FPU
3041d8d239Shappy-lximport xiangshan.backend.rob.RobLsqIO
319e12e8edScz4eimport xiangshan.mem.Bundles._
3241d8d239Shappy-lximport xiangshan.backend.rob.RobPtr
3341d8d239Shappy-lximport xiangshan.backend.Bundles.{MemExuOutput, DynInst}
34282dd18cSsfencevmaimport xiangshan.backend.fu.FuConfig.LduCfg
359e12e8edScz4eimport xiangshan.cache.mmu.HasTlbConst
369e12e8edScz4eimport xiangshan.cache._
379e12e8edScz4eimport xiangshan.cache.wpu.ReplayCarry
3841d8d239Shappy-lx
3941d8d239Shappy-lxclass LoadMisalignBuffer(implicit p: Parameters) extends XSModule
4041d8d239Shappy-lx  with HasCircularQueuePtrHelper
4141d8d239Shappy-lx  with HasLoadHelper
4221f3709aShappy-lx  with HasTlbConst
4341d8d239Shappy-lx{
4441d8d239Shappy-lx  private val enqPortNum = LoadPipelineWidth
4541d8d239Shappy-lx  private val maxSplitNum = 2
4641d8d239Shappy-lx
4741d8d239Shappy-lx  require(maxSplitNum == 2)
4841d8d239Shappy-lx
4941d8d239Shappy-lx  private val LB = "b00".U(2.W)
5041d8d239Shappy-lx  private val LH = "b01".U(2.W)
5141d8d239Shappy-lx  private val LW = "b10".U(2.W)
5241d8d239Shappy-lx  private val LD = "b11".U(2.W)
5341d8d239Shappy-lx
5441d8d239Shappy-lx  // encode of how many bytes to shift or truncate
5541d8d239Shappy-lx  private val BYTE0 = "b000".U(3.W)
5641d8d239Shappy-lx  private val BYTE1 = "b001".U(3.W)
5741d8d239Shappy-lx  private val BYTE2 = "b010".U(3.W)
5841d8d239Shappy-lx  private val BYTE3 = "b011".U(3.W)
5941d8d239Shappy-lx  private val BYTE4 = "b100".U(3.W)
6041d8d239Shappy-lx  private val BYTE5 = "b101".U(3.W)
6141d8d239Shappy-lx  private val BYTE6 = "b110".U(3.W)
6241d8d239Shappy-lx  private val BYTE7 = "b111".U(3.W)
6341d8d239Shappy-lx
6441d8d239Shappy-lx  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
6541d8d239Shappy-lx    LB -> 0x1.U, // lb
6641d8d239Shappy-lx    LH -> 0x3.U, // lh
6741d8d239Shappy-lx    LW -> 0xf.U, // lw
6841d8d239Shappy-lx    LD -> 0xff.U  // ld
6941d8d239Shappy-lx  ))
7041d8d239Shappy-lx
7141d8d239Shappy-lx  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
7241d8d239Shappy-lx    val shiftData = LookupTree(shiftEncode, List(
7341d8d239Shappy-lx      BYTE0 -> data(63,    0),
7441d8d239Shappy-lx      BYTE1 -> data(63,    8),
7541d8d239Shappy-lx      BYTE2 -> data(63,   16),
7641d8d239Shappy-lx      BYTE3 -> data(63,   24),
7741d8d239Shappy-lx      BYTE4 -> data(63,   32),
7841d8d239Shappy-lx      BYTE5 -> data(63,   40),
7941d8d239Shappy-lx      BYTE6 -> data(63,   48),
8041d8d239Shappy-lx      BYTE7 -> data(63,   56)
8141d8d239Shappy-lx    ))
8241d8d239Shappy-lx    val truncateData = LookupTree(truncateEncode, List(
8341d8d239Shappy-lx      BYTE0 -> 0.U(XLEN.W), // can not truncate with 0 byte width
8441d8d239Shappy-lx      BYTE1 -> shiftData(7,    0),
8541d8d239Shappy-lx      BYTE2 -> shiftData(15,   0),
8641d8d239Shappy-lx      BYTE3 -> shiftData(23,   0),
8741d8d239Shappy-lx      BYTE4 -> shiftData(31,   0),
8841d8d239Shappy-lx      BYTE5 -> shiftData(39,   0),
8941d8d239Shappy-lx      BYTE6 -> shiftData(47,   0),
9041d8d239Shappy-lx      BYTE7 -> shiftData(55,   0)
9141d8d239Shappy-lx    ))
9241d8d239Shappy-lx    truncateData(XLEN - 1, 0)
9341d8d239Shappy-lx  }
9441d8d239Shappy-lx
9541d8d239Shappy-lx  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
9641d8d239Shappy-lx    assert(valid.length == bits.length)
9741d8d239Shappy-lx    if (valid.length == 0 || valid.length == 1) {
9841d8d239Shappy-lx      (valid, bits)
9941d8d239Shappy-lx    } else if (valid.length == 2) {
10041d8d239Shappy-lx      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
10141d8d239Shappy-lx      for (i <- res.indices) {
10241d8d239Shappy-lx        res(i).valid := valid(i)
10341d8d239Shappy-lx        res(i).bits := bits(i)
10441d8d239Shappy-lx      }
10541d8d239Shappy-lx      val oldest = Mux(valid(0) && valid(1),
10641d8d239Shappy-lx        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
107b240e1c0SAnzooooo          (bits(0).uop.robIdx === bits(1).uop.robIdx && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
10841d8d239Shappy-lx        Mux(valid(0) && !valid(1), res(0), res(1)))
10941d8d239Shappy-lx      (Seq(oldest.valid), Seq(oldest.bits))
11041d8d239Shappy-lx    } else {
11141d8d239Shappy-lx      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
11241d8d239Shappy-lx      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
11341d8d239Shappy-lx      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
11441d8d239Shappy-lx    }
11541d8d239Shappy-lx  }
11641d8d239Shappy-lx
11741d8d239Shappy-lx  val io = IO(new Bundle() {
11841d8d239Shappy-lx    val redirect        = Flipped(Valid(new Redirect))
1194ec1f462Scz4e    val enq             = Vec(enqPortNum, Flipped(new MisalignBufferEnqIO))
12041d8d239Shappy-lx    val rob             = Flipped(new RobLsqIO)
12141d8d239Shappy-lx    val splitLoadReq    = Decoupled(new LsPipelineBundle)
12241d8d239Shappy-lx    val splitLoadResp   = Flipped(Valid(new LqWriteBundle))
12341d8d239Shappy-lx    val writeBack       = Decoupled(new MemExuOutput)
124b240e1c0SAnzooooo    val vecWriteBack    = Decoupled(new VecPipelineFeedbackIO(isVStore = false))
125b240e1c0SAnzooooo    val loadOutValid    = Input(Bool())
126b240e1c0SAnzooooo    val loadVecOutValid = Input(Bool())
12741d8d239Shappy-lx    val overwriteExpBuf = Output(new XSBundle {
12841d8d239Shappy-lx      val valid  = Bool()
129db6cfb5aSHaoyuan Feng      val vaddr  = UInt(XLEN.W)
13046e9ee74SHaoyuan Feng      val isHyper = Bool()
131db6cfb5aSHaoyuan Feng      val gpaddr = UInt(XLEN.W)
132ad415ae0SXiaokun-Pei      val isForVSnonLeafPTE = Bool()
13341d8d239Shappy-lx    })
13441d8d239Shappy-lx    val flushLdExpBuff  = Output(Bool())
135b240e1c0SAnzooooo    val loadMisalignFull = Output(Bool())
13641d8d239Shappy-lx  })
13741d8d239Shappy-lx
13841d8d239Shappy-lx  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
13941d8d239Shappy-lx  io.rob.uop  := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))
14041d8d239Shappy-lx
14141d8d239Shappy-lx  val req_valid = RegInit(false.B)
14241d8d239Shappy-lx  val req = Reg(new LqWriteBundle)
14341d8d239Shappy-lx
144b240e1c0SAnzooooo  io.loadMisalignFull := req_valid
14541d8d239Shappy-lx
1464ec1f462Scz4e  (0 until io.enq.length).map{i =>
147b240e1c0SAnzooooo    if (i == 0) {
1484ec1f462Scz4e      io.enq(0).req.ready := !req_valid && io.enq(0).req.valid
149b240e1c0SAnzooooo    }
150b240e1c0SAnzooooo    else {
1514ec1f462Scz4e      io.enq(i).req.ready := !io.enq.take(i).map(_.req.ready).reduce(_ || _) && !req_valid && io.enq(i).req.valid
152b240e1c0SAnzooooo    }
15341d8d239Shappy-lx  }
15441d8d239Shappy-lx
1554ec1f462Scz4e  val select_req_bit   = ParallelPriorityMux(io.enq.map(_.req.valid), io.enq.map(_.req.bits))
1564ec1f462Scz4e  val select_req_valid = io.enq.map(_.req.valid).reduce(_ || _)
157b240e1c0SAnzooooo  val canEnqValid = !req_valid && !select_req_bit.uop.robIdx.needFlush(io.redirect) && select_req_valid
158b240e1c0SAnzooooo  when(canEnqValid) {
159b240e1c0SAnzooooo    req := select_req_bit
160b240e1c0SAnzooooo    req_valid := true.B
16141d8d239Shappy-lx  }
16241d8d239Shappy-lx
16341d8d239Shappy-lx  // buffer control:
164b240e1c0SAnzooooo  //  - s_idle:   idle
165b240e1c0SAnzooooo  //  - s_split:  split misalign laod
166b240e1c0SAnzooooo  //  - s_req:    issue a split memory access request
167b240e1c0SAnzooooo  //  - s_resp:   Responds to a split load access request
168b240e1c0SAnzooooo  //  - s_comb_wakeup_rep: Merge the data and issue a wakeup load
169b240e1c0SAnzooooo  //  - s_wb: writeback yo rob/vecMergeBuffer
170b240e1c0SAnzooooo  val s_idle :: s_split :: s_req :: s_resp :: s_comb_wakeup_rep :: s_wb :: Nil = Enum(6)
17141d8d239Shappy-lx  val bufferState = RegInit(s_idle)
17241d8d239Shappy-lx  val splitLoadReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
17341d8d239Shappy-lx  val splitLoadResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
174282dd18cSsfencevma  val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
17541d8d239Shappy-lx  val unSentLoads = RegInit(0.U(maxSplitNum.W))
17641d8d239Shappy-lx  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))
177b240e1c0SAnzooooo  val needWakeUpReqsWire = Wire(Bool())
178b240e1c0SAnzooooo  val needWakeUpWB       = RegInit(false.B)
179b240e1c0SAnzooooo  val data_select        = RegEnable(genRdataOH(select_req_bit.uop), 0.U(genRdataOH(select_req_bit.uop).getWidth.W), canEnqValid)
18041d8d239Shappy-lx
181*35bb7796SAnzo  // if there is exception or uncache in split load
18241d8d239Shappy-lx  val globalException = RegInit(false.B)
183*35bb7796SAnzo  val globalUncache = RegInit(false.B)
184*35bb7796SAnzo
185*35bb7796SAnzo  // debug info
18641d8d239Shappy-lx  val globalMMIO = RegInit(false.B)
187*35bb7796SAnzo  val globalNC   = RegInit(false.B)
18841d8d239Shappy-lx
189da51a7acSAnzo  val hasException = io.splitLoadResp.bits.vecActive &&
190da51a7acSAnzo    ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR || TriggerAction.isDmode(io.splitLoadResp.bits.uop.trigger)
191*35bb7796SAnzo  val isUncache = io.splitLoadResp.bits.mmio || io.splitLoadResp.bits.nc
192b240e1c0SAnzooooo  needWakeUpReqsWire := false.B
19341d8d239Shappy-lx  switch(bufferState) {
19441d8d239Shappy-lx    is (s_idle) {
195b240e1c0SAnzooooo      when (req_valid) {
19641d8d239Shappy-lx        bufferState := s_split
19741d8d239Shappy-lx      }
19841d8d239Shappy-lx    }
19941d8d239Shappy-lx
20041d8d239Shappy-lx    is (s_split) {
20141d8d239Shappy-lx      bufferState := s_req
20241d8d239Shappy-lx    }
20341d8d239Shappy-lx
20441d8d239Shappy-lx    is (s_req) {
20541d8d239Shappy-lx      when (io.splitLoadReq.fire) {
20641d8d239Shappy-lx        bufferState := s_resp
20741d8d239Shappy-lx      }
20841d8d239Shappy-lx    }
20941d8d239Shappy-lx
21041d8d239Shappy-lx    is (s_resp) {
21141d8d239Shappy-lx      when (io.splitLoadResp.valid) {
21241d8d239Shappy-lx        val clearOh = UIntToOH(curPtr)
213*35bb7796SAnzo        when (hasException || isUncache) {
21441d8d239Shappy-lx          // commit directly when exception ocurs
215*35bb7796SAnzo          // if any split load reaches uncache space, delegate to software loadAddrMisaligned exception
21641d8d239Shappy-lx          bufferState := s_wb
21741d8d239Shappy-lx          globalException := hasException
218*35bb7796SAnzo          globalUncache := isUncache
219*35bb7796SAnzo          globalMMIO := io.splitLoadResp.bits.mmio
220*35bb7796SAnzo          globalNC   := io.splitLoadResp.bits.nc
22141d8d239Shappy-lx        } .elsewhen(io.splitLoadResp.bits.rep_info.need_rep || (unSentLoads & ~clearOh).orR) {
22241d8d239Shappy-lx          // need replay or still has unsent requests
22341d8d239Shappy-lx          bufferState := s_req
22441d8d239Shappy-lx        } .otherwise {
22541d8d239Shappy-lx          // merge the split load results
226b240e1c0SAnzooooo          bufferState := s_comb_wakeup_rep
227b240e1c0SAnzooooo          needWakeUpWB := !req.isvec
22841d8d239Shappy-lx        }
22941d8d239Shappy-lx      }
23041d8d239Shappy-lx    }
23141d8d239Shappy-lx
232b240e1c0SAnzooooo    is (s_comb_wakeup_rep) {
233b240e1c0SAnzooooo      when(!req.isvec) {
234b240e1c0SAnzooooo        when(io.splitLoadReq.fire) {
235b240e1c0SAnzooooo          bufferState := s_wb
236b240e1c0SAnzooooo        }.otherwise {
237b240e1c0SAnzooooo          bufferState := s_comb_wakeup_rep
238b240e1c0SAnzooooo        }
239b240e1c0SAnzooooo        needWakeUpReqsWire := true.B
240b240e1c0SAnzooooo      } .otherwise {
24141d8d239Shappy-lx        bufferState := s_wb
24241d8d239Shappy-lx      }
24341d8d239Shappy-lx
24441d8d239Shappy-lx    }
24541d8d239Shappy-lx
246b240e1c0SAnzooooo    is (s_wb) {
247b240e1c0SAnzooooo      when(req.isvec) {
248b240e1c0SAnzooooo        when(io.vecWriteBack.fire) {
24941d8d239Shappy-lx          bufferState := s_idle
25041d8d239Shappy-lx          req_valid := false.B
25141d8d239Shappy-lx          curPtr := 0.U
25241d8d239Shappy-lx          unSentLoads := 0.U
25341d8d239Shappy-lx          globalException := false.B
254*35bb7796SAnzo          globalUncache := false.B
255b240e1c0SAnzooooo          needWakeUpWB := false.B
256*35bb7796SAnzo
257*35bb7796SAnzo          globalMMIO := false.B
258*35bb7796SAnzo          globalNC   := false.B
25941d8d239Shappy-lx        }
260b240e1c0SAnzooooo
261b240e1c0SAnzooooo      } .otherwise {
262b240e1c0SAnzooooo        when(io.writeBack.fire) {
263b240e1c0SAnzooooo          bufferState := s_idle
264b240e1c0SAnzooooo          req_valid := false.B
265b240e1c0SAnzooooo          curPtr := 0.U
266b240e1c0SAnzooooo          unSentLoads := 0.U
267b240e1c0SAnzooooo          globalException := false.B
268*35bb7796SAnzo          globalUncache := false.B
269b240e1c0SAnzooooo          needWakeUpWB := false.B
270*35bb7796SAnzo
271*35bb7796SAnzo          globalMMIO := false.B
272*35bb7796SAnzo          globalNC   := false.B
27341d8d239Shappy-lx        }
27441d8d239Shappy-lx      }
27541d8d239Shappy-lx
276b240e1c0SAnzooooo    }
277b240e1c0SAnzooooo  }
278b240e1c0SAnzooooo
279b240e1c0SAnzooooo  val alignedType = Mux(req.isvec, req.alignedType(1,0), req.uop.fuOpType(1, 0))
280b240e1c0SAnzooooo  val highAddress = LookupTree(alignedType, List(
28141d8d239Shappy-lx    LB -> 0.U,
28241d8d239Shappy-lx    LH -> 1.U,
28341d8d239Shappy-lx    LW -> 3.U,
28441d8d239Shappy-lx    LD -> 7.U
28541d8d239Shappy-lx  )) + req.vaddr(4, 0)
28641d8d239Shappy-lx  // to see if (vaddr + opSize - 1) and vaddr are in the same 16 bytes region
28741d8d239Shappy-lx  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
28841d8d239Shappy-lx  val aligned16BytesAddr   = (req.vaddr >> 4) << 4// req.vaddr & ~("b1111".U)
28941d8d239Shappy-lx  val aligned16BytesSel    = req.vaddr(3, 0)
29041d8d239Shappy-lx
29141d8d239Shappy-lx  // meta of 128 bit load
29241d8d239Shappy-lx  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
29341d8d239Shappy-lx  // meta of split loads
29441d8d239Shappy-lx  val lowAddrLoad  = WireInit(0.U.asTypeOf(new LsPipelineBundle))
29541d8d239Shappy-lx  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
29641d8d239Shappy-lx  val lowResultShift = RegInit(0.U(3.W)) // how many bytes should we shift right when got result
29741d8d239Shappy-lx  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes should we take from result
29841d8d239Shappy-lx  val highResultShift = RegInit(0.U(3.W))
29941d8d239Shappy-lx  val highResultWidth = RegInit(0.U(3.W))
30041d8d239Shappy-lx
30141d8d239Shappy-lx  when (bufferState === s_split) {
30241d8d239Shappy-lx    when (!cross16BytesBoundary) {
303b240e1c0SAnzooooo      assert(false.B, s"There should be no non-aligned access that does not cross 16Byte boundaries.")
30441d8d239Shappy-lx    } .otherwise {
30541d8d239Shappy-lx      // split this unaligned load into `maxSplitNum` aligned loads
30641d8d239Shappy-lx      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
30741d8d239Shappy-lx      curPtr := 0.U
30841d8d239Shappy-lx      lowAddrLoad.uop := req.uop
30941d8d239Shappy-lx      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
3109abad712SHaoyuan Feng      lowAddrLoad.fullva := req.fullva
31141d8d239Shappy-lx      highAddrLoad.uop := req.uop
31241d8d239Shappy-lx      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
3139abad712SHaoyuan Feng      highAddrLoad.fullva := req.fullva
31441d8d239Shappy-lx
315b240e1c0SAnzooooo      switch (alignedType(1, 0)) {
31641d8d239Shappy-lx        is (LB) {
31741d8d239Shappy-lx          assert(false.B, "lb should not trigger miss align")
31841d8d239Shappy-lx        }
31941d8d239Shappy-lx
32041d8d239Shappy-lx        is (LH) {
32141d8d239Shappy-lx          lowAddrLoad.uop.fuOpType := LB
32241d8d239Shappy-lx          lowAddrLoad.vaddr := req.vaddr
32341d8d239Shappy-lx          lowAddrLoad.mask  := 0x1.U << lowAddrLoad.vaddr(3, 0)
32441d8d239Shappy-lx          lowResultShift    := BYTE0
32541d8d239Shappy-lx          lowResultWidth    := BYTE1
32641d8d239Shappy-lx
32741d8d239Shappy-lx          highAddrLoad.uop.fuOpType := LB
32841d8d239Shappy-lx          highAddrLoad.vaddr := req.vaddr + 1.U
32941d8d239Shappy-lx          highAddrLoad.mask  := 0x1.U << highAddrLoad.vaddr(3, 0)
33041d8d239Shappy-lx          highResultShift    := BYTE0
33141d8d239Shappy-lx          highResultWidth    := BYTE1
33241d8d239Shappy-lx        }
33341d8d239Shappy-lx
33441d8d239Shappy-lx        is (LW) {
33541d8d239Shappy-lx          switch (req.vaddr(1, 0)) {
33641d8d239Shappy-lx            is ("b00".U) {
33741d8d239Shappy-lx              assert(false.B, "should not trigger miss align")
33841d8d239Shappy-lx            }
33941d8d239Shappy-lx
34041d8d239Shappy-lx            is ("b01".U) {
34141d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LW
34241d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr - 1.U
34341d8d239Shappy-lx              lowAddrLoad.mask  := 0xf.U << lowAddrLoad.vaddr(3, 0)
34441d8d239Shappy-lx              lowResultShift    := BYTE1
34541d8d239Shappy-lx              lowResultWidth    := BYTE3
34641d8d239Shappy-lx
34741d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LB
34841d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 3.U
34941d8d239Shappy-lx              highAddrLoad.mask  := 0x1.U << highAddrLoad.vaddr(3, 0)
35041d8d239Shappy-lx              highResultShift    := BYTE0
35141d8d239Shappy-lx              highResultWidth    := BYTE1
35241d8d239Shappy-lx            }
35341d8d239Shappy-lx
35441d8d239Shappy-lx            is ("b10".U) {
35541d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LH
35641d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr
35741d8d239Shappy-lx              lowAddrLoad.mask  := 0x3.U << lowAddrLoad.vaddr(3, 0)
35841d8d239Shappy-lx              lowResultShift    := BYTE0
35941d8d239Shappy-lx              lowResultWidth    := BYTE2
36041d8d239Shappy-lx
36141d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LH
36241d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 2.U
36341d8d239Shappy-lx              highAddrLoad.mask  := 0x3.U << highAddrLoad.vaddr(3, 0)
36441d8d239Shappy-lx              highResultShift    := BYTE0
36541d8d239Shappy-lx              highResultWidth    := BYTE2
36641d8d239Shappy-lx            }
36741d8d239Shappy-lx
36841d8d239Shappy-lx            is ("b11".U) {
36941d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LB
37041d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr
37141d8d239Shappy-lx              lowAddrLoad.mask  := 0x1.U << lowAddrLoad.vaddr(3, 0)
37241d8d239Shappy-lx              lowResultShift    := BYTE0
37341d8d239Shappy-lx              lowResultWidth    := BYTE1
37441d8d239Shappy-lx
37541d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LW
37641d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 1.U
37741d8d239Shappy-lx              highAddrLoad.mask  := 0xf.U << highAddrLoad.vaddr(3, 0)
37841d8d239Shappy-lx              highResultShift    := BYTE0
37941d8d239Shappy-lx              highResultWidth    := BYTE3
38041d8d239Shappy-lx            }
38141d8d239Shappy-lx          }
38241d8d239Shappy-lx        }
38341d8d239Shappy-lx
38441d8d239Shappy-lx        is (LD) {
38541d8d239Shappy-lx          switch (req.vaddr(2, 0)) {
38641d8d239Shappy-lx            is ("b000".U) {
38741d8d239Shappy-lx              assert(false.B, "should not trigger miss align")
38841d8d239Shappy-lx            }
38941d8d239Shappy-lx
39041d8d239Shappy-lx            is ("b001".U) {
39141d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LD
39241d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr - 1.U
39341d8d239Shappy-lx              lowAddrLoad.mask  := 0xff.U << lowAddrLoad.vaddr(3, 0)
39441d8d239Shappy-lx              lowResultShift    := BYTE1
39541d8d239Shappy-lx              lowResultWidth    := BYTE7
39641d8d239Shappy-lx
39741d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LB
39841d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 7.U
39941d8d239Shappy-lx              highAddrLoad.mask  := 0x1.U << highAddrLoad.vaddr(3, 0)
40041d8d239Shappy-lx              highResultShift    := BYTE0
40141d8d239Shappy-lx              highResultWidth    := BYTE1
40241d8d239Shappy-lx            }
40341d8d239Shappy-lx
40441d8d239Shappy-lx            is ("b010".U) {
40541d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LD
40641d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr - 2.U
40741d8d239Shappy-lx              lowAddrLoad.mask  := 0xff.U << lowAddrLoad.vaddr(3, 0)
40841d8d239Shappy-lx              lowResultShift    := BYTE2
40941d8d239Shappy-lx              lowResultWidth    := BYTE6
41041d8d239Shappy-lx
41141d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LH
41241d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 6.U
41341d8d239Shappy-lx              highAddrLoad.mask  := 0x3.U << highAddrLoad.vaddr(3, 0)
41441d8d239Shappy-lx              highResultShift    := BYTE0
41541d8d239Shappy-lx              highResultWidth    := BYTE2
41641d8d239Shappy-lx            }
41741d8d239Shappy-lx
41841d8d239Shappy-lx            is ("b011".U) {
41941d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LD
42041d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr - 3.U
42141d8d239Shappy-lx              lowAddrLoad.mask  := 0xff.U << lowAddrLoad.vaddr(3, 0)
42241d8d239Shappy-lx              lowResultShift    := BYTE3
42341d8d239Shappy-lx              lowResultWidth    := BYTE5
42441d8d239Shappy-lx
42541d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LW
42641d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 5.U
42741d8d239Shappy-lx              highAddrLoad.mask  := 0xf.U << highAddrLoad.vaddr(3, 0)
42841d8d239Shappy-lx              highResultShift    := BYTE0
42941d8d239Shappy-lx              highResultWidth    := BYTE3
43041d8d239Shappy-lx            }
43141d8d239Shappy-lx
43241d8d239Shappy-lx            is ("b100".U) {
43341d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LW
43441d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr
43541d8d239Shappy-lx              lowAddrLoad.mask  := 0xf.U << lowAddrLoad.vaddr(3, 0)
43641d8d239Shappy-lx              lowResultShift    := BYTE0
43741d8d239Shappy-lx              lowResultWidth    := BYTE4
43841d8d239Shappy-lx
43941d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LW
44041d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 4.U
44141d8d239Shappy-lx              highAddrLoad.mask  := 0xf.U << highAddrLoad.vaddr(3, 0)
44241d8d239Shappy-lx              highResultShift    := BYTE0
44341d8d239Shappy-lx              highResultWidth    := BYTE4
44441d8d239Shappy-lx            }
44541d8d239Shappy-lx
44641d8d239Shappy-lx            is ("b101".U) {
44741d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LW
44841d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr - 1.U
44941d8d239Shappy-lx              lowAddrLoad.mask  := 0xf.U << lowAddrLoad.vaddr(3, 0)
45041d8d239Shappy-lx              lowResultShift    := BYTE1
45141d8d239Shappy-lx              lowResultWidth    := BYTE3
45241d8d239Shappy-lx
45341d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LD
45441d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 3.U
45541d8d239Shappy-lx              highAddrLoad.mask  := 0xff.U << highAddrLoad.vaddr(3, 0)
45641d8d239Shappy-lx              highResultShift    := BYTE0
45741d8d239Shappy-lx              highResultWidth    := BYTE5
45841d8d239Shappy-lx            }
45941d8d239Shappy-lx
46041d8d239Shappy-lx            is ("b110".U) {
46141d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LH
46241d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr
46341d8d239Shappy-lx              lowAddrLoad.mask  := 0x3.U << lowAddrLoad.vaddr(3, 0)
46441d8d239Shappy-lx              lowResultShift    := BYTE0
46541d8d239Shappy-lx              lowResultWidth    := BYTE2
46641d8d239Shappy-lx
46741d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LD
46841d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 2.U
46941d8d239Shappy-lx              highAddrLoad.mask  := 0xff.U << highAddrLoad.vaddr(3, 0)
47041d8d239Shappy-lx              highResultShift    := BYTE0
47141d8d239Shappy-lx              highResultWidth    := BYTE6
47241d8d239Shappy-lx            }
47341d8d239Shappy-lx
47441d8d239Shappy-lx            is ("b111".U) {
47541d8d239Shappy-lx              lowAddrLoad.uop.fuOpType := LB
47641d8d239Shappy-lx              lowAddrLoad.vaddr := req.vaddr
47741d8d239Shappy-lx              lowAddrLoad.mask  := 0x1.U << lowAddrLoad.vaddr(3, 0)
47841d8d239Shappy-lx              lowResultShift    := BYTE0
47941d8d239Shappy-lx              lowResultWidth    := BYTE1
48041d8d239Shappy-lx
48141d8d239Shappy-lx              highAddrLoad.uop.fuOpType := LD
48241d8d239Shappy-lx              highAddrLoad.vaddr := req.vaddr + 1.U
48341d8d239Shappy-lx              highAddrLoad.mask  := 0xff.U << highAddrLoad.vaddr(3, 0)
48441d8d239Shappy-lx              highResultShift    := BYTE0
48541d8d239Shappy-lx              highResultWidth    := BYTE7
48641d8d239Shappy-lx            }
48741d8d239Shappy-lx          }
48841d8d239Shappy-lx        }
48941d8d239Shappy-lx      }
49041d8d239Shappy-lx
49141d8d239Shappy-lx      splitLoadReqs(0) := lowAddrLoad
49241d8d239Shappy-lx      splitLoadReqs(1) := highAddrLoad
49341d8d239Shappy-lx    }
494282dd18cSsfencevma    exceptionVec := 0.U.asTypeOf(exceptionVec.cloneType)
49541d8d239Shappy-lx  }
49641d8d239Shappy-lx
497b240e1c0SAnzooooo  io.splitLoadReq.valid := req_valid && (bufferState === s_req || bufferState === s_comb_wakeup_rep && needWakeUpReqsWire && !req.isvec)
49841d8d239Shappy-lx  io.splitLoadReq.bits  := splitLoadReqs(curPtr)
499b240e1c0SAnzooooo  io.splitLoadReq.bits.isvec  := req.isvec
500b240e1c0SAnzooooo  io.splitLoadReq.bits.misalignNeedWakeUp  := needWakeUpReqsWire
501b240e1c0SAnzooooo  io.splitLoadReq.bits.isFinalSplit        := curPtr(0) && !needWakeUpReqsWire
5024c5e04f2Shappy-lx  // Restore the information of H extension load
5034c5e04f2Shappy-lx  // bit encoding: | hlv 1 | hlvx 1 | is unsigned(1bit) | size(2bit) |
5044c5e04f2Shappy-lx  val reqIsHlv  = LSUOpType.isHlv(req.uop.fuOpType)
5054c5e04f2Shappy-lx  val reqIsHlvx = LSUOpType.isHlvx(req.uop.fuOpType)
506b240e1c0SAnzooooo  io.splitLoadReq.bits.uop.fuOpType := Mux(req.isvec, req.uop.fuOpType, Cat(reqIsHlv, reqIsHlvx, 0.U(1.W), splitLoadReqs(curPtr).uop.fuOpType(1, 0)))
507b240e1c0SAnzooooo  io.splitLoadReq.bits.alignedType  := Mux(req.isvec, splitLoadReqs(curPtr).uop.fuOpType(1, 0), req.alignedType)
50841d8d239Shappy-lx
50941d8d239Shappy-lx  when (io.splitLoadResp.valid) {
510282dd18cSsfencevma    val resp = io.splitLoadResp.bits
51141d8d239Shappy-lx    splitLoadResp(curPtr) := io.splitLoadResp.bits
512*35bb7796SAnzo    when (isUncache) {
51341d8d239Shappy-lx      unSentLoads := 0.U
514e7ab4635SHuijin Li      exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(exceptionVec.cloneType), LduCfg)
51541d8d239Shappy-lx      // delegate to software
516282dd18cSsfencevma      exceptionVec(loadAddrMisaligned) := true.B
51741d8d239Shappy-lx    } .elsewhen (hasException) {
51841d8d239Shappy-lx      unSentLoads := 0.U
519282dd18cSsfencevma      LduCfg.exceptionOut.map(no => exceptionVec(no) := exceptionVec(no) || resp.uop.exceptionVec(no))
52041d8d239Shappy-lx    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
52141d8d239Shappy-lx      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
52241d8d239Shappy-lx      curPtr := curPtr + 1.U
523282dd18cSsfencevma      exceptionVec := 0.U.asTypeOf(ExceptionVec())
52441d8d239Shappy-lx    }
52541d8d239Shappy-lx  }
52641d8d239Shappy-lx
52741d8d239Shappy-lx  val combinedData = RegInit(0.U(XLEN.W))
52841d8d239Shappy-lx
529b240e1c0SAnzooooo  when (bufferState === s_comb_wakeup_rep) {
53041d8d239Shappy-lx    val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
53141d8d239Shappy-lx                          .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
53241d8d239Shappy-lx    val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
53341d8d239Shappy-lx                          .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
53441d8d239Shappy-lx    val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
53541d8d239Shappy-lx    (0 until XLEN / 8) .map {
53641d8d239Shappy-lx      case i => {
53741d8d239Shappy-lx        when (i.U < lowResultWidth) {
53841d8d239Shappy-lx          catResult(i) := lowAddrResult(i)
53941d8d239Shappy-lx        } .otherwise {
54041d8d239Shappy-lx          catResult(i) := highAddrResult(i.U - lowResultWidth)
54141d8d239Shappy-lx        }
54241d8d239Shappy-lx      }
54341d8d239Shappy-lx    }
544b240e1c0SAnzooooo    combinedData := Mux(req.isvec, rdataVecHelper(req.alignedType, (catResult.asUInt)(XLEN - 1, 0)), rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0)))
545b240e1c0SAnzooooo
54641d8d239Shappy-lx  }
54741d8d239Shappy-lx
548*35bb7796SAnzo  io.writeBack.valid := req_valid && (bufferState === s_wb) && (io.splitLoadResp.valid && io.splitLoadResp.bits.misalignNeedWakeUp || globalUncache || globalException) && !io.loadOutValid && !req.isvec
54941d8d239Shappy-lx  io.writeBack.bits.uop := req.uop
550282dd18cSsfencevma  io.writeBack.bits.uop.exceptionVec := DontCare
551*35bb7796SAnzo  LduCfg.exceptionOut.map(no => io.writeBack.bits.uop.exceptionVec(no) := (globalUncache || globalException) && exceptionVec(no))
552*35bb7796SAnzo  io.writeBack.bits.uop.rfWen := !globalException && !globalUncache && req.uop.rfWen
553e7ab4635SHuijin Li  io.writeBack.bits.uop.fuType := FuType.ldu.U
554b240e1c0SAnzooooo  io.writeBack.bits.uop.flushPipe := false.B
55541d8d239Shappy-lx  io.writeBack.bits.uop.replayInst := false.B
556b240e1c0SAnzooooo  io.writeBack.bits.data := newRdataHelper(data_select, combinedData)
557b240e1c0SAnzooooo  io.writeBack.bits.isFromLoadUnit := needWakeUpWB
558*35bb7796SAnzo  // Misaligned accesses to uncache space trigger exceptions, so theoretically these signals won't do anything practical.
559*35bb7796SAnzo  // But let's get them assigned correctly.
56041d8d239Shappy-lx  io.writeBack.bits.debug.isMMIO := globalMMIO
561*35bb7796SAnzo  io.writeBack.bits.debug.isNC := globalNC
56241d8d239Shappy-lx  io.writeBack.bits.debug.isPerfCnt := false.B
56341d8d239Shappy-lx  io.writeBack.bits.debug.paddr := req.paddr
56441d8d239Shappy-lx  io.writeBack.bits.debug.vaddr := req.vaddr
56541d8d239Shappy-lx
566b240e1c0SAnzooooo
567b240e1c0SAnzooooo  // vector output
568b240e1c0SAnzooooo  io.vecWriteBack.valid := req_valid && (bufferState === s_wb) && !io.loadVecOutValid && req.isvec
569b240e1c0SAnzooooo
570b240e1c0SAnzooooo  io.vecWriteBack.bits.alignedType          := req.alignedType
571b240e1c0SAnzooooo  io.vecWriteBack.bits.vecFeedback          := true.B
572b240e1c0SAnzooooo  io.vecWriteBack.bits.vecdata.get          := combinedData
573b240e1c0SAnzooooo  io.vecWriteBack.bits.isvec                := req.isvec
574b240e1c0SAnzooooo  io.vecWriteBack.bits.elemIdx              := req.elemIdx
575b240e1c0SAnzooooo  io.vecWriteBack.bits.elemIdxInsideVd.get  := req.elemIdxInsideVd
576b240e1c0SAnzooooo  io.vecWriteBack.bits.mask                 := req.mask
577b240e1c0SAnzooooo  io.vecWriteBack.bits.reg_offset.get       := 0.U
578b240e1c0SAnzooooo  io.vecWriteBack.bits.usSecondInv          := req.usSecondInv
579b240e1c0SAnzooooo  io.vecWriteBack.bits.mBIndex              := req.mbIndex
580b240e1c0SAnzooooo  io.vecWriteBack.bits.hit                  := true.B
581b240e1c0SAnzooooo  io.vecWriteBack.bits.sourceType           := RSFeedbackType.lrqFull
582b240e1c0SAnzooooo  io.vecWriteBack.bits.trigger              := TriggerAction.None
583b240e1c0SAnzooooo  io.vecWriteBack.bits.flushState           := DontCare
584b240e1c0SAnzooooo  io.vecWriteBack.bits.exceptionVec         := ExceptionNO.selectByFu(exceptionVec, VlduCfg)
585da51a7acSAnzo  io.vecWriteBack.bits.hasException         := globalException
586b240e1c0SAnzooooo  io.vecWriteBack.bits.vaddr                := req.fullva
587b240e1c0SAnzooooo  io.vecWriteBack.bits.vaNeedExt            := req.vaNeedExt
588b240e1c0SAnzooooo  io.vecWriteBack.bits.gpaddr               := req.gpaddr
589b240e1c0SAnzooooo  io.vecWriteBack.bits.isForVSnonLeafPTE    := req.isForVSnonLeafPTE
590*35bb7796SAnzo  io.vecWriteBack.bits.mmio                 := globalMMIO
591b240e1c0SAnzooooo  io.vecWriteBack.bits.vstart               := req.uop.vpu.vstart
592b240e1c0SAnzooooo  io.vecWriteBack.bits.vecTriggerMask       := req.vecTriggerMask
593*35bb7796SAnzo  io.vecWriteBack.bits.nc                   := globalNC
594b240e1c0SAnzooooo
595b240e1c0SAnzooooo
59641d8d239Shappy-lx  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)
59741d8d239Shappy-lx
598b240e1c0SAnzooooo  when (flush) {
59941d8d239Shappy-lx    bufferState := s_idle
60041d8d239Shappy-lx    req_valid := false.B
60141d8d239Shappy-lx    curPtr := 0.U
60241d8d239Shappy-lx    unSentLoads := 0.U
60341d8d239Shappy-lx    globalException := false.B
604*35bb7796SAnzo    globalUncache := false.B
605*35bb7796SAnzo
60641d8d239Shappy-lx    globalMMIO := false.B
607*35bb7796SAnzo    globalNC   := false.B
60841d8d239Shappy-lx  }
60941d8d239Shappy-lx
61041d8d239Shappy-lx  // NOTE: spectial case (unaligned load cross page, page fault happens in next page)
61141d8d239Shappy-lx  // if exception happens in the higher page address part, overwrite the loadExceptionBuffer vaddr
6126444fe09Sgood-circle  val shouldOverwrite = req_valid && globalException
6136444fe09Sgood-circle  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
6146444fe09Sgood-circle  val overwriteVaddr = RegEnable(
6156444fe09Sgood-circle    Mux(
6169abad712SHaoyuan Feng      cross16BytesBoundary && (curPtr === 1.U),
6179abad712SHaoyuan Feng      splitLoadResp(curPtr).vaddr,
6186444fe09Sgood-circle      splitLoadResp(curPtr).fullva),
6196444fe09Sgood-circle    shouldOverwrite)
620e80f666eSHaoyuan Feng  val overwriteGpaddr = RegEnable(splitLoadResp(curPtr).gpaddr, shouldOverwrite)
6216444fe09Sgood-circle  val overwriteIsHyper = RegEnable(splitLoadResp(curPtr).isHyper, shouldOverwrite)
6226444fe09Sgood-circle  val overwriteIsForVSnonLeafPTE = RegEnable(splitLoadResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)
62341d8d239Shappy-lx
624b240e1c0SAnzooooo  //TODO In theory, there is no need to overwrite, but for now, the signal is retained in the code in this way.
625b240e1c0SAnzooooo  // and the signal will be removed after sufficient verification.
626b240e1c0SAnzooooo  io.overwriteExpBuf.valid := false.B
627a53daa0fSHaoyuan Feng  io.overwriteExpBuf.vaddr := overwriteVaddr
62846e9ee74SHaoyuan Feng  io.overwriteExpBuf.isHyper := overwriteIsHyper
629a53daa0fSHaoyuan Feng  io.overwriteExpBuf.gpaddr := overwriteGpaddr
630ad415ae0SXiaokun-Pei  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE
63141d8d239Shappy-lx
632*35bb7796SAnzo  // when no exception or uncache, flush loadExceptionBuffer at s_wb
633*35bb7796SAnzo  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !(globalUncache || globalException))
63441d8d239Shappy-lx  io.flushLdExpBuff := flushLdExpBuff
63541d8d239Shappy-lx
63641d8d239Shappy-lx  XSPerfAccumulate("alloc",                  RegNext(!req_valid) && req_valid)
63741d8d239Shappy-lx  XSPerfAccumulate("flush",                  flush)
63841d8d239Shappy-lx  XSPerfAccumulate("flush_idle",             flush && (bufferState === s_idle))
63941d8d239Shappy-lx  XSPerfAccumulate("flush_non_idle",         flush && (bufferState =/= s_idle))
64041d8d239Shappy-lx}
641