/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.{MemExuOutput, DynInst}

/**
  * Single-entry buffer for loads that raised `loadAddrMisaligned`.
  *
  * A misaligned load arriving on `io.req` is latched into `req`. Once the ROB reports it as the
  * pending load (`io.rob.pendingld` with a matching `io.rob.pendingPtr`), the buffer either
  *   - re-issues it as one 128-bit access when it stays inside a 16-byte region, or
  *   - splits it into `maxSplitNum` (= 2) aligned accesses when it crosses a 16-byte boundary,
  * sends the accesses through `io.splitLoadReq`, collects `io.splitLoadResp`, merges the data and
  * writes the result back through `io.writeBack`.
  *
  * If any split access reports an exception or hits MMIO, the buffer skips the merge and writes
  * back the exception vector instead (MMIO is turned into `loadAddrMisaligned`).
  */
class LoadMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
  with HasLoadHelper
{
  private val enqPortNum = LoadPipelineWidth
  private val maxSplitNum = 2

  // the split table below only knows how to produce exactly two accesses
  require(maxSplitNum == 2)

  // access size encodings, compared against fuOpType(1, 0)
  private val LB = "b00".U(2.W)
  private val LH = "b01".U(2.W)
  private val LW = "b10".U(2.W)
  private val LD = "b11".U(2.W)

  // encode of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  /** Byte-enable pattern (anchored at byte 0) for an access of the given size encoding. */
  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    LB -> 0x1.U, // lb
    LH -> 0x3.U, // lh
    LW -> 0xf.U, // lw
    LD -> 0xff.U // ld
  ))

  /**
    * Drops the lowest `shiftEncode` bytes of `data(63, 0)` and keeps the lowest `truncateEncode`
    * bytes of what remains, zero-extended.
    *
    * @param shiftEncode    number of low bytes to discard (BYTE0 .. BYTE7)
    * @param truncateEncode number of bytes to keep (BYTE1 .. BYTE7); BYTE0 yields zero
    * @param data           raw load data; only bits 63..0 are read
    * @return bits (XLEN - 1, 0) of the selected bytes
    */
  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
    val shiftData = LookupTree(shiftEncode, List(
      BYTE0 -> data(63,  0),
      BYTE1 -> data(63,  8),
      BYTE2 -> data(63, 16),
      BYTE3 -> data(63, 24),
      BYTE4 -> data(63, 32),
      BYTE5 -> data(63, 40),
      BYTE6 -> data(63, 48),
      BYTE7 -> data(63, 56)
    ))
    val truncateData = LookupTree(truncateEncode, List(
      BYTE0 -> 0.U(XLEN.W), // can not truncate with 0 byte width
      BYTE1 -> shiftData( 7, 0),
      BYTE2 -> shiftData(15, 0),
      BYTE3 -> shiftData(23, 0),
      BYTE4 -> shiftData(31, 0),
      BYTE5 -> shiftData(39, 0),
      BYTE6 -> shiftData(47, 0),
      BYTE7 -> shiftData(55, 0)
    ))
    truncateData(XLEN - 1, 0)
  }

  /**
    * True when `a` is younger than `b`: `a.robIdx` is after `b.robIdx`, or `a.robIdx` is not
    * before `b.robIdx` and `a.uopIdx` is greater than `b.uopIdx`.
    */
  private def isYoungerUop(a: DynInst, b: DynInst): Bool =
    isAfter(a.robIdx, b.robIdx) ||
      (isNotBefore(a.robIdx, b.robIdx) && a.uopIdx > b.uopIdx)

  /**
    * Reduces a list of candidate requests to the single oldest valid one with a binary tree of
    * pairwise comparisons.
    *
    * @param valid per-candidate valid flags
    * @param bits  per-candidate payloads, same length as `valid`
    * @return for two or more candidates, a one-element (valid, bits) pair holding the winner;
    *         for zero or one candidate, the inputs unchanged
    */
  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    // elaboration-time check on the Scala collections, not a hardware assertion
    require(valid.length == bits.length, "selectOldest: valid and bits must have the same length")
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        // both valid: keep the one that is not younger
        Mux(isYoungerUop(bits(0).uop, bits(1).uop), res(1), res(0)),
        // at most one valid: take candidate 0 only if it is the valid one
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val (leftValid, rightValid) = valid.splitAt(valid.length / 2)
      val (leftBits, rightBits) = bits.splitAt(bits.length / 2)
      val left = selectOldest(leftValid, leftBits)
      val right = selectOldest(rightValid, rightBits)
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val io = IO(new Bundle() {
    val redirect        = Flipped(Valid(new Redirect))
    val req             = Vec(enqPortNum, Flipped(Valid(new LqWriteBundle)))
    val rob             = Flipped(new RobLsqIO)
    val splitLoadReq    = Decoupled(new LsPipelineBundle)
    val splitLoadResp   = Flipped(Valid(new LqWriteBundle))
    val writeBack       = Decoupled(new MemExuOutput)
    val overwriteExpBuf = Output(new XSBundle {
      val valid  = Bool()
      val vaddr  = UInt(XLEN.W)
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val flushLdExpBuff  = Output(Bool())
  })

  // this module never reports mmio/uop information to the rob
  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop  := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  // the single buffered misaligned load
  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  // enqueue
  // s1:
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(x => x.valid))

  // s2: delay 1 cycle
  val s2_req = RegNext(s1_req)
  // drop requests killed by the redirect of this cycle or of the previous cycle
  val s2_valid = (0 until enqPortNum).map(i =>
    RegNext(s1_valid(i)) &&
    !s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
    !s2_req(i).uop.robIdx.needFlush(io.redirect)
  )
  // only misaligned loads without a breakpoint exception and without a debug-mode trigger enqueue
  val s2_miss_aligned = s2_req.map(x =>
    x.uop.exceptionVec(loadAddrMisaligned) && !x.uop.exceptionVec(breakPoint) && !TriggerAction.isDmode(x.uop.trigger)
  )

  val s2_enqueue = Wire(Vec(enqPortNum, Bool()))
  for (w <- 0 until enqPortNum) {
    s2_enqueue(w) := s2_valid(w) && s2_miss_aligned(w)
  }

  when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
    // the buffered load is flushed: stay valid only if a new one enqueues in the same cycle
    req_valid := s2_enqueue.asUInt.orR
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req_valid := true.B
  }

  val reqSel = selectOldest(s2_enqueue, s2_req)

  when (req_valid) {
    // replace the buffered load only when the incoming one is valid and older
    req := Mux(
      reqSel._1(0) && isYoungerUop(req.uop, reqSel._2(0).uop),
      reqSel._2(0),
      req)
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req := reqSel._2(0)
  }

  val robMatch = req_valid && io.rob.pendingld && (io.rob.pendingPtr === req.uop.robIdx)

  // buffer control:
  //  - split miss-aligned load into aligned loads
  //  - send split load to ldu and get result from ldu
  //  - merge them and write back to rob
  val s_idle :: s_split :: s_req :: s_resp :: s_comb :: s_wb :: s_wait :: Nil = Enum(7)
  val bufferState = RegInit(s_idle)
  val splitLoadReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitLoadResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
  val unSentLoads = RegInit(0.U(maxSplitNum.W))          // one bit per split load still to complete
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))     // index of the split load in flight

  // if there is exception or mmio in split load
  val globalException = RegInit(false.B)
  val globalMMIO = RegInit(false.B)

  val hasException = ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR
  val isMMIO = io.splitLoadResp.bits.mmio

  switch(bufferState) {
    is (s_idle) {
      when (robMatch) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitLoadReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitLoadResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isMMIO) {
          // commit directly when exception occurs
          // if any split load reaches mmio space, delegate to software loadAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalMMIO := isMMIO
        } .elsewhen(io.splitLoadResp.bits.rep_info.need_rep || (unSentLoads & ~clearOh).orR) {
          // need replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // merge the split load results
          bufferState := s_comb
        }
      }
    }

    is (s_comb) {
      bufferState := s_wb
    }

    is (s_wb) {
      when(io.writeBack.fire) {
        bufferState := s_wait
      }
    }

    is (s_wait) {
      when(io.rob.lcommit =/= 0.U || req.uop.robIdx.needFlush(io.redirect)) {
        // rob commits the unaligned load or handled the exception, reset all state
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentLoads := 0.U
        globalException := false.B
        globalMMIO := false.B
      }
    }
  }

  // address of the last byte touched, kept to 5 bits: (size - 1) + vaddr(4, 0)
  val highAddress = LookupTree(req.uop.fuOpType(1, 0), List(
    LB -> 0.U,
    LH -> 1.U,
    LW -> 3.U,
    LD -> 7.U
  )) + req.vaddr(4, 0)
  // to see if (vaddr + opSize - 1) and vaddr are in the same 16 bytes region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr   = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel    = req.vaddr(3, 0)

  // meta of 128 bit load
  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of split loads
  val lowAddrLoad  = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val lowResultShift  = RegInit(0.U(3.W)) // how many bytes should we shift right when got result
  val lowResultWidth  = RegInit(0.U(3.W)) // how many bytes should we take from result
  val highResultShift = RegInit(0.U(3.W))
  val highResultWidth = RegInit(0.U(3.W))

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      // change this unaligned load into a 128 bits load
      unSentLoads := 1.U
      curPtr := 0.U
      new128Load.vaddr := aligned16BytesAddr
      // new128Load.mask := (getMask(req.uop.fuOpType(1, 0)) << aligned16BytesSel).asUInt
      new128Load.mask := 0xffff.U
      new128Load.uop := req.uop
      new128Load.uop.exceptionVec(loadAddrMisaligned) := false.B
      new128Load.is128bit := true.B
      splitLoadReqs(0) := new128Load
    } .otherwise {
      // split this unaligned load into `maxSplitNum` aligned loads
      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrLoad.uop := req.uop
      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      highAddrLoad.uop := req.uop
      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B

      // Fill in size, address and byte mask of one aligned access.
      // `byteMask` is the byte-0 anchored enable pattern for `sizeEncode`; it is moved to the
      // access's offset inside the 16-byte line.
      def fillSplitLoad(load: LsPipelineBundle, sizeEncode: UInt, byteMask: Int, vaddr: UInt): Unit = {
        load.uop.fuOpType := sizeEncode
        load.vaddr := vaddr
        load.mask := byteMask.U << load.vaddr(3, 0)
      }

      // Describe the low-address access and which of its result bytes belong to the load.
      def splitLow(sizeEncode: UInt, byteMask: Int, vaddr: UInt, shift: UInt, width: UInt): Unit = {
        fillSplitLoad(lowAddrLoad, sizeEncode, byteMask, vaddr)
        lowResultShift := shift
        lowResultWidth := width
      }

      // Describe the high-address access and which of its result bytes belong to the load.
      def splitHigh(sizeEncode: UInt, byteMask: Int, vaddr: UInt, shift: UInt, width: UInt): Unit = {
        fillSplitLoad(highAddrLoad, sizeEncode, byteMask, vaddr)
        highResultShift := shift
        highResultWidth := width
      }

      switch (req.uop.fuOpType(1, 0)) {
        is (LB) {
          assert(false.B, "lb should not trigger miss align")
        }

        is (LH) {
          splitLow (LB, 0x1, req.vaddr,       BYTE0, BYTE1)
          splitHigh(LB, 0x1, req.vaddr + 1.U, BYTE0, BYTE1)
        }

        is (LW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger miss align")
            }

            is ("b01".U) {
              splitLow (LW, 0xf, req.vaddr - 1.U, BYTE1, BYTE3)
              splitHigh(LB, 0x1, req.vaddr + 3.U, BYTE0, BYTE1)
            }

            is ("b10".U) {
              splitLow (LH, 0x3, req.vaddr,       BYTE0, BYTE2)
              splitHigh(LH, 0x3, req.vaddr + 2.U, BYTE0, BYTE2)
            }

            is ("b11".U) {
              splitLow (LB, 0x1, req.vaddr,       BYTE0, BYTE1)
              splitHigh(LW, 0xf, req.vaddr + 1.U, BYTE0, BYTE3)
            }
          }
        }

        is (LD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger miss align")
            }

            is ("b001".U) {
              splitLow (LD, 0xff, req.vaddr - 1.U, BYTE1, BYTE7)
              splitHigh(LB, 0x1,  req.vaddr + 7.U, BYTE0, BYTE1)
            }

            is ("b010".U) {
              splitLow (LD, 0xff, req.vaddr - 2.U, BYTE2, BYTE6)
              splitHigh(LH, 0x3,  req.vaddr + 6.U, BYTE0, BYTE2)
            }

            is ("b011".U) {
              splitLow (LD, 0xff, req.vaddr - 3.U, BYTE3, BYTE5)
              splitHigh(LW, 0xf,  req.vaddr + 5.U, BYTE0, BYTE3)
            }

            is ("b100".U) {
              splitLow (LW, 0xf, req.vaddr,       BYTE0, BYTE4)
              splitHigh(LW, 0xf, req.vaddr + 4.U, BYTE0, BYTE4)
            }

            is ("b101".U) {
              splitLow (LW, 0xf,  req.vaddr - 1.U, BYTE1, BYTE3)
              splitHigh(LD, 0xff, req.vaddr + 3.U, BYTE0, BYTE5)
            }

            is ("b110".U) {
              splitLow (LH, 0x3,  req.vaddr,       BYTE0, BYTE2)
              splitHigh(LD, 0xff, req.vaddr + 2.U, BYTE0, BYTE6)
            }

            is ("b111".U) {
              splitLow (LB, 0x1,  req.vaddr,       BYTE0, BYTE1)
              splitHigh(LD, 0xff, req.vaddr + 1.U, BYTE0, BYTE7)
            }
          }
        }
      }

      splitLoadReqs(0) := lowAddrLoad
      splitLoadReqs(1) := highAddrLoad
    }
  }

  io.splitLoadReq.valid := req_valid && (bufferState === s_req)
  io.splitLoadReq.bits  := splitLoadReqs(curPtr)

  when (io.splitLoadResp.valid) {
    splitLoadResp(curPtr) := io.splitLoadResp.bits
    when (isMMIO) {
      unSentLoads := 0.U
      splitLoadResp(curPtr).uop.exceptionVec := 0.U.asTypeOf(ExceptionVec())
      // delegate to software
      splitLoadResp(curPtr).uop.exceptionVec(loadAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unSentLoads := 0.U
    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
      // this split load is done: retire it and move on to the next one
      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
    }
  }

  val combinedData = RegInit(0.U(XLEN.W))

  when (bufferState === s_comb) {
    when (!cross16BytesBoundary) {
      // single 128-bit access: pick the bytes starting at the original offset in the line
      val shiftData = LookupTree(aligned16BytesSel, List(
        "b0000".U -> splitLoadResp(0).data(63,     0),
        "b0001".U -> splitLoadResp(0).data(71,     8),
        "b0010".U -> splitLoadResp(0).data(79,    16),
        "b0011".U -> splitLoadResp(0).data(87,    24),
        "b0100".U -> splitLoadResp(0).data(95,    32),
        "b0101".U -> splitLoadResp(0).data(103,   40),
        "b0110".U -> splitLoadResp(0).data(111,   48),
        "b0111".U -> splitLoadResp(0).data(119,   56),
        "b1000".U -> splitLoadResp(0).data(127,   64),
        "b1001".U -> splitLoadResp(0).data(127,   72),
        "b1010".U -> splitLoadResp(0).data(127,   80),
        "b1011".U -> splitLoadResp(0).data(127,   88),
        "b1100".U -> splitLoadResp(0).data(127,   96),
        "b1101".U -> splitLoadResp(0).data(127,  104),
        "b1110".U -> splitLoadResp(0).data(127,  112),
        "b1111".U -> splitLoadResp(0).data(127,  120)
      ))
      val truncateData = LookupTree(req.uop.fuOpType(1, 0), List(
        LB -> shiftData(7,  0), // lb
        LH -> shiftData(15, 0), // lh
        LW -> shiftData(31, 0), // lw
        LD -> shiftData(63, 0)  // ld
      ))
      combinedData := rdataHelper(req.uop, truncateData(XLEN - 1, 0))
    } .otherwise {
      val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
                            .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
                            .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      // bytes [0, lowResultWidth) come from the low access, the rest from the high access
      val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
      (0 until XLEN / 8).foreach { i =>
        when (i.U < lowResultWidth) {
          catResult(i) := lowAddrResult(i)
        } .otherwise {
          catResult(i) := highAddrResult(i.U - lowResultWidth)
        }
      }
      combinedData := rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0))
    }
  }

  io.writeBack.valid := req_valid && (bufferState === s_wb)
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := Mux(
    globalMMIO || globalException,
    splitLoadResp(curPtr).uop.exceptionVec,
    0.U.asTypeOf(ExceptionVec()) // TODO: is this ok?
  )
  // flushPipe is requested only for a load that completed without exception or mmio
  io.writeBack.bits.uop.flushPipe := !(globalMMIO || globalException)
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := combinedData
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  // placed after the FSM and enqueue logic so that a flush overrides their updates
  when (flush && (bufferState =/= s_idle)) {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentLoads := 0.U
    globalException := false.B
    globalMMIO := false.B
  }

  // NOTE: special case (unaligned load cross page, page fault happens in next page)
  // if exception happens in the higher page address part, overwrite the loadExceptionBuffer vaddr
  val overwriteExpBuf = GatedValidRegNext(req_valid && cross16BytesBoundary && globalException && (curPtr === 1.U))
  val overwriteVaddr = GatedRegNext(splitLoadResp(curPtr).vaddr)
  val overwriteGpaddr = GatedRegNext(splitLoadResp(curPtr).gpaddr)
  val overwriteIsForVSnonLeafPTE = GatedRegNext(splitLoadResp(curPtr).isForVSnonLeafPTE)

  io.overwriteExpBuf.valid := overwriteExpBuf
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  // when no exception or mmio, flush loadExceptionBuffer at s_wb
  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !(globalMMIO || globalException))
  io.flushLdExpBuff := flushLdExpBuff

  XSPerfAccumulate("alloc",                  RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush",                  flush)
  XSPerfAccumulate("flush_idle",             flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle",         flush && (bufferState =/= s_idle))
}