/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.frontend.FtqPtr
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.FuType
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.mem.Bundles._
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.{MemExuOutput, DynInst}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.cache.mmu.HasTlbConst
import xiangshan.cache._
import xiangshan.cache.wpu.ReplayCarry

/**
  * Single-entry buffer that handles a misaligned load crossing a 16-byte boundary.
  *
  * The buffered request is split into `maxSplitNum` (= 2) aligned loads which are issued one
  * after another through `io.splitLoadReq`; the responses arrive on `io.splitLoadResp`.
  * Once both halves have returned without exception, the data is merged and written back
  * through `io.writeBack` (scalar) or `io.vecWriteBack` (vector). If a split load reports an
  * exception, or touches uncache (MMIO / NC) space, the buffer goes straight to write-back;
  * for the uncache case a `loadAddrMisaligned` exception is raised so software handles it.
  */
class LoadMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasTlbConst
{
  private val enqPortNum = LoadPipelineWidth
  private val maxSplitNum = 2

  // the split tables below are written for exactly two sub-loads
  require(maxSplitNum == 2)

  // size encoding: low two bits of the load fuOpType / alignedType
  private val LB = "b00".U(2.W)
  private val LH = "b01".U(2.W)
  private val LW = "b10".U(2.W)
  private val LD = "b11".U(2.W)

  // encode of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  /** Byte mask (anchored at byte 0) for an access of the given size encoding. */
  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    LB -> 0x1.U, // lb
    LH -> 0x3.U, // lh
    LW -> 0xf.U, // lw
    LD -> 0xff.U // ld
  ))

  /**
    * Shift `data` right by `shiftEncode` bytes, then keep the low `truncateEncode` bytes.
    *
    * @param shiftEncode    number of bytes to drop from the low end (BYTE0..BYTE7)
    * @param truncateEncode number of bytes to keep (BYTE1..BYTE7; BYTE0 yields zero)
    * @param data           raw data, bits (63, 0) are used
    * @return the selected bytes, zero-extended to XLEN bits
    */
  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
    val shiftData = LookupTree(shiftEncode, List(
      BYTE0 -> data(63,  0),
      BYTE1 -> data(63,  8),
      BYTE2 -> data(63, 16),
      BYTE3 -> data(63, 24),
      BYTE4 -> data(63, 32),
      BYTE5 -> data(63, 40),
      BYTE6 -> data(63, 48),
      BYTE7 -> data(63, 56)
    ))
    val truncateData = LookupTree(truncateEncode, List(
      BYTE0 -> 0.U(XLEN.W), // can not truncate with 0 byte width
      BYTE1 -> shiftData( 7, 0),
      BYTE2 -> shiftData(15, 0),
      BYTE3 -> shiftData(23, 0),
      BYTE4 -> shiftData(31, 0),
      BYTE5 -> shiftData(39, 0),
      BYTE6 -> shiftData(47, 0),
      BYTE7 -> shiftData(55, 0)
    ))
    truncateData(XLEN - 1, 0)
  }

  /**
    * Reduce a list of candidates to the oldest valid one by recursive pairwise comparison.
    * Age is decided by `robIdx` first and by `uopIdx` when both share the same `robIdx`.
    *
    * @param valid valid flag per candidate
    * @param bits  payload per candidate, same length as `valid`
    * @return a single-element (valid, bits) pair, or the input unchanged for 0 or 1 candidates
    */
  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (bits(0).uop.robIdx === bits(1).uop.robIdx && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val io = IO(new Bundle() {
    val redirect        = Flipped(Valid(new Redirect))
    val enq             = Vec(enqPortNum, Flipped(new MisalignBufferEnqIO))
    val rob             = Flipped(new RobLsqIO)
    val splitLoadReq    = Decoupled(new LsPipelineBundle)
    val splitLoadResp   = Flipped(Valid(new LqWriteBundle))
    val writeBack       = Decoupled(new MemExuOutput)
    val vecWriteBack    = Decoupled(new VecPipelineFeedbackIO(isVStore = false))
    val loadOutValid    = Input(Bool())
    val loadVecOutValid = Input(Bool())
    val overwriteExpBuf = Output(new XSBundle {
      val valid  = Bool()
      val vaddr  = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val flushLdExpBuff  = Output(Bool())
    val loadMisalignFull = Output(Bool())
  })

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop  := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  // the single buffered request
  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  io.loadMisalignFull := req_valid

  // Grant at most one enqueue port per cycle, lowest index first, and only while the buffer is empty.
  io.enq.zipWithIndex.foreach { case (enq, i) =>
    val olderGranted = if (i == 0) false.B else io.enq.take(i).map(_.req.ready).reduce(_ || _)
    enq.req.ready := !olderGranted && !req_valid && enq.req.valid
  }

  val select_req_bit   = ParallelPriorityMux(io.enq.map(_.req.valid), io.enq.map(_.req.bits))
  val select_req_valid = io.enq.map(_.req.valid).reduce(_ || _)
  val canEnqValid = !req_valid && !select_req_bit.uop.robIdx.needFlush(io.redirect) && select_req_valid
  when(canEnqValid) {
    req := select_req_bit
    req_valid := true.B
  }

  // buffer control:
  //  - s_idle:            idle
  //  - s_split:           split misaligned load
  //  - s_req:             issue a split memory access request
  //  - s_resp:            respond to a split load access request
  //  - s_comb_wakeup_rep: merge the data and issue a wakeup load
  //  - s_wb:              writeback to rob/vecMergeBuffer
  val s_idle :: s_split :: s_req :: s_resp :: s_comb_wakeup_rep :: s_wb :: Nil = Enum(6)
  val bufferState    = RegInit(s_idle)
  val splitLoadReqs  = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitLoadResp  = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
  val exceptionVec   = RegInit(0.U.asTypeOf(ExceptionVec()))
  val unSentLoads    = RegInit(0.U(maxSplitNum.W))
  val curPtr         = RegInit(0.U(log2Ceil(maxSplitNum).W))
  val needWakeUpReqsWire = Wire(Bool())
  val needWakeUpWB       = RegInit(false.B)
  val data_select = RegEnable(genRdataOH(select_req_bit.uop), 0.U(genRdataOH(select_req_bit.uop).getWidth.W), canEnqValid)

  // if there is exception or uncache in split load
  val globalException = RegInit(false.B)
  val globalUncache   = RegInit(false.B)

  // debug info
  val globalMMIO = RegInit(false.B)
  val globalNC   = RegInit(false.B)

  // A debug-mode trigger counts as an exception regardless of vecActive
  // (parentheses only make the existing && / || precedence explicit).
  val hasException = (io.splitLoadResp.bits.vecActive &&
    ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR) ||
    TriggerAction.isDmode(io.splitLoadResp.bits.uop.trigger)
  val isUncache = io.splitLoadResp.bits.mmio || io.splitLoadResp.bits.nc
  needWakeUpReqsWire := false.B
  switch(bufferState) {
    is (s_idle) {
      when (req_valid) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitLoadReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitLoadResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isUncache) {
          // commit directly when exception occurs
          // if any split load reaches uncache space, delegate to software loadAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalUncache := isUncache
          globalMMIO := io.splitLoadResp.bits.mmio
          globalNC   := io.splitLoadResp.bits.nc
        } .elsewhen(io.splitLoadResp.bits.rep_info.need_rep || (unSentLoads & ~clearOh).orR) {
          // need replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // merge the split load results
          bufferState := s_comb_wakeup_rep
          needWakeUpWB := !req.isvec
        }
      }
    }

    is (s_comb_wakeup_rep) {
      when(!req.isvec) {
        // scalar: stay here until the wakeup request has been accepted
        when(io.splitLoadReq.fire) {
          bufferState := s_wb
        }.otherwise {
          bufferState := s_comb_wakeup_rep
        }
        needWakeUpReqsWire := true.B
      } .otherwise {
        bufferState := s_wb
      }
    }

    is (s_wb) {
      // vector requests retire through vecWriteBack, scalar ones through writeBack
      val wbFire = Mux(req.isvec, io.vecWriteBack.fire, io.writeBack.fire)
      when (wbFire) {
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentLoads := 0.U
        globalException := false.B
        globalUncache := false.B
        needWakeUpWB := false.B

        globalMMIO := false.B
        globalNC   := false.B
      }
    }
  }

  val alignedType = Mux(req.isvec, req.alignedType(1,0), req.uop.fuOpType(1, 0))
  val highAddress = LookupTree(alignedType, List(
    LB -> 0.U,
    LH -> 1.U,
    LW -> 3.U,
    LD -> 7.U
  )) + req.vaddr(4, 0)
  // to see if (vaddr + opSize - 1) and vaddr are in the same 16 bytes region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr   = (req.vaddr >> 4) << 4// req.vaddr & ~("b1111".U)
  val aligned16BytesSel    = req.vaddr(3, 0)

  // meta of 128 bit load
  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of split loads
  val lowAddrLoad  = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val lowResultShift = RegInit(0.U(3.W)) // how many bytes should we shift right when got result
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes should we take from result
  val highResultShift = RegInit(0.U(3.W))
  val highResultWidth = RegInit(0.U(3.W))

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      assert(false.B, "There should be no non-aligned access that does not cross 16Byte boundaries.")
    } .otherwise {
      // split this unaligned load into `maxSplitNum` aligned loads
      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrLoad.uop := req.uop
      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      lowAddrLoad.fullva := req.fullva
      highAddrLoad.uop := req.uop
      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      highAddrLoad.fullva := req.fullva

      switch (alignedType(1, 0)) {
        is (LB) {
          assert(false.B, "lb should not trigger miss align")
        }

        is (LH) {
          lowAddrLoad.uop.fuOpType := LB
          lowAddrLoad.vaddr := req.vaddr
          lowAddrLoad.mask  := 0x1.U << lowAddrLoad.vaddr(3, 0)
          lowResultShift    := BYTE0
          lowResultWidth    := BYTE1

          highAddrLoad.uop.fuOpType := LB
          highAddrLoad.vaddr := req.vaddr + 1.U
          highAddrLoad.mask  := 0x1.U << highAddrLoad.vaddr(3, 0)
          highResultShift    := BYTE0
          highResultWidth    := BYTE1
        }

        is (LW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger miss align")
            }

            is ("b01".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask  := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE1
              lowResultWidth    := BYTE3

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask  := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE1
            }

            is ("b10".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask  := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE0
              lowResultWidth    := BYTE2

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask  := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE2
            }

            is ("b11".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask  := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE0
              lowResultWidth    := BYTE1

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask  := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE3
            }
          }
        }

        is (LD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger miss align")
            }

            is ("b001".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask  := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE1
              lowResultWidth    := BYTE7

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 7.U
              highAddrLoad.mask  := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE1
            }

            is ("b010".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 2.U
              lowAddrLoad.mask  := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE2
              lowResultWidth    := BYTE6

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 6.U
              highAddrLoad.mask  := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE2
            }

            is ("b011".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 3.U
              lowAddrLoad.mask  := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE3
              lowResultWidth    := BYTE5

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 5.U
              highAddrLoad.mask  := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE3
            }

            is ("b100".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask  := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE0
              lowResultWidth    := BYTE4

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 4.U
              highAddrLoad.mask  := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE4
            }

            is ("b101".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask  := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE1
              lowResultWidth    := BYTE3

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask  := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE5
            }

            is ("b110".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask  := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE0
              lowResultWidth    := BYTE2

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask  := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE6
            }

            is ("b111".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask  := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift    := BYTE0
              lowResultWidth    := BYTE1

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask  := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift    := BYTE0
              highResultWidth    := BYTE7
            }
          }
        }
      }

      splitLoadReqs(0) := lowAddrLoad
      splitLoadReqs(1) := highAddrLoad
    }
    exceptionVec := 0.U.asTypeOf(exceptionVec.cloneType)
  }

  io.splitLoadReq.valid := req_valid && (bufferState === s_req || bufferState === s_comb_wakeup_rep && needWakeUpReqsWire && !req.isvec)
  io.splitLoadReq.bits  := splitLoadReqs(curPtr)
  io.splitLoadReq.bits.isvec  := req.isvec
  io.splitLoadReq.bits.misalignNeedWakeUp  := needWakeUpReqsWire
  io.splitLoadReq.bits.isFinalSplit        := curPtr(0) && !needWakeUpReqsWire
  // Restore the information of H extension load
  // bit encoding: | hlv 1 | hlvx 1 | is unsigned(1bit) | size(2bit) |
  val reqIsHlv  = LSUOpType.isHlv(req.uop.fuOpType)
  val reqIsHlvx = LSUOpType.isHlvx(req.uop.fuOpType)
  io.splitLoadReq.bits.uop.fuOpType := Mux(req.isvec, req.uop.fuOpType, Cat(reqIsHlv, reqIsHlvx, 0.U(1.W), splitLoadReqs(curPtr).uop.fuOpType(1, 0)))
  io.splitLoadReq.bits.alignedType  := Mux(req.isvec, splitLoadReqs(curPtr).uop.fuOpType(1, 0), req.alignedType)

  when (io.splitLoadResp.valid) {
    val resp = io.splitLoadResp.bits
    splitLoadResp(curPtr) := io.splitLoadResp.bits
    when (isUncache) {
      unSentLoads := 0.U
      exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(exceptionVec.cloneType), LduCfg)
      // delegate to software
      exceptionVec(loadAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unSentLoads := 0.U
      // accumulate the load-unit exceptions reported by this split load
      LduCfg.exceptionOut.foreach(no => exceptionVec(no) := exceptionVec(no) || resp.uop.exceptionVec(no))
    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
      exceptionVec := 0.U.asTypeOf(ExceptionVec())
    }
  }

  val combinedData = RegInit(0.U(XLEN.W))

  when (bufferState === s_comb_wakeup_rep) {
    val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
                          .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
    val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
                          .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
    val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
    // the first lowResultWidth bytes come from the low half, the remaining bytes from the high half
    (0 until XLEN / 8).foreach { i =>
      when (i.U < lowResultWidth) {
        catResult(i) := lowAddrResult(i)
      } .otherwise {
        catResult(i) := highAddrResult(i.U - lowResultWidth)
      }
    }
    combinedData := Mux(req.isvec, rdataVecHelper(req.alignedType, (catResult.asUInt)(XLEN - 1, 0)), rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0)))

  }

  // scalar output
  io.writeBack.valid := req_valid && (bufferState === s_wb) && (io.splitLoadResp.valid && io.splitLoadResp.bits.misalignNeedWakeUp || globalUncache || globalException) && !io.loadOutValid && !req.isvec
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := DontCare
  LduCfg.exceptionOut.foreach(no => io.writeBack.bits.uop.exceptionVec(no) := (globalUncache || globalException) && exceptionVec(no))
  io.writeBack.bits.uop.rfWen := !globalException && !globalUncache && req.uop.rfWen
  io.writeBack.bits.uop.fuType := FuType.ldu.U
  io.writeBack.bits.uop.flushPipe := false.B
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := newRdataHelper(data_select, combinedData)
  io.writeBack.bits.isFromLoadUnit := needWakeUpWB
  // Misaligned accesses to uncache space trigger exceptions, so theoretically these signals won't do anything practical.
  // But let's get them assigned correctly.
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isNC := globalNC
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr


  // vector output
  io.vecWriteBack.valid := req_valid && (bufferState === s_wb) && !io.loadVecOutValid && req.isvec

  io.vecWriteBack.bits.alignedType          := req.alignedType
  io.vecWriteBack.bits.vecFeedback          := true.B
  io.vecWriteBack.bits.vecdata.get          := combinedData
  io.vecWriteBack.bits.isvec                := req.isvec
  io.vecWriteBack.bits.elemIdx              := req.elemIdx
  io.vecWriteBack.bits.elemIdxInsideVd.get  := req.elemIdxInsideVd
  io.vecWriteBack.bits.mask                 := req.mask
  io.vecWriteBack.bits.reg_offset.get       := 0.U
  io.vecWriteBack.bits.usSecondInv          := req.usSecondInv
  io.vecWriteBack.bits.mBIndex              := req.mbIndex
  io.vecWriteBack.bits.hit                  := true.B
  io.vecWriteBack.bits.sourceType           := RSFeedbackType.lrqFull
  io.vecWriteBack.bits.trigger              := TriggerAction.None
  io.vecWriteBack.bits.flushState           := DontCare
  io.vecWriteBack.bits.exceptionVec         := ExceptionNO.selectByFu(exceptionVec, VlduCfg)
  io.vecWriteBack.bits.hasException         := globalException
  io.vecWriteBack.bits.vaddr                := req.fullva
  io.vecWriteBack.bits.vaNeedExt            := req.vaNeedExt
  io.vecWriteBack.bits.gpaddr               := req.gpaddr
  io.vecWriteBack.bits.isForVSnonLeafPTE    := req.isForVSnonLeafPTE
  io.vecWriteBack.bits.mmio                 := globalMMIO
  io.vecWriteBack.bits.vstart               := req.uop.vpu.vstart
  io.vecWriteBack.bits.vecTriggerMask       := req.vecTriggerMask
  io.vecWriteBack.bits.nc                   := globalNC


  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  // Placed after the state machine so that, by last-connect semantics, a flush overrides
  // any state update made in the same cycle.
  when (flush) {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentLoads := 0.U
    globalException := false.B
    globalUncache := false.B
    // clear the wakeup flag as the s_wb completion path does, so a flushed request
    // cannot leak a stale isFromLoadUnit into the next one
    needWakeUpWB := false.B

    globalMMIO := false.B
    globalNC   := false.B
  }

  // NOTE: special case (unaligned load cross page, page fault happens in next page)
  // if exception happens in the higher page address part, overwrite the loadExceptionBuffer vaddr
  val shouldOverwrite = req_valid && globalException
  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
  val overwriteVaddr = RegEnable(
    Mux(
      cross16BytesBoundary && (curPtr === 1.U),
      splitLoadResp(curPtr).vaddr,
      splitLoadResp(curPtr).fullva),
    shouldOverwrite)
  val overwriteGpaddr = RegEnable(splitLoadResp(curPtr).gpaddr, shouldOverwrite)
  val overwriteIsHyper = RegEnable(splitLoadResp(curPtr).isHyper, shouldOverwrite)
  val overwriteIsForVSnonLeafPTE = RegEnable(splitLoadResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)

  //TODO In theory, there is no need to overwrite, but for now, the signal is retained in the code in this way.
  // and the signal will be removed after sufficient verification.
  io.overwriteExpBuf.valid := false.B
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  // when no exception or uncache, flush loadExceptionBuffer at s_wb
  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !(globalUncache || globalException))
  io.flushLdExpBuff := flushLdExpBuff

  XSPerfAccumulate("alloc",                  RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush",                  flush)
  XSPerfAccumulate("flush_idle",             flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle",         flush && (bufferState =/= s_idle))
}