/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.{MemExuOutput, DynInst}

/**
 * Single-entry buffer that handles a load flagged with `loadAddrMisaligned`.
 *
 * The buffered load is kept until the ROB reports it as the pending load. It is then either
 * re-issued as one 128-bit access (when it stays inside a 16-byte region) or split into two
 * aligned accesses. The responses are merged into one result that is written back through
 * `io.writeBack`.
 */
class LoadMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
  with HasLoadHelper
{
  private val enqPortNum = LoadPipelineWidth
  private val maxSplitNum = 2

  // The split tables below only describe a low part and a high part.
  require(maxSplitNum == 2)

  // Access size, compared against fuOpType(1, 0).
  private val LB = "b00".U(2.W)
  private val LH = "b01".U(2.W)
  private val LW = "b10".U(2.W)
  private val LD = "b11".U(2.W)

  // encode of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  // Byte-count encodings indexed by the number of bytes they stand for.
  private val byteEncodes = Seq(BYTE0, BYTE1, BYTE2, BYTE3, BYTE4, BYTE5, BYTE6, BYTE7)

  /** Byte mask (not yet shifted to the access offset) for a size encoding. */
  def getMask(sizeEncode: UInt) = {
    val maskOfSize = List(
      LB -> 0x1.U, // lb
      LH -> 0x3.U, // lh
      LW -> 0xf.U, // lw
      LD -> 0xff.U // ld
    )
    LookupTree(sizeEncode, maskOfSize)
  }

  /**
   * Drops `shiftEncode` low bytes from the low 64 bits of `data`, then keeps the lowest
   * `truncateEncode` bytes of what is left.
   *
   * @param shiftEncode    BYTEn encoding of how many bytes to shift right
   * @param truncateEncode BYTEn encoding of how many bytes to keep; BYTE0 selects zero
   * @param data           load data, at least 64 bits wide
   * @return the selected bytes in the low bits of an XLEN-bit value
   */
  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
    val shifted = LookupTree(shiftEncode, byteEncodes.zipWithIndex.map {
      case (encode, nBytes) => encode -> data(63, 8 * nBytes)
    })
    val truncated = LookupTree(truncateEncode, byteEncodes.zipWithIndex.map {
      case (encode, 0)      => encode -> 0.U(XLEN.W) // can not truncate with 0 byte width
      case (encode, nBytes) => encode -> shifted(8 * nBytes - 1, 0)
    })
    truncated(XLEN - 1, 0)
  }

  /**
   * Reduces the candidates to the one that comes first in (robIdx, uopIdx) order.
   *
   * @param valid per-candidate valid flags
   * @param bits  per-candidate payloads, same length as `valid`
   * @return single-element sequences with the winner's valid flag and payload; inputs of
   *         length 0 or 1 are returned unchanged
   */
  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    valid.length match {
      case 0 | 1 =>
        (valid, bits)

      case 2 =>
        val candidates = valid.zip(bits).map { case (v, b) =>
          val entry = Wire(ValidIO(chiselTypeOf(bits(0))))
          entry.valid := v
          entry.bits := b
          entry
        }
        // Candidate 0 loses when it has the later robIdx, or the same robIdx and a larger uopIdx.
        val firstIsYounger = isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (isNotBefore(bits(0).uop.robIdx, bits(1).uop.robIdx) && bits(0).uop.uopIdx > bits(1).uop.uopIdx)
        val bothValid = valid(0) && valid(1)
        val onlyFirstValid = valid(0) && !valid(1)
        val oldest = Mux(bothValid,
          Mux(firstIsYounger, candidates(1), candidates(0)),
          Mux(onlyFirstValid, candidates(0), candidates(1)))
        (Seq(oldest.valid), Seq(oldest.bits))

      case n =>
        // Reduce each half, then reduce the two survivors.
        val (leftValid, rightValid) = valid.splitAt(n / 2)
        val (leftBits, rightBits) = bits.splitAt(n / 2)
        val left = selectOldest(leftValid, leftBits)
        val right = selectOldest(rightValid, rightBits)
        selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val io = IO(new Bundle() {
    val redirect        = Flipped(Valid(new Redirect))
    val req             = Vec(enqPortNum, Flipped(Valid(new LqWriteBundle)))
    val rob             = Flipped(new RobLsqIO)
    val splitLoadReq    = Decoupled(new LsPipelineBundle)
    val splitLoadResp   = Flipped(Valid(new LqWriteBundle))
    val writeBack       = Decoupled(new MemExuOutput)
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(VAddrBits.W)
    })
    val flushLdExpBuff  = Output(Bool())
  })

  // These ROB-facing fields are driven with constant zero by this module.
  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop  := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  // The single buffered misaligned load.
  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  // enqueue
  // s1: sample the request ports
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(_.valid))

  // s2: delay 1 cycle, dropping requests hit by this cycle's or the previous cycle's redirect
  val s2_req = RegNext(s1_req)
  val s2_valid = (0 until enqPortNum).map { i =>
    val robIdx = s2_req(i).uop.robIdx
    val killedByPrevRedirect = robIdx.needFlush(RegNext(io.redirect))
    val killedByRedirect = robIdx.needFlush(io.redirect)
    RegNext(s1_valid(i)) && !killedByPrevRedirect && !killedByRedirect
  }
  val s2_miss_aligned = s2_req.map(_.uop.exceptionVec(loadAddrMisaligned))

  // Only surviving requests that carry the misaligned exception are buffered.
  val s2_enqueue = VecInit(s2_valid.zip(s2_miss_aligned).map { case (v, m) => v && m })
  private val anyEnqueue = s2_enqueue.asUInt.orR

  when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
    // The buffered load is flushed; the entry stays valid only if a new one arrives now.
    req_valid := anyEnqueue
  } .elsewhen (anyEnqueue) {
    req_valid := true.B
  }

  val reqSel = selectOldest(s2_enqueue, s2_req)
  private val selValid = reqSel._1(0)
  private val selBits = reqSel._2(0)
  // True when the incoming candidate precedes the buffered load in (robIdx, uopIdx) order.
  private val selPrecedesReq = selValid &&
    (isAfter(req.uop.robIdx, selBits.uop.robIdx) ||
      (isNotBefore(req.uop.robIdx, selBits.uop.robIdx) && req.uop.uopIdx > selBits.uop.uopIdx))

  when (req_valid) {
    // An occupied entry is only replaced by a candidate that precedes it.
    when (selPrecedesReq) {
      req := selBits
    }
  } .elsewhen (anyEnqueue) {
    req := selBits
  }

  // Processing starts once the ROB's pending load is the buffered one.
  val robMatch = req_valid && io.rob.pendingld && (io.rob.pendingPtr === req.uop.robIdx)

  // buffer control:
  //  - split miss-aligned load into aligned loads
  //  - send split load to ldu and get result from ldu
  //  - merge them and write back to rob
  val s_idle :: s_split :: s_req :: s_resp :: s_comb :: s_wb :: s_wait :: Nil = Enum(7)
  val bufferState = RegInit(s_idle)
  val splitLoadReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitLoadResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
  val unSentLoads = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))

  // if there is exception or mmio in split load
  val globalException = RegInit(false.B)
  val globalMMIO = RegInit(false.B)

  val hasException = ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR
  val isMMIO = io.splitLoadResp.bits.mmio

  // Returns the control registers to their idle values and releases the buffered load.
  private def resetBuffer(): Unit = {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentLoads := 0.U
    globalException := false.B
    globalMMIO := false.B
  }

  switch(bufferState) {
    is (s_idle) {
      when (robMatch) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitLoadReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitLoadResp.valid) {
        val stillUnsent = unSentLoads & ~UIntToOH(curPtr)
        when (hasException || isMMIO) {
          // commit directly when exception occurs
          // if any split load reaches mmio space, delegate to software loadAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalMMIO := isMMIO
        } .elsewhen (io.splitLoadResp.bits.rep_info.need_rep || stillUnsent.orR) {
          // need replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // merge the split load results
          bufferState := s_comb
        }
      }
    }

    is (s_comb) {
      bufferState := s_wb
    }

    is (s_wb) {
      when (io.writeBack.fire) {
        bufferState := s_wait
      }
    }

    is (s_wait) {
      when (io.rob.lcommit =/= 0.U || req.uop.robIdx.needFlush(io.redirect)) {
        // rob commits the unaligned load or handled the exception, reset all state
        resetBuffer()
      }
    }
  }

  // Low five address bits of the last byte touched: (vaddr + opSize - 1)(4, 0).
  val highAddress = LookupTree(req.uop.fuOpType(1, 0), List(
    LB -> 0.U,
    LH -> 1.U,
    LW -> 3.U,
    LD -> 7.U
  )) + req.vaddr(4, 0)
  // to see if (vaddr + opSize - 1) and vaddr are in the same 16 bytes region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel = req.vaddr(3, 0)

  // meta of 128 bit load
  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of split loads
  val lowAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val lowResultShift = RegInit(0.U(3.W)) // how many bytes should we shift right when got result
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes should we take from result
  val highResultShift = RegInit(0.U(3.W))
  val highResultWidth = RegInit(0.U(3.W))

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      // change this unaligned load into a 128 bits load
      unSentLoads := 1.U
      curPtr := 0.U
      new128Load.vaddr := aligned16BytesAddr
      // new128Load.mask := (getMask(req.uop.fuOpType(1, 0)) << aligned16BytesSel).asUInt
      new128Load.mask := 0xffff.U
      new128Load.uop := req.uop
      new128Load.uop.exceptionVec(loadAddrMisaligned) := false.B
      new128Load.is128bit := true.B
      splitLoadReqs(0) := new128Load
    } .otherwise {
      // split this unaligned load into `maxSplitNum` aligned loads
      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrLoad.uop := req.uop
      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      highAddrLoad.uop := req.uop
      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B

      // Size encoding and unshifted byte mask for an access of `bytes` bytes (1, 2, 4 or 8).
      def opOf(bytes: Int): UInt = bytes match {
        case 1 => LB
        case 2 => LH
        case 4 => LW
        case 8 => LD
      }
      def maskOf(bytes: Int): UInt = ((1 << bytes) - 1).U

      // Describes one way of splitting the buffered load:
      //  - low part : a `lowSize`-byte access at (vaddr - lowBack); its result is shifted right
      //               by `lowBack` bytes and `lowBytes` bytes are kept
      //  - high part: a `highSize`-byte access at (vaddr + lowBytes); its result is not shifted
      //               and `highBytes` bytes are kept
      def splitInto(lowSize: Int, lowBack: Int, lowBytes: Int, highSize: Int, highBytes: Int): Unit = {
        lowAddrLoad.uop.fuOpType := opOf(lowSize)
        lowAddrLoad.vaddr := (if (lowBack == 0) req.vaddr else req.vaddr - lowBack.U)
        lowAddrLoad.mask := maskOf(lowSize) << lowAddrLoad.vaddr(3, 0)
        lowResultShift := byteEncodes(lowBack)
        lowResultWidth := byteEncodes(lowBytes)

        highAddrLoad.uop.fuOpType := opOf(highSize)
        highAddrLoad.vaddr := req.vaddr + lowBytes.U
        highAddrLoad.mask := maskOf(highSize) << highAddrLoad.vaddr(3, 0)
        highResultShift := BYTE0
        highResultWidth := byteEncodes(highBytes)
      }

      switch (req.uop.fuOpType(1, 0)) {
        is (LB) {
          assert(false.B, "lb should not trigger miss align")
        }

        is (LH) {
          //        lowSize lowBack lowBytes highSize highBytes
          splitInto(1,      0,      1,       1,       1)
        }

        is (LW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger miss align")
            }
            is ("b01".U) { splitInto(4, 1, 3, 1, 1) }
            is ("b10".U) { splitInto(2, 0, 2, 2, 2) }
            is ("b11".U) { splitInto(1, 0, 1, 4, 3) }
          }
        }

        is (LD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger miss align")
            }
            is ("b001".U) { splitInto(8, 1, 7, 1, 1) }
            is ("b010".U) { splitInto(8, 2, 6, 2, 2) }
            is ("b011".U) { splitInto(8, 3, 5, 4, 3) }
            is ("b100".U) { splitInto(4, 0, 4, 4, 4) }
            is ("b101".U) { splitInto(4, 1, 3, 8, 5) }
            is ("b110".U) { splitInto(2, 0, 2, 8, 6) }
            is ("b111".U) { splitInto(1, 0, 1, 8, 7) }
          }
        }
      }

      splitLoadReqs(0) := lowAddrLoad
      splitLoadReqs(1) := highAddrLoad
    }
  }

  // Issue the request selected by curPtr while in s_req.
  io.splitLoadReq.valid := req_valid && (bufferState === s_req)
  io.splitLoadReq.bits := splitLoadReqs(curPtr)

  when (io.splitLoadResp.valid) {
    splitLoadResp(curPtr) := io.splitLoadResp.bits
    when (isMMIO) {
      unSentLoads := 0.U
      splitLoadResp(curPtr).uop.exceptionVec := 0.U.asTypeOf(ExceptionVec())
      // delegate to software
      splitLoadResp(curPtr).uop.exceptionVec(loadAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unSentLoads := 0.U
    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
      // Accepted without replay: retire this part and move on to the next one.
      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
    }
  }

  val combinedData = RegInit(0.U(XLEN.W))

  when (bufferState === s_comb) {
    when (!cross16BytesBoundary) {
      // Take up to 64 bits starting at the byte offset inside the 128-bit response.
      val wideData = splitLoadResp(0).data
      val shiftData = LookupTree(aligned16BytesSel, (0 until 16).map { sel =>
        val lsb = 8 * sel
        sel.U -> wideData(math.min(lsb + 63, 127), lsb)
      })
      val truncateData = LookupTree(req.uop.fuOpType(1, 0), List(
        LB -> shiftData(7, 0),  // lb
        LH -> shiftData(15, 0), // lh
        LW -> shiftData(31, 0), // lw
        LD -> shiftData(63, 0)  // ld
      ))
      combinedData := rdataHelper(req.uop, truncateData(XLEN - 1, 0))
    } .otherwise {
      val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
        .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
        .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      // Bytes below lowResultWidth come from the low part, the rest from the high part.
      val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
      for (i <- 0 until XLEN / 8) {
        catResult(i) := Mux(i.U < lowResultWidth, lowAddrResult(i), highAddrResult(i.U - lowResultWidth))
      }
      combinedData := rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0))
    }
  }

  // Set when a split access reported an exception or reached MMIO.
  private val abnormalResp = globalMMIO || globalException

  io.writeBack.valid := req_valid && (bufferState === s_wb)
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := Mux(
    abnormalResp,
    splitLoadResp(curPtr).uop.exceptionVec,
    0.U.asTypeOf(ExceptionVec()) // TODO: is this ok?
  )
  io.writeBack.bits.uop.flushPipe := !abnormalResp
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := combinedData
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  // Written after the state machine so that a redirect overrides any transition made above.
  when (flush && (bufferState =/= s_idle)) {
    resetBuffer()
  }

  // NOTE: special case (unaligned load cross page, page fault happens in next page)
  // if exception happens in the higher page address part, overwrite the loadExceptionBuffer vaddr
  val overwriteExpBuf = GatedValidRegNext(req_valid && cross16BytesBoundary && globalException && (curPtr === 1.U))
  val overwriteAddr = GatedRegNext(splitLoadResp(curPtr).vaddr)

  io.overwriteExpBuf.valid := overwriteExpBuf
  io.overwriteExpBuf.vaddr := overwriteAddr

  // when no exception or mmio, flush loadExceptionBuffer at s_wb
  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !abnormalResp)
  io.flushLdExpBuff := flushLdExpBuff

  XSPerfAccumulate("alloc", RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush", flush)
  XSPerfAccumulate("flush_idle", flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle", flush && (bufferState =/= s_idle))
}