/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache.mmu.HasTlbConst
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.{MemExuOutput, DynInst}
import xiangshan.backend.fu.FuConfig.LduCfg

class LoadMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasTlbConst
{
  private val enqPortNum = LoadPipelineWidth
  private val maxSplitNum = 2

  require(maxSplitNum == 2)

  private val LB = "b00".U(2.W)
  private val LH = "b01".U(2.W)
  private val LW = "b10".U(2.W)
  private val LD = "b11".U(2.W)

  // encoding of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    LB -> 0x1.U,  // lb
    LH -> 0x3.U,  // lh
    LW -> 0xf.U,  // lw
    LD -> 0xff.U  // ld
  ))

  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
    val shiftData = LookupTree(shiftEncode, List(
      BYTE0 -> data(63, 0),
      BYTE1 -> data(63, 8),
      BYTE2 -> data(63, 16),
      BYTE3 -> data(63, 24),
      BYTE4 -> data(63, 32),
      BYTE5 -> data(63, 40),
      BYTE6 -> data(63, 48),
      BYTE7 -> data(63, 56)
    ))
    val truncateData = LookupTree(truncateEncode, List(
      BYTE0 -> 0.U(XLEN.W), // cannot truncate with a 0-byte width
      BYTE1 -> shiftData(7, 0),
      BYTE2 -> shiftData(15, 0),
      BYTE3 -> shiftData(23, 0),
      BYTE4 -> shiftData(31, 0),
      BYTE5 -> shiftData(39, 0),
      BYTE6 -> shiftData(47, 0),
      BYTE7 -> shiftData(55, 0)
    ))
    truncateData(XLEN - 1, 0)
  }
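  // getShiftAndTruncateData (above) drops the lowest `shiftEncode` bytes of `data`, then keeps the
  // lowest `truncateEncode` bytes of what remains. Illustrative example: shiftEncode = BYTE2 with
  // truncateEncode = BYTE4 yields data(47, 16).

  // selectOldest returns the program-oldest valid entry: the older robIdx wins, and a smaller
  // uopIdx breaks the tie when two entries share a robIdx (e.g. split uops of one instruction).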
  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (isNotBefore(bits(0).uop.robIdx, bits(1).uop.robIdx) && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val req = Vec(enqPortNum, Flipped(Valid(new LqWriteBundle)))
    val rob = Flipped(new RobLsqIO)
    val splitLoadReq = Decoupled(new LsPipelineBundle)
    val splitLoadResp = Flipped(Valid(new LqWriteBundle))
    val writeBack = Decoupled(new MemExuOutput)
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val flushLdExpBuff = Output(Bool())
  })

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  // enqueue
  // s1:
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(x => x.valid))

  // s2: delay 1 cycle
  val s2_req = RegNext(s1_req)
  val s2_valid = (0 until enqPortNum).map(i =>
    RegNext(s1_valid(i)) &&
    !s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
    !s2_req(i).uop.robIdx.needFlush(io.redirect)
  )
  val s2_miss_aligned = s2_req.map(x =>
    x.uop.exceptionVec(loadAddrMisaligned) && !x.uop.exceptionVec(breakPoint) && !TriggerAction.isDmode(x.uop.trigger)
  )

  val s2_enqueue = Wire(Vec(enqPortNum, Bool()))
  for (w <- 0 until enqPortNum) {
    s2_enqueue(w) := s2_valid(w) && s2_miss_aligned(w)
  }

  when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
    req_valid := s2_enqueue.asUInt.orR
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req_valid := req_valid || true.B
  }

  val reqSel = selectOldest(s2_enqueue, s2_req)

  when (req_valid) {
    req := Mux(
      reqSel._1(0) && (isAfter(req.uop.robIdx, reqSel._2(0).uop.robIdx) || (isNotBefore(req.uop.robIdx, reqSel._2(0).uop.robIdx) && req.uop.uopIdx > reqSel._2(0).uop.uopIdx)),
      reqSel._2(0),
      req)
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req := reqSel._2(0)
  }

  val robMatch = req_valid && io.rob.pendingld && (io.rob.pendingPtr === req.uop.robIdx)

  // buffer control:
  //  - split the misaligned load into aligned loads
  //  - send the split loads to the ldu and collect their results
  //  - merge them and write back to rob
  val s_idle :: s_split :: s_req :: s_resp :: s_comb :: s_wb :: s_wait :: Nil = Enum(7)
  val bufferState = RegInit(s_idle)
  val splitLoadReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitLoadResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
  val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
  val unSentLoads = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))

  // whether any split load raised an exception or touched mmio space
  val globalException = RegInit(false.B)
  val globalMMIO = RegInit(false.B)
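  // hasException / isMMIO summarize the split response currently on io.splitLoadResp: a load-FU
  // exception forces an early writeback with the collected exception vector, while an MMIO access
  // makes the buffer stop splitting and report loadAddrMisaligned so that software handles the
  // access instead.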
  val hasException = ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR
  val isMMIO = io.splitLoadResp.bits.mmio

  switch(bufferState) {
    is (s_idle) {
      when (robMatch) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitLoadReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitLoadResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isMMIO) {
          // commit directly when an exception occurs
          // if any split load reaches mmio space, delegate to a software loadAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalMMIO := isMMIO
        } .elsewhen(io.splitLoadResp.bits.rep_info.need_rep || (unSentLoads & ~clearOh).orR) {
          // needs replay or still has unsent requests
          bufferState := s_req
        } .otherwise {
          // merge the split load results
          bufferState := s_comb
        }
      }
    }

    is (s_comb) {
      bufferState := s_wb
    }

    is (s_wb) {
      when(io.writeBack.fire) {
        bufferState := s_wait
      }
    }

    is (s_wait) {
      when(io.rob.lcommit =/= 0.U || req.uop.robIdx.needFlush(io.redirect)) {
        // rob has committed the unaligned load or handled the exception, reset all state
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentLoads := 0.U
        globalException := false.B
        globalMMIO := false.B
      }
    }
  }

  val highAddress = LookupTree(req.uop.fuOpType(1, 0), List(
    LB -> 0.U,
    LH -> 1.U,
    LW -> 3.U,
    LD -> 7.U
  )) + req.vaddr(4, 0)
  // check whether (vaddr + opSize - 1) and vaddr fall in the same 16-byte region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel = req.vaddr(3, 0)

  // meta of the 128-bit load
  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of the split loads
  val lowAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val lowResultShift = RegInit(0.U(3.W)) // how many bytes to shift right once the result returns
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes to take from the result
  val highResultShift = RegInit(0.U(3.W))
  val highResultWidth = RegInit(0.U(3.W))

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      // turn this unaligned load into a single 128-bit load
      unSentLoads := 1.U
      curPtr := 0.U
      new128Load.vaddr := aligned16BytesAddr
      new128Load.fullva := req.fullva
      // new128Load.mask := (getMask(req.uop.fuOpType(1, 0)) << aligned16BytesSel).asUInt
      new128Load.mask := 0xffff.U
      new128Load.uop := req.uop
      new128Load.uop.exceptionVec(loadAddrMisaligned) := false.B
      new128Load.is128bit := true.B
      splitLoadReqs(0) := new128Load
    } .otherwise {
      // split this unaligned load into `maxSplitNum` aligned loads
      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrLoad.uop := req.uop
      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      lowAddrLoad.fullva := req.fullva
      highAddrLoad.uop := req.uop
      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      highAddrLoad.fullva := req.fullva
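      // Each case below picks two aligned sub-loads: the low part ends at the 16-byte boundary
      // and the high part starts right after it. lowResultShift/lowResultWidth and
      // highResultShift/highResultWidth record how many bytes of each response to drop and keep
      // when the results are recombined in s_comb. Illustrative example: an LW whose
      // vaddr % 4 == 1 becomes an LW at vaddr - 1 (keep bytes 1..3) plus an LB at vaddr + 3
      // (keep byte 0).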
      switch (req.uop.fuOpType(1, 0)) {
        is (LB) {
          assert(false.B, "lb should not trigger miss align")
        }

        is (LH) {
          lowAddrLoad.uop.fuOpType := LB
          lowAddrLoad.vaddr := req.vaddr
          lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
          lowResultShift := BYTE0
          lowResultWidth := BYTE1

          highAddrLoad.uop.fuOpType := LB
          highAddrLoad.vaddr := req.vaddr + 1.U
          highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
          highResultShift := BYTE0
          highResultWidth := BYTE1
        }

        is (LW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger miss align")
            }

            is ("b01".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b10".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b11".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }
          }
        }

        is (LD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger miss align")
            }

            is ("b001".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE7

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 7.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b010".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 2.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE2
              lowResultWidth := BYTE6

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 6.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b011".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 3.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE3
              lowResultWidth := BYTE5

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 5.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }

            is ("b100".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE4

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 4.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE4
            }

            is ("b101".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE5
            }

            is ("b110".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE6
            }

            is ("b111".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE7
            }
          }
        }
      }

      splitLoadReqs(0) := lowAddrLoad
      splitLoadReqs(1) := highAddrLoad
    }
    exceptionVec := 0.U.asTypeOf(exceptionVec.cloneType)
  }
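  // Request/response loop: in s_req the buffer issues splitLoadReqs(curPtr) to the load unit and
  // then waits in s_resp. unSentLoads keeps one bit per outstanding sub-load; a bit is cleared
  // when its response returns without needing replay (or all bits are cleared on exception/MMIO),
  // so a replayed request is simply re-issued from the same curPtr.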
  io.splitLoadReq.valid := req_valid && (bufferState === s_req)
  io.splitLoadReq.bits := splitLoadReqs(curPtr)
  // Restore the information of the H extension load
  // bit encoding: | hlv 1 | hlvx 1 | is unsigned(1bit) | size(2bit) |
  val reqIsHlv = LSUOpType.isHlv(req.uop.fuOpType)
  val reqIsHlvx = LSUOpType.isHlvx(req.uop.fuOpType)
  io.splitLoadReq.bits.uop.fuOpType := Cat(reqIsHlv, reqIsHlvx, 0.U(1.W), splitLoadReqs(curPtr).uop.fuOpType(1, 0))

  when (io.splitLoadResp.valid) {
    val resp = io.splitLoadResp.bits
    splitLoadResp(curPtr) := io.splitLoadResp.bits
    when (isMMIO) {
      unSentLoads := 0.U
      exceptionVec := 0.U.asTypeOf(ExceptionVec())
      // delegate to software
      exceptionVec(loadAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unSentLoads := 0.U
      LduCfg.exceptionOut.map(no => exceptionVec(no) := exceptionVec(no) || resp.uop.exceptionVec(no))
    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
      exceptionVec := 0.U.asTypeOf(ExceptionVec())
    }
  }

  val combinedData = RegInit(0.U(XLEN.W))
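  // s_comb assembles the final load result. In the non-crossing (single 128-bit load) case, the
  // aligned 16-byte response is shifted right by the original byte offset and truncated to the
  // access size. In the crossing case, the low response fills result bytes [0, lowResultWidth)
  // and the high response fills the bytes above that; rdataHelper then applies the usual
  // sign/zero extension for the original fuOpType.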
  when (bufferState === s_comb) {
    when (!cross16BytesBoundary) {
      val shiftData = LookupTree(aligned16BytesSel, List(
        "b0000".U -> splitLoadResp(0).data(63, 0),
        "b0001".U -> splitLoadResp(0).data(71, 8),
        "b0010".U -> splitLoadResp(0).data(79, 16),
        "b0011".U -> splitLoadResp(0).data(87, 24),
        "b0100".U -> splitLoadResp(0).data(95, 32),
        "b0101".U -> splitLoadResp(0).data(103, 40),
        "b0110".U -> splitLoadResp(0).data(111, 48),
        "b0111".U -> splitLoadResp(0).data(119, 56),
        "b1000".U -> splitLoadResp(0).data(127, 64),
        "b1001".U -> splitLoadResp(0).data(127, 72),
        "b1010".U -> splitLoadResp(0).data(127, 80),
        "b1011".U -> splitLoadResp(0).data(127, 88),
        "b1100".U -> splitLoadResp(0).data(127, 96),
        "b1101".U -> splitLoadResp(0).data(127, 104),
        "b1110".U -> splitLoadResp(0).data(127, 112),
        "b1111".U -> splitLoadResp(0).data(127, 120)
      ))
      val truncateData = LookupTree(req.uop.fuOpType(1, 0), List(
        LB -> shiftData(7, 0),  // lb
        LH -> shiftData(15, 0), // lh
        LW -> shiftData(31, 0), // lw
        LD -> shiftData(63, 0)  // ld
      ))
      combinedData := rdataHelper(req.uop, truncateData(XLEN - 1, 0))
    } .otherwise {
      val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
        .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
        .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
      (0 until XLEN / 8).map {
        case i => {
          when (i.U < lowResultWidth) {
            catResult(i) := lowAddrResult(i)
          } .otherwise {
            catResult(i) := highAddrResult(i.U - lowResultWidth)
          }
        }
      }
      combinedData := rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0))
    }
  }

  io.writeBack.valid := req_valid && (bufferState === s_wb)
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := DontCare
  LduCfg.exceptionOut.map(no => io.writeBack.bits.uop.exceptionVec(no) := (globalMMIO || globalException) && exceptionVec(no))
  io.writeBack.bits.uop.flushPipe := Mux(globalMMIO || globalException, false.B, true.B)
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := combinedData
  io.writeBack.bits.isFromLoadUnit := DontCare
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  when (flush && (bufferState =/= s_idle)) {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentLoads := 0.U
    globalException := false.B
    globalMMIO := false.B
  }

  // NOTE: special case (an unaligned load crosses a page boundary and the page fault happens on the next page)
  // if the exception happens in the higher page address part, overwrite the loadExceptionBuffer vaddr
  val shouldOverwrite = req_valid && globalException
  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
  val overwriteVaddr = RegEnable(
    Mux(
      cross16BytesBoundary && (curPtr === 1.U),
      splitLoadResp(curPtr).vaddr,
      splitLoadResp(curPtr).fullva),
    shouldOverwrite)
  val overwriteGpaddr = RegEnable(splitLoadResp(curPtr).gpaddr, shouldOverwrite)
  val overwriteIsHyper = RegEnable(splitLoadResp(curPtr).isHyper, shouldOverwrite)
  val overwriteIsForVSnonLeafPTE = RegEnable(splitLoadResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)

  io.overwriteExpBuf.valid := overwriteExpBuf
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  // when there is no exception or mmio, flush the loadExceptionBuffer at s_wb
  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !(globalMMIO || globalException))
  io.flushLdExpBuff := flushLdExpBuff

  XSPerfAccumulate("alloc", RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush", flush)
  XSPerfAccumulate("flush_idle", flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle", flush && (bufferState =/= s_idle))
}