/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.{MemExuOutput, DynInst}

class LoadMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
  with HasLoadHelper
{
  private val enqPortNum = LoadPipelineWidth
  private val maxSplitNum = 2

  require(maxSplitNum == 2)

  private val LB = "b00".U(2.W)
  private val LH = "b01".U(2.W)
  private val LW = "b10".U(2.W)
  private val LD = "b11".U(2.W)

  // encode of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    LB -> 0x1.U,  // lb
    LH -> 0x3.U,  // lh
    LW -> 0xf.U,  // lw
    LD -> 0xff.U  // ld
  ))

  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
    val shiftData = LookupTree(shiftEncode, List(
      BYTE0 -> data(63, 0),
      BYTE1 -> data(63, 8),
      BYTE2 -> data(63, 16),
      BYTE3 -> data(63, 24),
      BYTE4 -> data(63, 32),
      BYTE5 -> data(63, 40),
      BYTE6 -> data(63, 48),
      BYTE7 -> data(63, 56)
    ))
    val truncateData = LookupTree(truncateEncode, List(
      BYTE0 -> 0.U(XLEN.W), // can not truncate with 0 byte width
      BYTE1 -> shiftData(7, 0),
      BYTE2 -> shiftData(15, 0),
      BYTE3 -> shiftData(23, 0),
      BYTE4 -> shiftData(31, 0),
      BYTE5 -> shiftData(39, 0),
      BYTE6 -> shiftData(47, 0),
      BYTE7 -> shiftData(55, 0)
    ))
    truncateData(XLEN - 1, 0)
  }

  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (isNotBefore(bits(0).uop.robIdx, bits(1).uop.robIdx) && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
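  // Note on the helpers above: getShiftAndTruncateData(BYTE1, BYTE3, data) first drops the lowest
  // byte (keeping data(63, 8)) and then keeps the lowest 3 bytes of what remains, i.e. it extracts
  // bytes 1..3 of `data`. selectOldest picks the oldest candidate, ordered by robIdx first, then uopIdx.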
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val req = Vec(enqPortNum, Flipped(Valid(new LqWriteBundle)))
    val rob = Flipped(new RobLsqIO)
    val splitLoadReq = Decoupled(new LsPipelineBundle)
    val splitLoadResp = Flipped(Valid(new LqWriteBundle))
    val writeBack = Decoupled(new MemExuOutput)
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val flushLdExpBuff = Output(Bool())
  })

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  // enqueue
  // s1:
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(x => x.valid))

  // s2: delay 1 cycle
  val s2_req = RegNext(s1_req)
  val s2_valid = (0 until enqPortNum).map(i =>
    RegNext(s1_valid(i)) &&
    !s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
    !s2_req(i).uop.robIdx.needFlush(io.redirect)
  )
  val s2_miss_aligned = s2_req.map(x =>
    x.uop.exceptionVec(loadAddrMisaligned) && !x.uop.exceptionVec(breakPoint) && !TriggerAction.isDmode(x.uop.trigger)
  )

  val s2_enqueue = Wire(Vec(enqPortNum, Bool()))
  for (w <- 0 until enqPortNum) {
    s2_enqueue(w) := s2_valid(w) && s2_miss_aligned(w)
  }

  when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
    req_valid := s2_enqueue.asUInt.orR
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req_valid := req_valid || true.B
  }

  val reqSel = selectOldest(s2_enqueue, s2_req)

  when (req_valid) {
    req := Mux(
      reqSel._1(0) && (isAfter(req.uop.robIdx, reqSel._2(0).uop.robIdx) || (isNotBefore(req.uop.robIdx, reqSel._2(0).uop.robIdx) && req.uop.uopIdx > reqSel._2(0).uop.uopIdx)),
      reqSel._2(0),
      req)
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req := reqSel._2(0)
  }

  val robMatch = req_valid && io.rob.pendingld && (io.rob.pendingPtr === req.uop.robIdx)

  // buffer control:
  //  - split a misaligned load into aligned loads
  //  - send the split loads to the ldu and get the results back
  //  - merge them and write back to rob
  val s_idle :: s_split :: s_req :: s_resp :: s_comb :: s_wb :: s_wait :: Nil = Enum(7)
  val bufferState = RegInit(s_idle)
  val splitLoadReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitLoadResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
  val unSentLoads = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))

  // whether any split load raised an exception or reached mmio space
  val globalException = RegInit(false.B)
  val globalMMIO = RegInit(false.B)

  val hasException = ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR
  val isMMIO = io.splitLoadResp.bits.mmio
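  // Buffer state machine (summary of the switch below):
  //   s_idle : wait until the buffered load is the pending load at the rob head (robMatch)
  //   s_split: compute the split (or single 128-bit) requests and the shift/width used for merging
  //   s_req  : issue splitLoadReqs(curPtr) to the load pipeline
  //   s_resp : wait for splitLoadResp; exception/mmio -> s_wb, replay or remaining halves -> s_req,
  //            otherwise -> s_comb
  //   s_comb : merge the split results into combinedData
  //   s_wb   : write the merged result (or the exception) back through io.writeBack
  //   s_wait : wait for rob commit or a redirect flush, then return to s_idle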
  switch(bufferState) {
    is (s_idle) {
      when (robMatch) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitLoadReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitLoadResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isMMIO) {
          // commit directly when an exception occurs
          // if any split load reaches mmio space, delegate to software via the loadAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalMMIO := isMMIO
        } .elsewhen(io.splitLoadResp.bits.rep_info.need_rep || (unSentLoads & ~clearOh).orR) {
          // needs replay, or there are still unsent requests
          bufferState := s_req
        } .otherwise {
          // merge the split load results
          bufferState := s_comb
        }
      }
    }

    is (s_comb) {
      bufferState := s_wb
    }

    is (s_wb) {
      when(io.writeBack.fire) {
        bufferState := s_wait
      }
    }

    is (s_wait) {
      when(io.rob.lcommit =/= 0.U || req.uop.robIdx.needFlush(io.redirect)) {
        // rob commits the unaligned load or handles the exception, reset all state
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentLoads := 0.U
        globalException := false.B
        globalMMIO := false.B
      }
    }
  }

  val highAddress = LookupTree(req.uop.fuOpType(1, 0), List(
    LB -> 0.U,
    LH -> 1.U,
    LW -> 3.U,
    LD -> 7.U
  )) + req.vaddr(4, 0)
  // check whether (vaddr + opSize - 1) and vaddr are in the same 16-byte region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel = req.vaddr(3, 0)

  // meta of the 128-bit load
  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of the split loads
  val lowAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val lowResultShift = RegInit(0.U(3.W))  // how many bytes to shift right when the result comes back
  val lowResultWidth = RegInit(0.U(3.W))  // how many bytes to take from the result
  val highResultShift = RegInit(0.U(3.W))
  val highResultWidth = RegInit(0.U(3.W))

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      // turn this unaligned load into a single 128-bit load
      unSentLoads := 1.U
      curPtr := 0.U
      new128Load.vaddr := aligned16BytesAddr
      new128Load.fullva := req.fullva
      // new128Load.mask := (getMask(req.uop.fuOpType(1, 0)) << aligned16BytesSel).asUInt
      new128Load.mask := 0xffff.U
      new128Load.uop := req.uop
      new128Load.uop.exceptionVec(loadAddrMisaligned) := false.B
      new128Load.is128bit := true.B
      splitLoadReqs(0) := new128Load
    } .otherwise {
      // split this unaligned load into `maxSplitNum` aligned loads
      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrLoad.uop := req.uop
      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      lowAddrLoad.fullva := req.fullva
      highAddrLoad.uop := req.uop
      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      highAddrLoad.fullva := req.fullva

      switch (req.uop.fuOpType(1, 0)) {
        is (LB) {
          assert(false.B, "lb should not trigger misalignment")
        }

        is (LH) {
          lowAddrLoad.uop.fuOpType := LB
          lowAddrLoad.vaddr := req.vaddr
          lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
          lowResultShift := BYTE0
          lowResultWidth := BYTE1

          highAddrLoad.uop.fuOpType := LB
          highAddrLoad.vaddr := req.vaddr + 1.U
          highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
          highResultShift := BYTE0
          highResultWidth := BYTE1
        }
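        // The LW and LD cases below follow the same pattern as LH: the low half is the widest aligned
        // access that ends at the last byte of the current 16-byte line, the high half is an aligned
        // access starting at the next 16-byte line, and lowResult*/highResult* record how many bytes
        // of each response to drop and keep when the two halves are merged in s_comb.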
        is (LW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger misalignment")
            }

            is ("b01".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b10".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b11".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }
          }
        }

        is (LD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger misalignment")
            }

            is ("b001".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE7

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 7.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b010".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 2.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE2
              lowResultWidth := BYTE6

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 6.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b011".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 3.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE3
              lowResultWidth := BYTE5

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 5.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }

            is ("b100".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE4

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 4.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE4
            }

            is ("b101".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE5
            }

            is ("b110".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE6
            }

            is ("b111".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE7
            }
          }
        }
      }

      splitLoadReqs(0) := lowAddrLoad
      splitLoadReqs(1) := highAddrLoad
    }
  }
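  // Split requests are issued one at a time: curPtr selects the half to send, and unSentLoads keeps one
  // bit per half that has not completed yet. A response that needs replay leaves its bit set (and curPtr
  // unchanged), so the same half is re-issued when the state machine goes back to s_req.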
  io.splitLoadReq.valid := req_valid && (bufferState === s_req)
  io.splitLoadReq.bits := splitLoadReqs(curPtr)

  when (io.splitLoadResp.valid) {
    splitLoadResp(curPtr) := io.splitLoadResp.bits
    when (isMMIO) {
      unSentLoads := 0.U
      splitLoadResp(curPtr).uop.exceptionVec := 0.U.asTypeOf(ExceptionVec())
      // delegate to software
      splitLoadResp(curPtr).uop.exceptionVec(loadAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unSentLoads := 0.U
    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
    }
  }

  val combinedData = RegInit(0.U(XLEN.W))

  when (bufferState === s_comb) {
    when (!cross16BytesBoundary) {
      val shiftData = LookupTree(aligned16BytesSel, List(
        "b0000".U -> splitLoadResp(0).data(63, 0),
        "b0001".U -> splitLoadResp(0).data(71, 8),
        "b0010".U -> splitLoadResp(0).data(79, 16),
        "b0011".U -> splitLoadResp(0).data(87, 24),
        "b0100".U -> splitLoadResp(0).data(95, 32),
        "b0101".U -> splitLoadResp(0).data(103, 40),
        "b0110".U -> splitLoadResp(0).data(111, 48),
        "b0111".U -> splitLoadResp(0).data(119, 56),
        "b1000".U -> splitLoadResp(0).data(127, 64),
        "b1001".U -> splitLoadResp(0).data(127, 72),
        "b1010".U -> splitLoadResp(0).data(127, 80),
        "b1011".U -> splitLoadResp(0).data(127, 88),
        "b1100".U -> splitLoadResp(0).data(127, 96),
        "b1101".U -> splitLoadResp(0).data(127, 104),
        "b1110".U -> splitLoadResp(0).data(127, 112),
        "b1111".U -> splitLoadResp(0).data(127, 120)
      ))
      val truncateData = LookupTree(req.uop.fuOpType(1, 0), List(
        LB -> shiftData(7, 0),  // lb
        LH -> shiftData(15, 0), // lh
        LW -> shiftData(31, 0), // lw
        LD -> shiftData(63, 0)  // ld
      ))
      combinedData := rdataHelper(req.uop, truncateData(XLEN - 1, 0))
    } .otherwise {
      val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
        .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
        .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
      (0 until XLEN / 8).map {
        case i => {
          when (i.U < lowResultWidth) {
            catResult(i) := lowAddrResult(i)
          } .otherwise {
            catResult(i) := highAddrResult(i.U - lowResultWidth)
          }
        }
      }
      combinedData := rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0))
    }
  }
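  // Merge example for the block above: an lw whose first byte sits at offset 0xd of a 16-byte line is
  // split into an aligned lw at vaddr - 1.U (3 useful bytes, the lowest byte dropped via lowResultShift)
  // and an lb at vaddr + 3.U (1 byte); catResult concatenates the 3 low bytes with the high byte, and
  // rdataHelper then sign-/zero-extends the value according to the original uop.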
  io.writeBack.valid := req_valid && (bufferState === s_wb)
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := Mux(
    globalMMIO || globalException,
    splitLoadResp(curPtr).uop.exceptionVec,
    0.U.asTypeOf(ExceptionVec()) // TODO: is this ok?
  )
  io.writeBack.bits.uop.flushPipe := Mux(globalMMIO || globalException, false.B, true.B)
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := combinedData
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  when (flush && (bufferState =/= s_idle)) {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentLoads := 0.U
    globalException := false.B
    globalMMIO := false.B
  }

  // NOTE: special case (an unaligned load crosses a page boundary and the page fault happens in the next page)
  // if the exception happens in the higher page address part, overwrite the loadExceptionBuffer vaddr
  val overwriteExpBuf = GatedValidRegNext(req_valid && globalException)
  val overwriteVaddr = GatedRegNext(Mux(
    cross16BytesBoundary && (curPtr === 1.U),
    splitLoadResp(curPtr).vaddr,
    splitLoadResp(curPtr).fullva))
  val overwriteIsHyper = GatedRegNext(splitLoadResp(curPtr).isHyper)
  val overwriteGpaddr = GatedRegNext(splitLoadResp(curPtr).gpaddr)
  val overwriteIsForVSnonLeafPTE = GatedRegNext(splitLoadResp(curPtr).isForVSnonLeafPTE)

  io.overwriteExpBuf.valid := overwriteExpBuf
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  // when there is no exception or mmio, flush the loadExceptionBuffer at s_wb
  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !(globalMMIO || globalException))
  io.flushLdExpBuff := flushLdExpBuff

  XSPerfAccumulate("alloc", RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush", flush)
  XSPerfAccumulate("flush_idle", flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle", flush && (bufferState =/= s_idle))
}