/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache.mmu.HasTlbConst
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.{MemExuOutput, DynInst}

class LoadMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasTlbConst
{
  private val enqPortNum = LoadPipelineWidth
  private val maxSplitNum = 2

  require(maxSplitNum == 2)

  private val LB = "b00".U(2.W)
  private val LH = "b01".U(2.W)
  private val LW = "b10".U(2.W)
  private val LD = "b11".U(2.W)

  // encoding of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  // byte-level mask for the given size encoding
  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    LB -> 0x1.U, // lb
    LH -> 0x3.U, // lh
    LW -> 0xf.U, // lw
    LD -> 0xff.U // ld
  ))

  // shift `data` right by `shiftEncode` bytes, then keep the lowest `truncateEncode` bytes
  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
    val shiftData = LookupTree(shiftEncode, List(
      BYTE0 -> data(63, 0),
      BYTE1 -> data(63, 8),
      BYTE2 -> data(63, 16),
      BYTE3 -> data(63, 24),
      BYTE4 -> data(63, 32),
      BYTE5 -> data(63, 40),
      BYTE6 -> data(63, 48),
      BYTE7 -> data(63, 56)
    ))
    val truncateData = LookupTree(truncateEncode, List(
      BYTE0 -> 0.U(XLEN.W), // cannot truncate to a width of 0 bytes
      BYTE1 -> shiftData(7, 0),
      BYTE2 -> shiftData(15, 0),
      BYTE3 -> shiftData(23, 0),
      BYTE4 -> shiftData(31, 0),
      BYTE5 -> shiftData(39, 0),
      BYTE6 -> shiftData(47, 0),
      BYTE7 -> shiftData(55, 0)
    ))
    truncateData(XLEN - 1, 0)
  }
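
  // Worked example for getShiftAndTruncateData above: with shiftEncode = BYTE1 and
  // truncateEncode = BYTE3, the helper first drops the lowest byte (data(63, 8)) and then
  // keeps the low three bytes of the shifted value, i.e. it returns data(31, 8)
  // zero-extended to XLEN bits. This is the slice needed when only part of a split
  // load's data contributes to the final result.

  // selectOldest below recursively picks, among the valid candidates, the request with the
  // smallest robIdx, breaking robIdx ties with the smaller uopIdx.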
  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (isNotBefore(bits(0).uop.robIdx, bits(1).uop.robIdx) && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val req = Vec(enqPortNum, Flipped(Valid(new LqWriteBundle)))
    val rob = Flipped(new RobLsqIO)
    val splitLoadReq = Decoupled(new LsPipelineBundle)
    val splitLoadResp = Flipped(Valid(new LqWriteBundle))
    val writeBack = Decoupled(new MemExuOutput)
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val flushLdExpBuff = Output(Bool())
  })

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  // enqueue
  // s1:
  val s1_req = VecInit(io.req.map(_.bits))
  val s1_valid = VecInit(io.req.map(x => x.valid))

  // s2: delay 1 cycle
  val s2_req = RegNext(s1_req)
  val s2_valid = (0 until enqPortNum).map(i =>
    RegNext(s1_valid(i)) &&
    !s2_req(i).uop.robIdx.needFlush(RegNext(io.redirect)) &&
    !s2_req(i).uop.robIdx.needFlush(io.redirect)
  )
  val s2_miss_aligned = s2_req.map(x =>
    x.uop.exceptionVec(loadAddrMisaligned) && !x.uop.exceptionVec(breakPoint) && !TriggerAction.isDmode(x.uop.trigger)
  )

  val s2_enqueue = Wire(Vec(enqPortNum, Bool()))
  for (w <- 0 until enqPortNum) {
    s2_enqueue(w) := s2_valid(w) && s2_miss_aligned(w)
  }

  when (req_valid && req.uop.robIdx.needFlush(io.redirect)) {
    req_valid := s2_enqueue.asUInt.orR
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req_valid := req_valid || true.B
  }

  val reqSel = selectOldest(s2_enqueue, s2_req)

  when (req_valid) {
    req := Mux(
      reqSel._1(0) && (isAfter(req.uop.robIdx, reqSel._2(0).uop.robIdx) || (isNotBefore(req.uop.robIdx, reqSel._2(0).uop.robIdx) && req.uop.uopIdx > reqSel._2(0).uop.uopIdx)),
      reqSel._2(0),
      req)
  } .elsewhen (s2_enqueue.asUInt.orR) {
    req := reqSel._2(0)
  }

  val robMatch = req_valid && io.rob.pendingld && (io.rob.pendingPtr === req.uop.robIdx)

  // buffer control:
  //  - split the misaligned load into aligned loads
  //  - send the split loads to the ldu and collect their results
  //  - merge the results and write back to the rob
  val s_idle :: s_split :: s_req :: s_resp :: s_comb :: s_wb :: s_wait :: Nil = Enum(7)
  val bufferState = RegInit(s_idle)
  val splitLoadReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitLoadResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
  val unSentLoads = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))

  // whether any split load raised an exception or touched mmio space
  val globalException = RegInit(false.B)
  val globalMMIO = RegInit(false.B)

  val hasException = ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR
  val isMMIO = io.splitLoadResp.bits.mmio
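
  // State machine overview: s_idle waits until the rob reports the buffered load as its pending
  // load (robMatch); s_split computes either a single aligned 128-bit request or two aligned
  // split requests; s_req and s_resp issue each outstanding request to the load unit and wait
  // for its response, going back to s_req on replay or while requests remain unsent; s_comb
  // merges the returned data; s_wb writes the merged result back; s_wait holds the entry until
  // the rob commits the load or a redirect flushes it, then all state is reset to s_idle.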
  switch(bufferState) {
    is (s_idle) {
      when (robMatch) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitLoadReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitLoadResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isMMIO) {
          // commit directly when an exception occurs
          // if any split load reaches mmio space, delegate to software as a loadAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalMMIO := isMMIO
        } .elsewhen(io.splitLoadResp.bits.rep_info.need_rep || (unSentLoads & ~clearOh).orR) {
          // the response needs replay, or there are still unsent requests
          bufferState := s_req
        } .otherwise {
          // merge the split load results
          bufferState := s_comb
        }
      }
    }

    is (s_comb) {
      bufferState := s_wb
    }

    is (s_wb) {
      when(io.writeBack.fire) {
        bufferState := s_wait
      }
    }

    is (s_wait) {
      when(io.rob.lcommit =/= 0.U || req.uop.robIdx.needFlush(io.redirect)) {
        // the rob has committed the misaligned load or handled its exception, reset all state
        bufferState := s_idle
        req_valid := false.B
        curPtr := 0.U
        unSentLoads := 0.U
        globalException := false.B
        globalMMIO := false.B
      }
    }
  }

  val highAddress = LookupTree(req.uop.fuOpType(1, 0), List(
    LB -> 0.U,
    LH -> 1.U,
    LW -> 3.U,
    LD -> 7.U
  )) + req.vaddr(4, 0)
  // check whether vaddr and (vaddr + opSize - 1) fall in the same 16-byte region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel = req.vaddr(3, 0)
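
  // Split strategy: when the access stays inside one 16-byte region it is reissued as a single
  // aligned 128-bit load and the requested bytes are picked out of the 128-bit data in s_comb.
  // Otherwise it is split into two aligned loads whose results are stitched together later.
  // For example, reading off the split table below, an ld whose vaddr ends in b101 becomes an
  // lw at vaddr - 1 (keeping its upper 3 bytes) plus an ld at vaddr + 3 (keeping its lower 5 bytes).
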
  // meta of the 128-bit load
  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of the split loads
  val lowAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val lowResultShift = RegInit(0.U(3.W)) // how many bytes to shift the result right by
  val lowResultWidth = RegInit(0.U(3.W)) // how many bytes to take from the result
  val highResultShift = RegInit(0.U(3.W))
  val highResultWidth = RegInit(0.U(3.W))

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      // turn this unaligned load into a single 128-bit load
      unSentLoads := 1.U
      curPtr := 0.U
      new128Load.vaddr := aligned16BytesAddr
      new128Load.fullva := req.fullva
      // new128Load.mask := (getMask(req.uop.fuOpType(1, 0)) << aligned16BytesSel).asUInt
      new128Load.mask := 0xffff.U
      new128Load.uop := req.uop
      new128Load.uop.exceptionVec(loadAddrMisaligned) := false.B
      new128Load.is128bit := true.B
      splitLoadReqs(0) := new128Load
    } .otherwise {
      // split this unaligned load into `maxSplitNum` aligned loads
      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrLoad.uop := req.uop
      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      lowAddrLoad.fullva := req.fullva
      highAddrLoad.uop := req.uop
      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      highAddrLoad.fullva := req.fullva

      switch (req.uop.fuOpType(1, 0)) {
        is (LB) {
          assert(false.B, "lb should not trigger miss align")
        }

        is (LH) {
          lowAddrLoad.uop.fuOpType := LB
          lowAddrLoad.vaddr := req.vaddr
          lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
          lowResultShift := BYTE0
          lowResultWidth := BYTE1

          highAddrLoad.uop.fuOpType := LB
          highAddrLoad.vaddr := req.vaddr + 1.U
          highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
          highResultShift := BYTE0
          highResultWidth := BYTE1
        }

        is (LW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger miss align")
            }

            is ("b01".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b10".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b11".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }
          }
        }

        is (LD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger miss align")
            }

            is ("b001".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE7

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 7.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b010".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 2.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE2
              lowResultWidth := BYTE6

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 6.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b011".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 3.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE3
              lowResultWidth := BYTE5

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 5.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }

            is ("b100".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE4

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 4.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE4
            }

            is ("b101".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE5
            }

            is ("b110".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE6
            }

            is ("b111".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE7
            }
          }
        }
      }

      splitLoadReqs(0) := lowAddrLoad
      splitLoadReqs(1) := highAddrLoad
    }
  }
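
  // Issue bookkeeping: unSentLoads is a bitmap of split requests that have not yet completed and
  // curPtr selects the request currently being issued. A response that needs replay leaves its
  // unSentLoads bit set so the same slot is reissued; otherwise the bit is cleared and curPtr
  // advances to the next split request.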
  io.splitLoadReq.valid := req_valid && (bufferState === s_req)
  io.splitLoadReq.bits := splitLoadReqs(curPtr)
  // restore the H-extension (hlv/hlvx) information of the original load
  // fuOpType bit encoding: | hlv (1 bit) | hlvx (1 bit) | is unsigned (1 bit) | size (2 bits) |
  val reqIsHlv = LSUOpType.isHlv(req.uop.fuOpType)
  val reqIsHlvx = LSUOpType.isHlvx(req.uop.fuOpType)
  io.splitLoadReq.bits.uop.fuOpType := Cat(reqIsHlv, reqIsHlvx, 0.U(1.W), splitLoadReqs(curPtr).uop.fuOpType(1, 0))

  when (io.splitLoadResp.valid) {
    splitLoadResp(curPtr) := io.splitLoadResp.bits
    when (isMMIO) {
      unSentLoads := 0.U
      splitLoadResp(curPtr).uop.exceptionVec := 0.U.asTypeOf(ExceptionVec())
      // delegate to software
      splitLoadResp(curPtr).uop.exceptionVec(loadAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unSentLoads := 0.U
    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
    }
  }

  val combinedData = RegInit(0.U(XLEN.W))

  when (bufferState === s_comb) {
    when (!cross16BytesBoundary) {
      val shiftData = LookupTree(aligned16BytesSel, List(
        "b0000".U -> splitLoadResp(0).data(63, 0),
        "b0001".U -> splitLoadResp(0).data(71, 8),
        "b0010".U -> splitLoadResp(0).data(79, 16),
        "b0011".U -> splitLoadResp(0).data(87, 24),
        "b0100".U -> splitLoadResp(0).data(95, 32),
        "b0101".U -> splitLoadResp(0).data(103, 40),
        "b0110".U -> splitLoadResp(0).data(111, 48),
        "b0111".U -> splitLoadResp(0).data(119, 56),
        "b1000".U -> splitLoadResp(0).data(127, 64),
        "b1001".U -> splitLoadResp(0).data(127, 72),
        "b1010".U -> splitLoadResp(0).data(127, 80),
        "b1011".U -> splitLoadResp(0).data(127, 88),
        "b1100".U -> splitLoadResp(0).data(127, 96),
        "b1101".U -> splitLoadResp(0).data(127, 104),
        "b1110".U -> splitLoadResp(0).data(127, 112),
        "b1111".U -> splitLoadResp(0).data(127, 120)
      ))
      val truncateData = LookupTree(req.uop.fuOpType(1, 0), List(
        LB -> shiftData(7, 0), // lb
        LH -> shiftData(15, 0), // lh
        LW -> shiftData(31, 0), // lw
        LD -> shiftData(63, 0) // ld
      ))
      combinedData := rdataHelper(req.uop, truncateData(XLEN - 1, 0))
    } .otherwise {
      val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
        .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
        .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
      val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
      (0 until XLEN / 8).map {
        case i => {
          when (i.U < lowResultWidth) {
            catResult(i) := lowAddrResult(i)
          } .otherwise {
            catResult(i) := highAddrResult(i.U - lowResultWidth)
          }
        }
      }
      combinedData := rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0))
    }
  }
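
  // Write-back: in the normal (non-exception, non-mmio) case the merged data is returned with a
  // clean exception vector and flushPipe set, so the pipeline is flushed after this load; if an
  // exception or an mmio access was observed, the recorded exception vector is forwarded instead
  // and no flush is requested here.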
  io.writeBack.valid := req_valid && (bufferState === s_wb)
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := ExceptionNO.selectByFu(Mux(
    globalMMIO || globalException,
    splitLoadResp(curPtr).uop.exceptionVec,
    0.U.asTypeOf(ExceptionVec()) // TODO: is this ok?
  ), LduCfg)
  io.writeBack.bits.uop.flushPipe := Mux(globalMMIO || globalException, false.B, true.B)
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := combinedData
  io.writeBack.bits.isFromLoadUnit := DontCare
  io.writeBack.bits.debug.isMMIO := globalMMIO
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  when (flush && (bufferState =/= s_idle)) {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentLoads := 0.U
    globalException := false.B
    globalMMIO := false.B
  }

  // NOTE: special case (an unaligned load crosses a page boundary and the page fault happens on the second page)
  // if the exception happens in the higher page, overwrite the loadExceptionBuffer vaddr
  val shouldOverwrite = req_valid && globalException
  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
  val overwriteVaddr = RegEnable(
    Mux(
      cross16BytesBoundary && (curPtr === 1.U),
      splitLoadResp(curPtr).vaddr,
      splitLoadResp(curPtr).fullva),
    shouldOverwrite)
  val overwriteGpaddr = RegEnable(
    Mux(
      cross16BytesBoundary && (curPtr === 1.U),
      // when crossing pages, the page offset should always be 0
      Cat(get_pn(splitLoadResp(curPtr).gpaddr), get_off(0.U(splitLoadResp(curPtr).gpaddr.getWidth.W))),
      splitLoadResp(curPtr).gpaddr),
    shouldOverwrite)
  val overwriteIsHyper = RegEnable(splitLoadResp(curPtr).isHyper, shouldOverwrite)
  val overwriteIsForVSnonLeafPTE = RegEnable(splitLoadResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)

  io.overwriteExpBuf.valid := overwriteExpBuf
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  // when there is no exception or mmio, flush the loadExceptionBuffer at s_wb
  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !(globalMMIO || globalException))
  io.flushLdExpBuff := flushLdExpBuff

  XSPerfAccumulate("alloc", RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush", flush)
  XSPerfAccumulate("flush_idle", flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle", flush && (bufferState =/= s_idle))
}