/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.FuConfig._
import xiangshan.backend.fu.FuType
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache.mmu.HasTlbConst
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.wpu.ReplayCarry
import xiangshan.backend.rob.RobPtr
import xiangshan.backend.Bundles.{MemExuOutput, DynInst}
import xiangshan.backend.fu.FuConfig.LduCfg

class LoadMisalignBuffer(implicit p: Parameters) extends XSModule
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasTlbConst
{
  private val enqPortNum = LoadPipelineWidth
  private val maxSplitNum = 2

  require(maxSplitNum == 2)

  private val LB = "b00".U(2.W)
  private val LH = "b01".U(2.W)
  private val LW = "b10".U(2.W)
  private val LD = "b11".U(2.W)

  // encoding of how many bytes to shift or truncate
  private val BYTE0 = "b000".U(3.W)
  private val BYTE1 = "b001".U(3.W)
  private val BYTE2 = "b010".U(3.W)
  private val BYTE3 = "b011".U(3.W)
  private val BYTE4 = "b100".U(3.W)
  private val BYTE5 = "b101".U(3.W)
  private val BYTE6 = "b110".U(3.W)
  private val BYTE7 = "b111".U(3.W)

  def getMask(sizeEncode: UInt) = LookupTree(sizeEncode, List(
    LB -> 0x1.U,  // lb
    LH -> 0x3.U,  // lh
    LW -> 0xf.U,  // lw
    LD -> 0xff.U  // ld
  ))

  def getShiftAndTruncateData(shiftEncode: UInt, truncateEncode: UInt, data: UInt) = {
    val shiftData = LookupTree(shiftEncode, List(
      BYTE0 -> data(63, 0),
      BYTE1 -> data(63, 8),
      BYTE2 -> data(63, 16),
      BYTE3 -> data(63, 24),
      BYTE4 -> data(63, 32),
      BYTE5 -> data(63, 40),
      BYTE6 -> data(63, 48),
      BYTE7 -> data(63, 56)
    ))
    val truncateData = LookupTree(truncateEncode, List(
      BYTE0 -> 0.U(XLEN.W), // cannot truncate with 0-byte width
      BYTE1 -> shiftData(7, 0),
      BYTE2 -> shiftData(15, 0),
      BYTE3 -> shiftData(23, 0),
      BYTE4 -> shiftData(31, 0),
      BYTE5 -> shiftData(39, 0),
      BYTE6 -> shiftData(47, 0),
      BYTE7 -> shiftData(55, 0)
    ))
    truncateData(XLEN - 1, 0)
  }
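
  // For illustration: getShiftAndTruncateData(BYTE1, BYTE3, data) first drops the lowest
  // byte of `data` and then keeps the lowest 3 bytes of what remains, i.e. bytes 1..3 of
  // `data`, zero-extended to XLEN. This is exactly the (lowResultShift, lowResultWidth)
  // pair used below for the low half of a misaligned lw whose vaddr ends in "b01".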

  def selectOldest[T <: LqWriteBundle](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1),
        Mux(isAfter(bits(0).uop.robIdx, bits(1).uop.robIdx) ||
          (bits(0).uop.robIdx === bits(1).uop.robIdx && bits(0).uop.uopIdx > bits(1).uop.uopIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val req = Vec(enqPortNum, Flipped(Decoupled(new LqWriteBundle)))
    val rob = Flipped(new RobLsqIO)
    val splitLoadReq = Decoupled(new LsPipelineBundle)
    val splitLoadResp = Flipped(Valid(new LqWriteBundle))
    val writeBack = Decoupled(new MemExuOutput)
    val vecWriteBack = Decoupled(new VecPipelineFeedbackIO(isVStore = false))
    val loadOutValid = Input(Bool())
    val loadVecOutValid = Input(Bool())
    val overwriteExpBuf = Output(new XSBundle {
      val valid = Bool()
      val vaddr = UInt(XLEN.W)
      val isHyper = Bool()
      val gpaddr = UInt(XLEN.W)
      val isForVSnonLeafPTE = Bool()
    })
    val flushLdExpBuff = Output(Bool())
    val loadMisalignFull = Output(Bool())
  })

  io.rob.mmio := 0.U.asTypeOf(Vec(LoadPipelineWidth, Bool()))
  io.rob.uop := 0.U.asTypeOf(Vec(LoadPipelineWidth, new DynInst))

  val req_valid = RegInit(false.B)
  val req = Reg(new LqWriteBundle)

  io.loadMisalignFull := req_valid

  (0 until io.req.length).map { i =>
    if (i == 0) {
      io.req(0).ready := !req_valid && io.req(0).valid
    }
    else {
      io.req(i).ready := !io.req.take(i).map(_.ready).reduce(_ || _) && !req_valid && io.req(i).valid
    }
  }

  val select_req_bit = ParallelPriorityMux(io.req.map(_.valid), io.req.map(_.bits))
  val select_req_valid = io.req.map(_.valid).reduce(_ || _)
  val canEnqValid = !req_valid && !select_req_bit.uop.robIdx.needFlush(io.redirect) && select_req_valid
  when(canEnqValid) {
    req := select_req_bit
    req_valid := true.B
  }

  // buffer control:
  //  - s_idle:            idle
  //  - s_split:           split the misaligned load
  //  - s_req:             issue a split memory access request
  //  - s_resp:            wait for the response of a split load access request
  //  - s_comb_wakeup_rep: merge the split results and issue a wake-up load
  //  - s_wb:              write back to rob/vecMergeBuffer
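  //
  // Typical flow for a scalar misaligned load that crosses a 16-byte boundary:
  //   s_idle -> s_split -> s_req/s_resp (low half) -> s_req/s_resp (high half)
  //   -> s_comb_wakeup_rep (merge + wake-up request) -> s_wb -> s_idle
  // Vector loads skip the wake-up request and go from s_comb_wakeup_rep straight to s_wb;
  // an exception or an access to mmio space jumps from s_resp to s_wb directly.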
  val s_idle :: s_split :: s_req :: s_resp :: s_comb_wakeup_rep :: s_wb :: Nil = Enum(6)
  val bufferState = RegInit(s_idle)
  val splitLoadReqs = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LsPipelineBundle))))
  val splitLoadResp = RegInit(VecInit(List.fill(maxSplitNum)(0.U.asTypeOf(new LqWriteBundle))))
  val exceptionVec = RegInit(0.U.asTypeOf(ExceptionVec()))
  val unSentLoads = RegInit(0.U(maxSplitNum.W))
  val curPtr = RegInit(0.U(log2Ceil(maxSplitNum).W))
  val needWakeUpReqsWire = Wire(Bool())
  val needWakeUpWB = RegInit(false.B)
  val data_select = RegEnable(genRdataOH(select_req_bit.uop), 0.U(genRdataOH(select_req_bit.uop).getWidth.W), canEnqValid)

  // whether any split load has an exception or accesses mmio space
  val globalException = RegInit(false.B)
  val globalMMIO = RegInit(false.B)

  val hasException = io.splitLoadResp.bits.vecActive &&
    ExceptionNO.selectByFu(io.splitLoadResp.bits.uop.exceptionVec, LduCfg).asUInt.orR || TriggerAction.isDmode(io.splitLoadResp.bits.uop.trigger)
  val isMMIO = io.splitLoadResp.bits.mmio

  needWakeUpReqsWire := false.B
  switch(bufferState) {
    is (s_idle) {
      when (req_valid) {
        bufferState := s_split
      }
    }

    is (s_split) {
      bufferState := s_req
    }

    is (s_req) {
      when (io.splitLoadReq.fire) {
        bufferState := s_resp
      }
    }

    is (s_resp) {
      when (io.splitLoadResp.valid) {
        val clearOh = UIntToOH(curPtr)
        when (hasException || isMMIO) {
          // commit directly when an exception occurs
          // if any split load reaches mmio space, delegate to software by raising a loadAddrMisaligned exception
          bufferState := s_wb
          globalException := hasException
          globalMMIO := isMMIO
        } .elsewhen(io.splitLoadResp.bits.rep_info.need_rep || (unSentLoads & ~clearOh).orR) {
          // need replay or there are still unsent requests
          bufferState := s_req
        } .otherwise {
          // merge the split load results
          bufferState := s_comb_wakeup_rep
          needWakeUpWB := !req.isvec
        }
      }
    }

    is (s_comb_wakeup_rep) {
      when(!req.isvec) {
        when(io.splitLoadReq.fire) {
          bufferState := s_wb
        }.otherwise {
          bufferState := s_comb_wakeup_rep
        }
        needWakeUpReqsWire := true.B
      } .otherwise {
        bufferState := s_wb
      }
    }

    is (s_wb) {
      when(req.isvec) {
        when(io.vecWriteBack.fire) {
          bufferState := s_idle
          req_valid := false.B
          curPtr := 0.U
          unSentLoads := 0.U
          globalException := false.B
          globalMMIO := false.B
          needWakeUpWB := false.B
        }
      } .otherwise {
        when(io.writeBack.fire) {
          bufferState := s_idle
          req_valid := false.B
          curPtr := 0.U
          unSentLoads := 0.U
          globalException := false.B
          globalMMIO := false.B
          needWakeUpWB := false.B
        }
      }
    }
  }

  val alignedType = Mux(req.isvec, req.alignedType(1, 0), req.uop.fuOpType(1, 0))
  val highAddress = LookupTree(alignedType, List(
    LB -> 0.U,
    LH -> 1.U,
    LW -> 3.U,
    LD -> 7.U
  )) + req.vaddr(4, 0)
  // check whether (vaddr + opSize - 1) and vaddr are in the same 16-byte region
  val cross16BytesBoundary = req_valid && (highAddress(4) =/= req.vaddr(4))
  val aligned16BytesAddr = (req.vaddr >> 4) << 4 // req.vaddr & ~("b1111".U)
  val aligned16BytesSel = req.vaddr(3, 0)

  // meta of the 128-bit load
  val new128Load = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  // meta of the split loads
  val lowAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val highAddrLoad = WireInit(0.U.asTypeOf(new LsPipelineBundle))
  val lowResultShift = RegInit(0.U(3.W))  // how many bytes to shift right when the result comes back
  val lowResultWidth = RegInit(0.U(3.W))  // how many bytes to take from the result
  val highResultShift = RegInit(0.U(3.W))
  val highResultWidth = RegInit(0.U(3.W))
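
  // For illustration: a scalar ld whose vaddr(3, 0) is "b1101" (byte offset 13) crosses the
  // 16-byte boundary. The "b101" branch below splits it into
  //   - low half:  an aligned lw at (vaddr - 1); keep 3 bytes after shifting off 1 byte
  //                (lowResultShift = BYTE1, lowResultWidth = BYTE3) -> bytes 13..15
  //   - high half: an aligned ld at (vaddr + 3); keep its lowest 5 bytes
  //                (highResultShift = BYTE0, highResultWidth = BYTE5) -> bytes 16..20
  // which together cover the 8 bytes requested by the original ld.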

  when (bufferState === s_split) {
    when (!cross16BytesBoundary) {
      assert(false.B, "There should be no misaligned access that does not cross a 16-byte boundary.")
    } .otherwise {
      // split this unaligned load into `maxSplitNum` aligned loads
      unSentLoads := Fill(maxSplitNum, 1.U(1.W))
      curPtr := 0.U
      lowAddrLoad.uop := req.uop
      lowAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      lowAddrLoad.fullva := req.fullva
      highAddrLoad.uop := req.uop
      highAddrLoad.uop.exceptionVec(loadAddrMisaligned) := false.B
      highAddrLoad.fullva := req.fullva

      switch (alignedType(1, 0)) {
        is (LB) {
          assert(false.B, "lb should not trigger misalign")
        }

        is (LH) {
          lowAddrLoad.uop.fuOpType := LB
          lowAddrLoad.vaddr := req.vaddr
          lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
          lowResultShift := BYTE0
          lowResultWidth := BYTE1

          highAddrLoad.uop.fuOpType := LB
          highAddrLoad.vaddr := req.vaddr + 1.U
          highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
          highResultShift := BYTE0
          highResultWidth := BYTE1
        }

        is (LW) {
          switch (req.vaddr(1, 0)) {
            is ("b00".U) {
              assert(false.B, "should not trigger misalign")
            }

            is ("b01".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b10".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b11".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }
          }
        }

        is (LD) {
          switch (req.vaddr(2, 0)) {
            is ("b000".U) {
              assert(false.B, "should not trigger misalign")
            }

            is ("b001".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE7

              highAddrLoad.uop.fuOpType := LB
              highAddrLoad.vaddr := req.vaddr + 7.U
              highAddrLoad.mask := 0x1.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE1
            }

            is ("b010".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 2.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE2
              lowResultWidth := BYTE6

              highAddrLoad.uop.fuOpType := LH
              highAddrLoad.vaddr := req.vaddr + 6.U
              highAddrLoad.mask := 0x3.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE2
            }

            is ("b011".U) {
              lowAddrLoad.uop.fuOpType := LD
              lowAddrLoad.vaddr := req.vaddr - 3.U
              lowAddrLoad.mask := 0xff.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE3
              lowResultWidth := BYTE5

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 5.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE3
            }

            is ("b100".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE4

              highAddrLoad.uop.fuOpType := LW
              highAddrLoad.vaddr := req.vaddr + 4.U
              highAddrLoad.mask := 0xf.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE4
            }

            is ("b101".U) {
              lowAddrLoad.uop.fuOpType := LW
              lowAddrLoad.vaddr := req.vaddr - 1.U
              lowAddrLoad.mask := 0xf.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE1
              lowResultWidth := BYTE3

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 3.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE5
            }

            is ("b110".U) {
              lowAddrLoad.uop.fuOpType := LH
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x3.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE2

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 2.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE6
            }

            is ("b111".U) {
              lowAddrLoad.uop.fuOpType := LB
              lowAddrLoad.vaddr := req.vaddr
              lowAddrLoad.mask := 0x1.U << lowAddrLoad.vaddr(3, 0)
              lowResultShift := BYTE0
              lowResultWidth := BYTE1

              highAddrLoad.uop.fuOpType := LD
              highAddrLoad.vaddr := req.vaddr + 1.U
              highAddrLoad.mask := 0xff.U << highAddrLoad.vaddr(3, 0)
              highResultShift := BYTE0
              highResultWidth := BYTE7
            }
          }
        }
      }

      splitLoadReqs(0) := lowAddrLoad
      splitLoadReqs(1) := highAddrLoad
    }
    exceptionVec := 0.U.asTypeOf(exceptionVec.cloneType)
  }

  io.splitLoadReq.valid := req_valid && (bufferState === s_req || bufferState === s_comb_wakeup_rep && needWakeUpReqsWire && !req.isvec)
  io.splitLoadReq.bits := splitLoadReqs(curPtr)
  io.splitLoadReq.bits.isvec := req.isvec
  io.splitLoadReq.bits.misalignNeedWakeUp := needWakeUpReqsWire
  io.splitLoadReq.bits.isFinalSplit := curPtr(0) && !needWakeUpReqsWire
  // Restore the information of the H extension load
  // bit encoding: | hlv 1 | hlvx 1 | is unsigned(1bit) | size(2bit) |
  val reqIsHlv = LSUOpType.isHlv(req.uop.fuOpType)
  val reqIsHlvx = LSUOpType.isHlvx(req.uop.fuOpType)
  io.splitLoadReq.bits.uop.fuOpType := Mux(req.isvec, req.uop.fuOpType, Cat(reqIsHlv, reqIsHlvx, 0.U(1.W), splitLoadReqs(curPtr).uop.fuOpType(1, 0)))
  io.splitLoadReq.bits.alignedType := Mux(req.isvec, splitLoadReqs(curPtr).uop.fuOpType(1, 0), req.alignedType)
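
  // Note on the two request flavours issued above: during s_req, splitLoadReqs(curPtr) is a real
  // split memory access and the second one (curPtr(0) set) is marked isFinalSplit; during
  // s_comb_wakeup_rep, a scalar load issues one extra request tagged misalignNeedWakeUp, and the
  // scalar write-back below waits for the response of that wake-up request (unless a global
  // exception/MMIO ends the sequence early).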

  when (io.splitLoadResp.valid) {
    val resp = io.splitLoadResp.bits
    splitLoadResp(curPtr) := io.splitLoadResp.bits
    when (isMMIO) {
      unSentLoads := 0.U
      exceptionVec := ExceptionNO.selectByFu(0.U.asTypeOf(exceptionVec.cloneType), LduCfg)
      // delegate to software
      exceptionVec(loadAddrMisaligned) := true.B
    } .elsewhen (hasException) {
      unSentLoads := 0.U
      LduCfg.exceptionOut.map(no => exceptionVec(no) := exceptionVec(no) || resp.uop.exceptionVec(no))
    } .elsewhen (!io.splitLoadResp.bits.rep_info.need_rep) {
      unSentLoads := unSentLoads & ~UIntToOH(curPtr)
      curPtr := curPtr + 1.U
      exceptionVec := 0.U.asTypeOf(ExceptionVec())
    }
  }

  val combinedData = RegInit(0.U(XLEN.W))

  when (bufferState === s_comb_wakeup_rep) {
    val lowAddrResult = getShiftAndTruncateData(lowResultShift, lowResultWidth, splitLoadResp(0).data)
      .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
    val highAddrResult = getShiftAndTruncateData(highResultShift, highResultWidth, splitLoadResp(1).data)
      .asTypeOf(Vec(XLEN / 8, UInt(8.W)))
    val catResult = Wire(Vec(XLEN / 8, UInt(8.W)))
    (0 until XLEN / 8).map { case i =>
      when (i.U < lowResultWidth) {
        catResult(i) := lowAddrResult(i)
      } .otherwise {
        catResult(i) := highAddrResult(i.U - lowResultWidth)
      }
    }
    combinedData := Mux(req.isvec, rdataVecHelper(req.alignedType, (catResult.asUInt)(XLEN - 1, 0)), rdataHelper(req.uop, (catResult.asUInt)(XLEN - 1, 0)))
  }
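
  // Continuing the ld example above (lowResultWidth = BYTE3, highResultWidth = BYTE5): catResult
  // is { low bytes 0..2, high bytes 0..4 }, i.e. the 8 bytes the original misaligned ld asked for;
  // rdataHelper / rdataVecHelper then produce the final write-back data for the uop.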

  io.writeBack.valid := req_valid && (bufferState === s_wb) && (io.splitLoadResp.valid && io.splitLoadResp.bits.misalignNeedWakeUp || globalMMIO || globalException) && !io.loadOutValid && !req.isvec
  io.writeBack.bits.uop := req.uop
  io.writeBack.bits.uop.exceptionVec := DontCare
  LduCfg.exceptionOut.map(no => io.writeBack.bits.uop.exceptionVec(no) := (globalMMIO || globalException) && exceptionVec(no))
  io.writeBack.bits.uop.rfWen := !globalException && !globalMMIO && req.uop.rfWen
  io.writeBack.bits.uop.fuType := FuType.ldu.U
  io.writeBack.bits.uop.flushPipe := false.B
  io.writeBack.bits.uop.replayInst := false.B
  io.writeBack.bits.data := newRdataHelper(data_select, combinedData)
  io.writeBack.bits.isFromLoadUnit := needWakeUpWB
  io.writeBack.bits.debug.isMMIO := globalMMIO
  // FIXME lyq: temporarily set to false
  io.writeBack.bits.debug.isNC := false.B
  io.writeBack.bits.debug.isPerfCnt := false.B
  io.writeBack.bits.debug.paddr := req.paddr
  io.writeBack.bits.debug.vaddr := req.vaddr

  // vector output
  io.vecWriteBack.valid := req_valid && (bufferState === s_wb) && !io.loadVecOutValid && req.isvec

  io.vecWriteBack.bits.alignedType := req.alignedType
  io.vecWriteBack.bits.vecFeedback := true.B
  io.vecWriteBack.bits.vecdata.get := combinedData
  io.vecWriteBack.bits.isvec := req.isvec
  io.vecWriteBack.bits.elemIdx := req.elemIdx
  io.vecWriteBack.bits.elemIdxInsideVd.get := req.elemIdxInsideVd
  io.vecWriteBack.bits.mask := req.mask
  io.vecWriteBack.bits.reg_offset.get := 0.U
  io.vecWriteBack.bits.usSecondInv := req.usSecondInv
  io.vecWriteBack.bits.mBIndex := req.mbIndex
  io.vecWriteBack.bits.hit := true.B
  io.vecWriteBack.bits.sourceType := RSFeedbackType.lrqFull
  io.vecWriteBack.bits.trigger := TriggerAction.None
  io.vecWriteBack.bits.flushState := DontCare
  io.vecWriteBack.bits.exceptionVec := ExceptionNO.selectByFu(exceptionVec, VlduCfg)
  io.vecWriteBack.bits.hasException := globalException
  io.vecWriteBack.bits.vaddr := req.fullva
  io.vecWriteBack.bits.vaNeedExt := req.vaNeedExt
  io.vecWriteBack.bits.gpaddr := req.gpaddr
  io.vecWriteBack.bits.isForVSnonLeafPTE := req.isForVSnonLeafPTE
  io.vecWriteBack.bits.mmio := DontCare
  io.vecWriteBack.bits.vstart := req.uop.vpu.vstart
  io.vecWriteBack.bits.vecTriggerMask := req.vecTriggerMask
  io.vecWriteBack.bits.nc := false.B

  val flush = req_valid && req.uop.robIdx.needFlush(io.redirect)

  when (flush) {
    bufferState := s_idle
    req_valid := false.B
    curPtr := 0.U
    unSentLoads := 0.U
    globalException := false.B
    globalMMIO := false.B
  }

  // NOTE: special case (an unaligned load crosses a page boundary and the page fault happens in the next page)
  // if the exception happens in the higher page address part, overwrite the loadExceptionBuffer vaddr
  val shouldOverwrite = req_valid && globalException
  val overwriteExpBuf = GatedValidRegNext(shouldOverwrite)
  val overwriteVaddr = RegEnable(
    Mux(
      cross16BytesBoundary && (curPtr === 1.U),
      splitLoadResp(curPtr).vaddr,
      splitLoadResp(curPtr).fullva),
    shouldOverwrite)
  val overwriteGpaddr = RegEnable(splitLoadResp(curPtr).gpaddr, shouldOverwrite)
  val overwriteIsHyper = RegEnable(splitLoadResp(curPtr).isHyper, shouldOverwrite)
  val overwriteIsForVSnonLeafPTE = RegEnable(splitLoadResp(curPtr).isForVSnonLeafPTE, shouldOverwrite)

  // TODO: in theory there is no need to overwrite; the signal is kept in this form for now
  // and will be removed after sufficient verification.
  io.overwriteExpBuf.valid := false.B
  io.overwriteExpBuf.vaddr := overwriteVaddr
  io.overwriteExpBuf.isHyper := overwriteIsHyper
  io.overwriteExpBuf.gpaddr := overwriteGpaddr
  io.overwriteExpBuf.isForVSnonLeafPTE := overwriteIsForVSnonLeafPTE

  // when there is no exception or mmio, flush the loadExceptionBuffer at s_wb
  val flushLdExpBuff = GatedValidRegNext(req_valid && (bufferState === s_wb) && !(globalMMIO || globalException))
  io.flushLdExpBuff := flushLdExpBuff

  XSPerfAccumulate("alloc", RegNext(!req_valid) && req_valid)
  XSPerfAccumulate("flush", flush)
  XSPerfAccumulate("flush_idle", flush && (bufferState === s_idle))
  XSPerfAccumulate("flush_non_idle", flush && (bufferState =/= s_idle))
}