/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.cache._
import xiangshan.backend.fu.fpu.FPU
// NOTE(review): `xiangshan.cache._` is imported twice (here and above) — harmless but redundant.
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.cache.dcache.ReplayCarry
import xiangshan.mem.mdp._
import utils._
import utility._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}

/**
 * Encoding of the reasons a load must be replayed.
 *
 * Each cause is a bit position inside an `allCauses`-wide cause vector
 * (see `cause` in LoadQueueReplay). A lower numeric value means a HIGHER
 * replay priority when the load unit picks among pending causes.
 */
object LoadReplayCauses {
  // these causes have priority, lower coding has higher priority.
  // when load replay happens, load unit will select highest priority
  // from replay causes vector

  /*
   * Warning:
   * ************************************************************
   * * Don't change the priority. If the priority is changed,   *
   * * deadlock may occur. If you really need to change or      *
   * * add priority, please ensure that no deadlock will occur. *
   * ************************************************************
   *
   */
  // st-ld violation
  val waitStore         = 0
  // tlb miss check
  val tlbMiss           = 1
  // st-ld violation re-execute check
  val schedError        = 2
  // dcache bank conflict check
  val bankConflict      = 3
  // store-to-load-forwarding check
  val forwardFail       = 4
  // dcache replay check
  val dcacheReplay      = 5
  // dcache miss check
  val dcacheMiss        = 6
  // RAR queue accept check
  val rarReject         = 7
  // RAW queue accept check
  val rawReject         = 8
  // total causes
  val allCauses         = 9
}

/**
 * Tracks relative enqueue age among `numEntries` entries and selects the
 * oldest ready entry as a one-hot vector.
 *
 * Implementation: an age matrix where age(i)(j) means "entry i entered the
 * queue before entry j". Only the upper triangle is stored in registers;
 * the lower triangle is derived by negation (see get_age). `io.out` is the
 * one-hot oldest-ready selection, registered when `regOut` is true.
 */
class AgeDetector(numEntries: Int, numEnq: Int, regOut: Boolean = true)(implicit p: Parameters) extends XSModule {
  val io = IO(new Bundle {
    // NOTE: deq and enq may come at the same cycle.
    val enq = Vec(numEnq, Input(UInt(numEntries.W)))   // per-port one-hot enqueue vectors
    val deq = Input(UInt(numEntries.W))                 // per-entry dequeue/flush bits
    val ready = Input(UInt(numEntries.W))               // per-entry "eligible for selection" bits
    val out = Output(UInt(numEntries.W))                // one-hot oldest ready entry
  })

  // age(i)(j): entry i enters queue before entry j
  val age = Seq.fill(numEntries)(Seq.fill(numEntries)(RegInit(false.B)))
  val nextAge = Seq.fill(numEntries)(Seq.fill(numEntries)(Wire(Bool())))

  // to reduce reg usage, only use upper matrix
  def get_age(row: Int, col: Int): Bool = if (row <= col) age(row)(col) else !age(col)(row)
  def get_next_age(row: Int, col: Int): Bool = if (row <= col) nextAge(row)(col) else !nextAge(col)(row)
  def isFlushed(i: Int): Bool = io.deq(i)
  // Entry i enqueues this cycle from any of the first `numPorts` ports
  // (all ports when numPorts == -1) and is not simultaneously flushed.
  def isEnqueued(i: Int, numPorts: Int = -1): Bool = {
    val takePorts = if (numPorts == -1) io.enq.length else numPorts
    takePorts match {
      case 0 => false.B
      case 1 => io.enq.head(i) && !isFlushed(i)
      case n => VecInit(io.enq.take(n).map(_(i))).asUInt.orR && !isFlushed(i)
    }
  }

  for ((row, i) <- nextAge.zipWithIndex) {
    // entry i is valid next cycle if it was valid (diagonal) or enqueues now
    val thisValid = get_age(i, i) || isEnqueued(i)
    for ((elem, j) <- row.zipWithIndex) {
      when (isFlushed(i)) {
        // (1) when entry i is flushed or dequeues, set row(i) to false.B
        elem := false.B
      }.elsewhen (isFlushed(j)) {
        // (2) when entry j is flushed or dequeues, set column(j) to validVec
        elem := thisValid
      }.elsewhen (isEnqueued(i)) {
        // (3) when entry i enqueues from port k,
        // (3.1) if entry j enqueues from previous ports, set to false
        // (3.2) otherwise, set to true if and only of entry j is invalid
        // overall: !jEnqFromPreviousPorts && !jIsValid
        val sel = io.enq.map(_(i))
        val result = (0 until numEnq).map(k => isEnqueued(j, k))
        // why ParallelMux: sel must be one-hot since enq is one-hot
        elem := !get_age(j, j) && !ParallelMux(sel, result)
      }.otherwise {
        // default: unchanged
        elem := get_age(i, j)
      }
      age(i)(j) := elem
    }
  }

  // Entry i is selected iff it is ready and, for every other ready entry j,
  // i is older than j (non-ready entries don't constrain the choice).
  def getOldest(get: (Int, Int) => Bool): UInt = {
    VecInit((0 until numEntries).map(i => {
      io.ready(i) & VecInit((0 until numEntries).map(j => if (i != j) !io.ready(j) || get(i, j) else true.B)).asUInt.andR
    })).asUInt
  }
  val best = getOldest(get_age)          // based on registered age (one cycle stale)
  val nextBest = getOldest(get_next_age) // based on next-cycle age (combinational)

  io.out := (if (regOut) best else nextBest)
}

/**
 * Convenience constructor: instantiate a registered-output AgeDetector and
 * return its selection as a Valid one-hot (valid when any entry selected).
 */
object AgeDetector {
  def apply(numEntries: Int, enq: Vec[UInt], deq: UInt, ready: UInt)(implicit p: Parameters): Valid[UInt] = {
    val age = Module(new AgeDetector(numEntries, enq.length, regOut = true))
    age.io.enq := enq
    age.io.deq := deq
    age.io.ready:= ready
    val out = Wire(Valid(UInt(deq.getWidth.W)))
    out.valid := age.io.out.orR
    out.bits := age.io.out
    out
  }
}


/**
 * LoadQueueReplay buffers loads that could not complete in the load pipeline
 * (TLB miss, dcache miss/replay/bank conflict, forwarding failure, st-ld
 * ordering hazards, RAR/RAW queue rejection) and re-issues them to the load
 * units once their blocking condition clears.
 *
 * Pipeline (2 stages after wakeup):
 *   s1: select up to LoadPipelineWidth oldest eligible entries (age-matrix
 *       based, with a program-order override near ldWbPtr) and read vaddr;
 *   s2: issue the replay request on io.replay, with a cold-down throttle.
 */
class LoadQueueReplay(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(ValidIO(new Redirect))                         // branch-misprediction / exception flush
    val enq = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // writeback from load units
    val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // store address generation (wakes addr-blocked loads)
    val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput)))     // store data generation (wakes data-blocked loads)
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))  // replay requests back to the load units
    val refill = Flipped(ValidIO(new Refill))                             // dcache refill (wakes miss-blocked loads)
    val stAddrReadySqPtr = Input(new SqPtr)
    val stAddrReadyVec = Input(Vec(StoreQueueSize, Bool()))
    val stDataReadySqPtr = Input(new SqPtr)
    val stDataReadyVec = Input(Vec(StoreQueueSize, Bool()))
    val sqEmpty = Input(Bool())
    val lqFull = Output(Bool())
    val ldWbPtr = Input(new LqPtr)                                        // oldest un-written-back load pointer
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))      // configurable TLB-miss backoff delays
    val rarFull = Input(Bool())
    val rawFull = Input(Bool())
  })

  println("LoadQueueReplay size: " + LoadQueueReplaySize)
  //  LoadQueueReplay field:
  //  +-----------+---------+-------+-------------+--------+
  //  | Allocated | MicroOp | VAddr |    Cause    |  Flags |
  //  +-----------+---------+-------+-------------+--------+
  //  Allocated   : entry has been allocated already
  //  MicroOp     : inst's microOp
  //  VAddr       : virtual address
  //  Cause       : replay cause
  //  Flags       : rar/raw queue allocate flags
  val allocated = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B))) // The control signals need to explicitly indicate the initial value
  val sleep = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))     // entry is waiting (not yet re-issued)
  val uop = Reg(Vec(LoadQueueReplaySize, new DynInst))
  val vaddrModule = Module(new LqVAddrModule(
    gen = UInt(VAddrBits.W),
    numEntries = LoadQueueReplaySize,
    numRead = LoadPipelineWidth,
    numWrite = LoadPipelineWidth,
    numWBank = LoadQueueNWriteBanks,
    numWDelay = 2,
    numCamPort = 0))
  vaddrModule.io := DontCare
  val cause = RegInit(VecInit(List.fill(LoadQueueReplaySize)(0.U(LoadReplayCauses.allCauses.W))))

  // freeliset: store valid entries index.
  // +---+---+--------------+-----+-----+
  // | 0 | 1 |  ......      | n-2 | n-1 |
  // +---+---+--------------+-----+-----+
  val freeList = Module(new FreeList(
    size = LoadQueueReplaySize,
    allocWidth = LoadPipelineWidth,
    freeWidth = 4,
    moduleName = "LoadQueueReplay freelist"
  ))
  freeList.io := DontCare
  /**
   * used for re-select control
   */
  // per-entry down-counter; entry cannot be re-selected until it reaches 0
  val credit = RegInit(VecInit(List.fill(LoadQueueReplaySize)(0.U(ReSelectLen.W))))
  val selBlocked = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
  //  Ptrs to control which cycle to choose
  // per-entry saturating pointers (0..3) selecting which backoff delay to use next
  val blockPtrTlb = RegInit(VecInit(List.fill(LoadQueueReplaySize)(0.U(2.W))))
  val blockPtrCache = RegInit(VecInit(List.fill(LoadQueueReplaySize)(0.U(2.W))))
  val blockPtrOthers = RegInit(VecInit(List.fill(LoadQueueReplaySize)(0.U(2.W))))
  //  Specific cycles to block
  val blockCyclesTlb = Reg(Vec(4, UInt(ReSelectLen.W)))
  blockCyclesTlb := io.tlbReplayDelayCycleCtrl
  val blockCyclesCache = RegInit(VecInit(Seq(11.U(ReSelectLen.W), 18.U(ReSelectLen.W), 127.U(ReSelectLen.W), 17.U(ReSelectLen.W))))
  val blockCyclesOthers = RegInit(VecInit(Seq(0.U(ReSelectLen.W), 0.U(ReSelectLen.W), 0.U(ReSelectLen.W), 0.U(ReSelectLen.W))))
  // sqIdx of the store this entry is waiting on (waitStore / forwardFail causes)
  val blockSqIdx = Reg(Vec(LoadQueueReplaySize, new SqPtr))
  // block causes
  val blockByTlbMiss = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
  val blockByForwardFail = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
  val blockByWaitStore = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
  val blockByCacheMiss = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
  val blockByRARReject = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
  val blockByRAWReject = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
  val blockByOthers = RegInit(VecInit(List.fill(LoadQueueReplaySize)(false.B)))
  //  DCache miss block
  val missMSHRId = RegInit(VecInit(List.fill(LoadQueueReplaySize)(0.U((log2Up(cfg.nMissEntries).W)))))
  val trueCacheMissReplay = WireInit(VecInit(cause.map(_(LoadReplayCauses.dcacheMiss))))
  // default credit update: count down toward 0 every cycle.
  // NOTE: assignments to creditUpdate inside the enqueue logic below override
  // this default for newly-enqueued entries (Chisel last-connect semantics).
  val creditUpdate = WireInit(VecInit(List.fill(LoadQueueReplaySize)(0.U(ReSelectLen.W))))
  (0 until LoadQueueReplaySize).map(i => {
    creditUpdate(i) := Mux(credit(i) > 0.U(ReSelectLen.W), credit(i)-1.U(ReSelectLen.W), credit(i))
    selBlocked(i) := creditUpdate(i) =/= 0.U(ReSelectLen.W) || credit(i) =/= 0.U(ReSelectLen.W)
  })
  val replayCarryReg = RegInit(VecInit(List.fill(LoadQueueReplaySize)(ReplayCarry(0.U, false.B))))

  /**
   * Enqueue
   */
  val canEnqueue = io.enq.map(_.valid)
  val cancelEnq = io.enq.map(enq => enq.bits.uop.robIdx.needFlush(io.redirect))
  val needReplay = io.enq.map(enq => enq.bits.replayInfo.needReplay())
  val hasExceptions = io.enq.map(enq => ExceptionNO.selectByFu(enq.bits.uop.exceptionVec, LduCfg).asUInt.orR && !enq.bits.tlbMiss)
  val loadReplay = io.enq.map(enq => enq.bits.isLoadReplay)
  // a writeback needs a (new or re-used) entry iff it must replay, was not
  // flushed, and carries no exception
  val needEnqueue = VecInit((0 until LoadPipelineWidth).map(w => {
    canEnqueue(w) && !cancelEnq(w) && needReplay(w) && !hasExceptions(w)
  }))
  // a replayed load that completed (or excepted) frees its entry
  val canFreeVec = VecInit((0 until LoadPipelineWidth).map(w => {
    canEnqueue(w) && loadReplay(w) && (!needReplay(w) || hasExceptions(w))
  }))

  // select LoadPipelineWidth valid index.
  val lqFull = freeList.io.empty
  val lqFreeNums = freeList.io.validCount

  // replay logic
  // release logic generation
  val storeAddrInSameCycleVec = Wire(Vec(LoadQueueReplaySize, Bool()))
  val storeDataInSameCycleVec = Wire(Vec(LoadQueueReplaySize, Bool()))
  val addrNotBlockVec = Wire(Vec(LoadQueueReplaySize, Bool()))
  val dataNotBlockVec = Wire(Vec(LoadQueueReplaySize, Bool()))
  val storeAddrValidVec = addrNotBlockVec.asUInt | storeAddrInSameCycleVec.asUInt
  val storeDataValidVec = dataNotBlockVec.asUInt | storeDataInSameCycleVec.asUInt

  // store data valid check
  val stAddrReadyVec = io.stAddrReadyVec
  val stDataReadyVec = io.stDataReadyVec

  for (i <- 0 until LoadQueueReplaySize) {
    // dequeue
    //  FIXME: store*Ptr is not accurate
    // the blocking store's addr/data is ready if the ready pointer has passed
    // it, its per-entry ready bit is set, or the store queue is empty
    dataNotBlockVec(i) := !isBefore(io.stDataReadySqPtr, blockSqIdx(i)) || stDataReadyVec(blockSqIdx(i).value) || io.sqEmpty // for better timing
    addrNotBlockVec(i) := !isBefore(io.stAddrReadySqPtr, blockSqIdx(i)) || stAddrReadyVec(blockSqIdx(i).value) || io.sqEmpty // for better timing

    // store address execute
    storeAddrInSameCycleVec(i) := VecInit((0 until StorePipelineWidth).map(w => {
      io.storeAddrIn(w).valid &&
      !io.storeAddrIn(w).bits.miss &&
      blockSqIdx(i) === io.storeAddrIn(w).bits.uop.sqIdx
    })).asUInt.orR // for better timing

    // store data execute
    storeDataInSameCycleVec(i) := VecInit((0 until StorePipelineWidth).map(w => {
      io.storeDataIn(w).valid &&
      blockSqIdx(i) === io.storeDataIn(w).bits.uop.sqIdx
    })).asUInt.orR // for better timing

  }

  // store addr issue check
  val stAddrDeqVec = Wire(Vec(LoadQueueReplaySize, Bool()))
  (0 until LoadQueueReplaySize).map(i => {
    stAddrDeqVec(i) := allocated(i) && storeAddrValidVec(i)
  })

  // store data issue check
  val stDataDeqVec = Wire(Vec(LoadQueueReplaySize, Bool()))
  (0 until LoadQueueReplaySize).map(i => {
    stDataDeqVec(i) := allocated(i) && storeDataValidVec(i)
  })

  // update block condition
  // NOTE(review): clears below may be overridden by the enqueue logic further
  // down that sets blockBy* for newly-enqueued entries (last-connect wins).
  (0 until LoadQueueReplaySize).map(i => {
    blockByForwardFail(i) := Mux(blockByForwardFail(i) && stDataDeqVec(i), false.B, blockByForwardFail(i))
    blockByWaitStore(i) := Mux(blockByWaitStore(i) && stAddrDeqVec(i), false.B, blockByWaitStore(i))
    blockByCacheMiss(i) := Mux(blockByCacheMiss(i) && io.refill.valid && io.refill.bits.id === missMSHRId(i), false.B, blockByCacheMiss(i))

    when (blockByCacheMiss(i) && io.refill.valid && io.refill.bits.id === missMSHRId(i)) { creditUpdate(i) := 0.U }
    when (blockByCacheMiss(i) && creditUpdate(i) === 0.U) { blockByCacheMiss(i) := false.B }
    when (blockByRARReject(i) && (!io.rarFull || !isAfter(uop(i).lqIdx, io.ldWbPtr))) { blockByRARReject(i) := false.B }
    when (blockByRAWReject(i) && (!io.rawFull || !isAfter(uop(i).sqIdx, io.stAddrReadySqPtr))) { blockByRAWReject(i) := false.B }
    when (blockByTlbMiss(i) && creditUpdate(i) === 0.U) { blockByTlbMiss(i) := false.B }
    when (blockByOthers(i) && creditUpdate(i) === 0.U) { blockByOthers(i) := false.B }
  })

  // Replay is splitted into 3 stages
  // helpers to slice per-entry vectors into per-replay-port "remainder" lanes
  // (entry i belongs to lane i % LoadPipelineWidth)
  def getRemBits(input: UInt)(rem: Int): UInt = {
    VecInit((0 until LoadQueueReplaySize / LoadPipelineWidth).map(i => { input(LoadPipelineWidth * i + rem) })).asUInt
  }

  def getRemSeq(input: Seq[Seq[Bool]])(rem: Int) = {
    (0 until LoadQueueReplaySize / LoadPipelineWidth).map(i => { input(LoadPipelineWidth * i + rem) })
  }

  // stage1: select 2 entries and read their vaddr
  val s1_oldestSel = Wire(Vec(LoadPipelineWidth, Valid(UInt(log2Up(LoadQueueReplaySize).W))))
  val s2_oldestSel = Wire(Vec(LoadPipelineWidth, Valid(UInt(log2Up(LoadQueueReplaySize).W))))

  // generate mask
  val needCancel = Wire(Vec(LoadQueueReplaySize, Bool()))
  // generate enq mask
  val selectIndexOH = Wire(Vec(LoadPipelineWidth, UInt(LoadQueueReplaySize.W)))
  val loadEnqFireMask = io.enq.map(x => x.fire && !x.bits.isLoadReplay).zip(selectIndexOH).map(x => Mux(x._1, x._2, 0.U))
  val remLoadEnqFireVec = loadEnqFireMask.map(x => VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(x)(rem))))
  val remEnqSelVec = Seq.tabulate(LoadPipelineWidth)(w => VecInit(remLoadEnqFireVec.map(x => x(w))))

  // generate free mask
  val loadReplayFreeMask = io.enq.map(_.bits).zip(canFreeVec).map(x => Mux(x._2, UIntToOH(x._1.sleepIndex), 0.U)).reduce(_|_)
  val loadFreeSelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
    needCancel(i) || loadReplayFreeMask(i)
  })).asUInt
  val remFreeSelVec = VecInit(Seq.tabulate(LoadPipelineWidth)(rem => getRemBits(loadFreeSelMask)(rem)))

  // generate cancel mask
  val loadReplayFireMask = (0 until LoadPipelineWidth).map(w => Mux(io.replay(w).fire, UIntToOH(s2_oldestSel(w).bits), 0.U)).reduce(_|_)
  val loadCancelSelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
    needCancel(i) || loadReplayFireMask(i)
  })).asUInt
  val remCancelSelVec = VecInit(Seq.tabulate(LoadPipelineWidth)(rem => getRemBits(loadCancelSelMask)(rem)))

  // generate replay mask
  // higher-priority candidates: waiting only on data-availability causes
  val loadHigherPriorityReplaySelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
    val blocked = blockByForwardFail(i) || blockByCacheMiss(i) || blockByTlbMiss(i)
    allocated(i) && sleep(i) && !blocked && !loadCancelSelMask(i)
  })).asUInt // use uint instead vec to reduce verilog lines
  // lower-priority candidates: waiting on credit/backoff or queue-capacity causes
  val loadLowerPriorityReplaySelMask = VecInit((0 until LoadQueueReplaySize).map(i => {
    val blocked = selBlocked(i) || blockByWaitStore(i) || blockByRARReject(i) || blockByRAWReject(i) || blockByOthers(i)
    allocated(i) && sleep(i) && !blocked && !loadCancelSelMask(i)
  })).asUInt // use uint instead vec to reduce verilog lines
  val loadNormalReplaySelMask = loadLowerPriorityReplaySelMask | loadHigherPriorityReplaySelMask
  val remNormalReplaySelVec = VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(loadNormalReplaySelMask)(rem)))
  val loadPriorityReplaySelMask = Mux(loadHigherPriorityReplaySelMask.orR, loadHigherPriorityReplaySelMask, loadLowerPriorityReplaySelMask)
  val remPriorityReplaySelVec = VecInit((0 until LoadPipelineWidth).map(rem => getRemBits(loadPriorityReplaySelMask)(rem)))

  /******************************************************************************************
   * WARNING: Make sure that OldestSelectStride must less than or equal stages of load unit.*
   ******************************************************************************************
   */
  // program-order override: a candidate whose lqIdx is within
  // OldestSelectStride of ldWbPtr gets selected ahead of the age-matrix pick
  val OldestSelectStride = 4
  val oldestPtrExt = (0 until OldestSelectStride).map(i => io.ldWbPtr + i.U)
  val oldestMatchMaskVec = (0 until LoadQueueReplaySize).map(i => (0 until OldestSelectStride).map(j => loadNormalReplaySelMask(i) && uop(i).lqIdx === oldestPtrExt(j)))
  // NOTE(review): "Oldset" below is a typo for "Oldest" (kept byte-identical)
  val remOldsetMatchMaskVec = (0 until LoadPipelineWidth).map(rem => getRemSeq(oldestMatchMaskVec.map(_.take(1)))(rem))
  val remOlderMatchMaskVec = (0 until LoadPipelineWidth).map(rem => getRemSeq(oldestMatchMaskVec.map(_.drop(1)))(rem))
  val remOldestSelVec = VecInit(Seq.tabulate(LoadPipelineWidth)(rem => {
    VecInit((0 until LoadQueueReplaySize / LoadPipelineWidth).map(i => {
      Mux(VecInit(remOldsetMatchMaskVec(rem).map(_(0))).asUInt.orR, remOldsetMatchMaskVec(rem)(i)(0), remOlderMatchMaskVec(rem)(i).reduce(_|_))
    })).asUInt
  }))

  // select oldest logic
  s1_oldestSel := VecInit((0 until LoadPipelineWidth).map(rport => {
    // select enqueue earlest inst
    val ageOldest = AgeDetector(LoadQueueReplaySize / LoadPipelineWidth, remEnqSelVec(rport), remFreeSelVec(rport), remPriorityReplaySelVec(rport))
    assert(!(ageOldest.valid && PopCount(ageOldest.bits) > 1.U), "oldest index must be one-hot!")
    val ageOldestValid = ageOldest.valid
    val ageOldestIndex = OHToUInt(ageOldest.bits)

    // select program order oldest
    val issOldestValid = remOldestSelVec(rport).orR
    val issOldestIndex = OHToUInt(PriorityEncoderOH(remOldestSelVec(rport)))

    val oldest = Wire(Valid(UInt()))
    oldest.valid := ageOldest.valid || issOldestValid
    // full entry index = {lane-local index, lane number}
    oldest.bits := Cat(Mux(issOldestValid, issOldestIndex, ageOldestIndex), rport.U(log2Ceil(LoadPipelineWidth).W))
    oldest
  }))


  // Replay port reorder
  class BalanceEntry extends XSBundle {
    val balance = Bool()                               // entry replays due to a bank conflict (wants port 0)
    val index = UInt(log2Up(LoadQueueReplaySize).W)    // replay-queue entry index
    val port = UInt(log2Up(LoadPipelineWidth).W)       // original selection port
  }

  // Move a bank-conflict replay to port 0 and swap the displaced selection
  // onto the conflicting entry's original port; other ports are unchanged.
  def balanceReOrder(sel: Seq[ValidIO[BalanceEntry]]): Seq[ValidIO[BalanceEntry]] = {
    require(sel.length > 0)
    val balancePick = ParallelPriorityMux(sel.map(x => (x.valid && x.bits.balance) -> x))
    val reorderSel = Wire(Vec(sel.length, ValidIO(new BalanceEntry)))
    (0 until sel.length).map(i =>
      if (i == 0) {
        when (balancePick.valid && balancePick.bits.balance) {
          reorderSel(i) := balancePick
        } .otherwise {
          reorderSel(i) := sel(i)
        }
      } else {
        when (balancePick.valid && balancePick.bits.balance && i.U === balancePick.bits.port) {
          reorderSel(i) := sel(0)
        } .otherwise {
          reorderSel(i) := sel(i)
        }
      }
    )
    reorderSel
  }

  // stage2: send replay request to load unit
  // replay cold down
  val ColdDownCycles = 16
  val coldCounter = RegInit(VecInit(List.fill(LoadPipelineWidth)(0.U(log2Up(ColdDownCycles).W))))
  val ColdDownThreshold = Wire(UInt(log2Up(ColdDownCycles).W))
  ColdDownThreshold := Constantin.createRecord("ColdDownThreshold_"+p(XSCoreParamsKey).HartId.toString(), initValue = 12.U)
  assert(ColdDownCycles.U > ColdDownThreshold, "ColdDownCycles must great than ColdDownThreshold!")

  // NOTE(review): `coldCounter(i) >= 0.U` is always true for a UInt; the
  // effective condition is `coldCounter(i) < ColdDownThreshold`.
  def replayCanFire(i: Int) = coldCounter(i) >= 0.U && coldCounter(i) < ColdDownThreshold
  def coldDownNow(i: Int) = coldCounter(i) >= ColdDownThreshold

  val s1_balanceOldestSelExt = (0 until LoadPipelineWidth).map(i => {
    val wrapper = Wire(Valid(new BalanceEntry))
    wrapper.valid := s1_oldestSel(i).valid
    wrapper.bits.balance := cause(s1_oldestSel(i).bits)(LoadReplayCauses.bankConflict)
    wrapper.bits.index := s1_oldestSel(i).bits
    wrapper.bits.port := i.U
    wrapper
  })
  val s1_balanceOldestSel = balanceReOrder(s1_balanceOldestSelExt)
  (0 until LoadPipelineWidth).map(w => {
    vaddrModule.io.raddr(w) := s1_balanceOldestSel(w).bits.index
  })

  for (i <- 0 until LoadPipelineWidth) {
    val s2_replayIdx = RegNext(s1_balanceOldestSel(i).bits.index)
    val s2_replayUop = uop(s2_replayIdx)
    val s2_replayMSHRId = missMSHRId(s2_replayIdx)
    val s2_replayCauses = cause(s2_replayIdx)
    val s2_replayCarry = replayCarryReg(s2_replayIdx)
    val s2_replayCacheMissReplay = trueCacheMissReplay(s2_replayIdx)
    val cancelReplay = s2_replayUop.robIdx.needFlush(io.redirect)

    // drop the selection if the entry was freed/cancelled while in s1
    val s2_loadCancelSelMask = RegNext(loadCancelSelMask)
    s2_oldestSel(i).valid := RegNext(s1_balanceOldestSel(i).valid) && !s2_loadCancelSelMask(s2_replayIdx)
    s2_oldestSel(i).bits := s2_replayIdx

    io.replay(i).valid := s2_oldestSel(i).valid && !cancelReplay && replayCanFire(i)
    io.replay(i).bits := DontCare
    io.replay(i).bits.uop := s2_replayUop
    io.replay(i).bits.vaddr := vaddrModule.io.rdata(i)
    io.replay(i).bits.isFirstIssue := false.B
    io.replay(i).bits.isLoadReplay := true.B
    io.replay(i).bits.replayCarry := s2_replayCarry
    io.replay(i).bits.mshrid := s2_replayMSHRId
    io.replay(i).bits.forward_tlDchannel := s2_replayCauses(LoadReplayCauses.dcacheMiss)
    io.replay(i).bits.sleepIndex := s2_oldestSel(i).bits

    when (io.replay(i).fire) {
      sleep(s2_oldestSel(i).bits) := false.B
      assert(allocated(s2_oldestSel(i).bits), s"LoadQueueReplay: why replay an invalid entry ${s2_oldestSel(i).bits} ?\n")
    }
  }

  // update cold counter
  // count consecutive back-to-back replay fires; once the threshold is
  // reached, hold off replays until the counter wraps past ColdDownCycles
  val lastReplay = RegNext(VecInit(io.replay.map(_.fire)))
  for (i <- 0 until LoadPipelineWidth) {
    when (lastReplay(i) && io.replay(i).fire) {
      coldCounter(i) := coldCounter(i) + 1.U
    } .elsewhen (coldDownNow(i)) {
      coldCounter(i) := coldCounter(i) + 1.U
    } .otherwise {
      coldCounter(i) := 0.U
    }
  }

  when(io.refill.valid) {
    XSDebug("miss resp: paddr:0x%x data %x\n", io.refill.bits.addr, io.refill.bits.data)
  }

  //  LoadQueueReplay deallocate
  val freeMaskVec = Wire(Vec(LoadQueueReplaySize, Bool()))

  // init
  freeMaskVec.map(e => e := false.B)

  // Allocate logic
  val enqValidVec = Wire(Vec(LoadPipelineWidth, Bool()))
  val enqIndexVec = Wire(Vec(LoadPipelineWidth, UInt()))
  val enqOffset = Wire(Vec(LoadPipelineWidth, UInt()))

  val newEnqueue = (0 until LoadPipelineWidth).map(i => {
    needEnqueue(i) && !io.enq(i).bits.isLoadReplay
  })

  for ((enq, w) <- io.enq.zipWithIndex) {
    vaddrModule.io.wen(w) := false.B
    freeList.io.doAllocate(w) := false.B

    // compact allocation: port w takes the (number of new enqueues before w)-th free slot
    enqOffset(w) := PopCount(newEnqueue.take(w))
    freeList.io.allocateReq(w) := newEnqueue(w)

    // Allocated ready
    enqValidVec(w) := freeList.io.canAllocate(enqOffset(w))
    // a replaying load re-uses its own entry (sleepIndex); a new load gets a fresh slot
    enqIndexVec(w) := Mux(enq.bits.isLoadReplay, enq.bits.sleepIndex, freeList.io.allocateSlot(enqOffset(w)))
    selectIndexOH(w) := UIntToOH(enqIndexVec(w))
    enq.ready := Mux(enq.bits.isLoadReplay, true.B, enqValidVec(w))

    val enqIndex = enqIndexVec(w)
    when (needEnqueue(w) && enq.ready) {

      val debug_robIdx = enq.bits.uop.robIdx.asUInt
      XSError(allocated(enqIndex) && !enq.bits.isLoadReplay, p"LoadQueueReplay: can not accept more load, check: ldu $w, robIdx $debug_robIdx!")
      XSError(hasExceptions(w), p"LoadQueueReplay: The instruction has exception, it can not be replay, check: ldu $w, robIdx $debug_robIdx!")

      freeList.io.doAllocate(w) := !enq.bits.isLoadReplay

      //  Allocate new entry
      allocated(enqIndex) := true.B
      sleep(enqIndex) := true.B
      uop(enqIndex) := enq.bits.uop

      vaddrModule.io.wen(w) := true.B
      vaddrModule.io.waddr(w) := enqIndex
      vaddrModule.io.wdata(w) := enq.bits.vaddr

      /**
       * used for feedback and replay
       */
      // set flags
      val replayInfo = enq.bits.replayInfo
      val dataInLastBeat = replayInfo.dataInLastBeat
      cause(enqIndex) := replayInfo.cause.asUInt

      // update credit
      // pick the backoff delay table by cause; the per-entry blockPtr* selects
      // which of the 4 delays to use (grows saturating with repeated replays)
      val blockCyclesTlbPtr = blockPtrTlb(enqIndex)
      val blockCyclesCachePtr = blockPtrCache(enqIndex)
      val blockCyclesOtherPtr = blockPtrOthers(enqIndex)
      creditUpdate(enqIndex) := Mux(replayInfo.cause(LoadReplayCauses.tlbMiss), blockCyclesTlb(blockCyclesTlbPtr),
                                Mux(replayInfo.cause(LoadReplayCauses.dcacheMiss), blockCyclesCache(blockCyclesCachePtr) + dataInLastBeat, blockCyclesOthers(blockCyclesOtherPtr)))

      // init
      blockByTlbMiss(enqIndex) := false.B
      blockByWaitStore(enqIndex) := false.B
      blockByForwardFail(enqIndex) := false.B
      blockByCacheMiss(enqIndex) := false.B
      blockByRARReject(enqIndex) := false.B
      blockByRAWReject(enqIndex) := false.B
      blockByOthers(enqIndex) := false.B

      // update block pointer
      when (replayInfo.cause(LoadReplayCauses.dcacheReplay)) {
        // normal case: dcache replay
        blockByOthers(enqIndex) := true.B
        blockPtrOthers(enqIndex) := Mux(blockPtrOthers(enqIndex) === 3.U(2.W), blockPtrOthers(enqIndex), blockPtrOthers(enqIndex) + 1.U(2.W))
      } .elsewhen (replayInfo.cause(LoadReplayCauses.bankConflict) || replayInfo.cause(LoadReplayCauses.schedError)) {
        // normal case: bank conflict or schedule error
        // can replay next cycle
        creditUpdate(enqIndex) := 0.U
        blockByOthers(enqIndex) := false.B
      }

      // special case: tlb miss
      when (replayInfo.cause(LoadReplayCauses.tlbMiss)) {
        blockByTlbMiss(enqIndex) := true.B
        blockPtrTlb(enqIndex) := Mux(blockPtrTlb(enqIndex) === 3.U(2.W), blockPtrTlb(enqIndex), blockPtrTlb(enqIndex) + 1.U(2.W))
      }

      // special case: dcache miss
      when (replayInfo.cause(LoadReplayCauses.dcacheMiss)) {
        blockByCacheMiss(enqIndex) := !replayInfo.canForwardFullData && //  dcache miss
                                  !(io.refill.valid && io.refill.bits.id === replayInfo.missMSHRId) && //  no refill in this cycle
                                  creditUpdate(enqIndex) =/= 0.U //  credit is not zero
        blockPtrCache(enqIndex) := Mux(blockPtrCache(enqIndex) === 3.U(2.W), blockPtrCache(enqIndex), blockPtrCache(enqIndex) + 1.U(2.W))
      }

      // special case: st-ld violation
      when (replayInfo.cause(LoadReplayCauses.waitStore)) {
        blockByWaitStore(enqIndex) := true.B
        blockSqIdx(enqIndex) := replayInfo.addrInvalidSqIdx
        blockPtrOthers(enqIndex) :=  Mux(blockPtrOthers(enqIndex) === 3.U(2.W), blockPtrOthers(enqIndex), blockPtrOthers(enqIndex) + 1.U(2.W))
      }

      // special case: data forward fail
      when (replayInfo.cause(LoadReplayCauses.forwardFail)) {
        blockByForwardFail(enqIndex) := true.B
        blockSqIdx(enqIndex) := replayInfo.dataInvalidSqIdx
        blockPtrOthers(enqIndex) :=  Mux(blockPtrOthers(enqIndex) === 3.U(2.W), blockPtrOthers(enqIndex), blockPtrOthers(enqIndex) + 1.U(2.W))
      }

      // special case: rar reject
      when (replayInfo.cause(LoadReplayCauses.rarReject)) {
        blockByRARReject(enqIndex) := true.B
        blockPtrOthers(enqIndex) :=  Mux(blockPtrOthers(enqIndex) === 3.U(2.W), blockPtrOthers(enqIndex), blockPtrOthers(enqIndex) + 1.U(2.W))
      }

      // special case: raw reject
      when (replayInfo.cause(LoadReplayCauses.rawReject)) {
        blockByRAWReject(enqIndex) := true.B
        blockPtrOthers(enqIndex) :=  Mux(blockPtrOthers(enqIndex) === 3.U(2.W), blockPtrOthers(enqIndex), blockPtrOthers(enqIndex) + 1.U(2.W))
      }

      // extra info for the next replay attempt
      replayCarryReg(enqIndex) := replayInfo.replayCarry
      missMSHRId(enqIndex) := replayInfo.missMSHRId
    }

    // a returning replayed load either frees its entry (done/exception)
    // or goes back to sleep to be selected again
    val sleepIndex = enq.bits.sleepIndex
    when (enq.valid && enq.bits.isLoadReplay) {
      when (!needReplay(w) || hasExceptions(w)) {
        allocated(sleepIndex) := false.B
        freeMaskVec(sleepIndex) := true.B
      } .otherwise {
        sleep(sleepIndex) := true.B
      }
    }
  }

  // misprediction recovery / exception redirect
  for (i <- 0 until LoadQueueReplaySize) {
    needCancel(i) := uop(i).robIdx.needFlush(io.redirect) && allocated(i)
    when (needCancel(i)) {
      allocated(i) := false.B
      freeMaskVec(i) := true.B
    }
  }

  freeList.io.free := freeMaskVec.asUInt

  io.lqFull := lqFull

  // perf cnt
  val enqCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay))
  val deqCount = PopCount(io.replay.map(_.fire))
  val deqBlockCount = PopCount(io.replay.map(r => r.valid && !r.ready))
  val replayTlbMissCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay && enq.bits.replayInfo.cause(LoadReplayCauses.tlbMiss)))
  val replayWaitStoreCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay && enq.bits.replayInfo.cause(LoadReplayCauses.waitStore)))
  val replaySchedErrorCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay && enq.bits.replayInfo.cause(LoadReplayCauses.schedError)))
  val replayRARRejectCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay && enq.bits.replayInfo.cause(LoadReplayCauses.rarReject)))
  val replayRAWRejectCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay && enq.bits.replayInfo.cause(LoadReplayCauses.rawReject)))
  val replayBankConflictCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay && enq.bits.replayInfo.cause(LoadReplayCauses.bankConflict)))
  val replayDCacheReplayCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay && enq.bits.replayInfo.cause(LoadReplayCauses.dcacheReplay)))
  val replayForwardFailCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay && enq.bits.replayInfo.cause(LoadReplayCauses.forwardFail)))
  val replayDCacheMissCount = PopCount(io.enq.map(enq => enq.fire && !enq.bits.isLoadReplay && enq.bits.replayInfo.cause(LoadReplayCauses.dcacheMiss)))
  XSPerfAccumulate("enq", enqCount)
  XSPerfAccumulate("deq", deqCount)
  XSPerfAccumulate("deq_block", deqBlockCount)
  XSPerfAccumulate("replay_full", io.lqFull)
  XSPerfAccumulate("replay_rar_reject", replayRARRejectCount)
  XSPerfAccumulate("replay_raw_reject", replayRAWRejectCount)
  XSPerfAccumulate("replay_sched_error", replaySchedErrorCount)
  XSPerfAccumulate("replay_wait_store", replayWaitStoreCount)
  XSPerfAccumulate("replay_tlb_miss", replayTlbMissCount)
  XSPerfAccumulate("replay_bank_conflict", replayBankConflictCount)
  XSPerfAccumulate("replay_dcache_replay", replayDCacheReplayCount)
  XSPerfAccumulate("replay_forward_fail", replayForwardFailCount)
  XSPerfAccumulate("replay_dcache_miss", replayDCacheMissCount)

  val perfEvents: Seq[(String, UInt)] = Seq(
    ("enq", enqCount),
    ("deq", deqCount),
    ("deq_block", deqBlockCount),
    ("replay_full", io.lqFull),
    ("replay_rar_reject", replayRARRejectCount),
    ("replay_raw_reject", replayRAWRejectCount),
    ("replay_advance_sched", replaySchedErrorCount),
    ("replay_wait_store", replayWaitStoreCount),
    ("replay_tlb_miss", replayTlbMissCount),
    ("replay_bank_conflict", replayBankConflictCount),
    ("replay_dcache_replay", replayDCacheReplayCount),
    ("replay_forward_fail", replayForwardFailCount),
    ("replay_dcache_miss", replayDCacheMissCount),
  )
  generatePerfEvent()
  // end
}