/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import org.chipsalliance.cde.config._
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.ExceptionNO._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, UopIdx}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.decode.isa.bitfield.{InstVType, XSInstBitFields}
import xiangshan.backend.fu.FuType
import xiangshan.mem.Bundles._
import xiangshan.cache._

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(Valid(new Redirect))
    val vecCommit = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    // from dispatch
    val enq = new LqEnqIO
    // from ldu s3
    val ldin = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr = Output(new LqPtr)
    // global
    val lqFull = Output(Bool())
    val lqEmpty = Output(Bool())
    // to dispatch
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    // for topdown
    val noUopsIssued = Input(Bool())
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)

  //  VirtualLoadQueue fields
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated : entry has been allocated already
  //  MicroOp   : inst's microOp
  //  Flags     : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // control signals need an explicit initial value
  val robIdx = Reg(Vec(VirtualLoadQueueSize, new RobPtr))
  val uopIdx = Reg(Vec(VirtualLoadQueueSize, UopIdx()))
  val isvec = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load flow
  val committed = Reg(Vec(VirtualLoadQueueSize, Bool()))

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // paddr: inst's paddr

  // maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)
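
  // Pointer recovery after a redirect (as implemented below): the number of
  // squashed entries is counted in the cycle after io.redirect fires
  // (lastNeedCancel / lastEnqCancel) and captured in redirectCancelCount; the
  // enqueue pointers are then rolled back by that amount once
  // lastLastCycleRedirect is valid. No new enqueue is expected in the cycle
  // right after a redirect (see the assert on enqNumber below).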

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem.asTypeOf(UInt(elemIdxBits.W)))
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    robIdx(i).needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = GatedValidRegNext(needCancel)
  val enqCancel = canEnqueue.zip(io.enq.req).map { case (v, x) =>
    v && x.bits.robIdx.needFlush(io.redirect)
  }
  val enqCancelNum = enqCancel.zip(vLoadFlow).map { case (v, flow) =>
    Mux(v, flow, 0.U)
  }
  val lastEnqCancel = GatedRegNext(enqCancelNum.reduce(_ + _))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)

  // update enqueue pointer
  val validVLoadFlow = vLoadFlow.zipWithIndex.map { case (vLoadFlowNumItem, index) => Mux(canEnqueue(index), vLoadFlowNumItem, 0.U) }
  val validVLoadOffset = vLoadFlow.zip(io.enq.needAlloc).map { case (flow, needAllocItem) => Mux(needAllocItem, flow, 0.U) }
  val validVLoadOffsetRShift = 0.U +: validVLoadOffset.take(validVLoadFlow.length - 1)

  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // recover the pointers in the cycle after redirectCancelCount has been captured
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value) && committed(ptr.value) && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = GatedRegNext(commitCount)
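
  // How commitCount (computed above) works, with an illustrative example that
  // assumes nothing about the actual CommitWidth: deqCountMask has one bit per
  // lookup slot starting at deqPtr, set when that entry is allocated,
  // committed, not cancelled this cycle and not equal to enqPtr.
  // PriorityEncoderOH(~deqCountMask) picks the first slot that is NOT ready;
  // subtracting 1 turns that one-hot into a mask of all ready slots below it,
  // so PopCount yields the length of the contiguous ready prefix.
  // E.g. with 8 slots and deqCountMask = b00000111: PriorityEncoderOH(~mask)
  // = b00001000, minus 1 = b00000111, PopCount = 3 entries dequeued.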

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

  io.lqDeq := GatedRegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries > EnqWidth
   * Dynamic enqueue based on the numLsElem of each request
   */
  io.enq.canAccept := allowEnqueue
  val enqLowBound = io.enq.req.map(_.bits.lqIdx)
  val enqUpBound = io.enq.req.map(x => x.bits.lqIdx + x.bits.numLsElem)
  val enqCrossLoop = enqLowBound.zip(enqUpBound).map { case (low, up) => low.flag =/= up.flag }

  for (i <- 0 until VirtualLoadQueueSize) {
    val entryCanEnqSeq = (0 until io.enq.req.length).map { j =>
      val entryHitBound = Mux(
        enqCrossLoop(j),
        enqLowBound(j).value <= i.U || i.U < enqUpBound(j).value,
        enqLowBound(j).value <= i.U && i.U < enqUpBound(j).value
      )
      canEnqueue(j) && !enqCancel(j) && entryHitBound
    }
    val entryCanEnq = entryCanEnqSeq.reduce(_ || _)
    val selectBits = ParallelPriorityMux(entryCanEnqSeq, io.enq.req.map(_.bits))
    when (entryCanEnq) {
      allocated(i) := true.B
      robIdx(i) := selectBits.robIdx
      uopIdx(i) := selectBits.uopIdx
      isvec(i) := FuType.isVLoad(selectBits.fuType)
      committed(i) := false.B

      debug_mmio(i) := false.B
      debug_paddr(i) := 0.U
    }
  }

  for (i <- 0 until io.enq.req.length) {
    val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
    val index = io.enq.req(i).bits.lqIdx
    XSError(canEnqueue(i) && !enqCancel(i) && (!io.enq.canAccept || !io.enq.sqCanAccept), s"must accept $i\n")
    XSError(canEnqueue(i) && !enqCancel(i) && index.value =/= lqIdx.value, s"must be the same entry $i\n")
    io.enq.resp(i) := lqIdx
  }

  /**
   * Load commits
   *
   * When a load commits, mark it as !allocated and move deqPtr forward.
   */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr + i.U).value) := false.B
    }
    XSError(commitCount > i.U && !allocated((deqPtr + i.U).value), s"why commit invalid entry $i?\n")
  })

  // vector commit or replay
  val vecLdCommittmp = Wire(Vec(VirtualLoadQueueSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCommit = Wire(Vec(VirtualLoadQueueSize, Bool()))
  for (i <- 0 until VirtualLoadQueueSize) {
    val cmt = io.vecCommit
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCommittmp(i)(j) := allocated(i) && cmt(j).valid && robIdx(i) === cmt(j).bits.robidx && uopIdx(i) === cmt(j).bits.uopidx
    }
    vecLdCommit(i) := vecLdCommittmp(i).reduce(_ || _)

    when (vecLdCommit(i) && isvec(i)) {
      committed(i) := true.B
    }
  }

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
   * Writeback load from load units
   *
   * Most load instructions write back to the regfile at the same time.
   * However,
   *   (1) a ready load instruction (one that needs no replay) writes back to the ROB immediately.
   */
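  // Note on the writeback path below: io.ldin(i).ready is tied high, so the
  // queue never back-pressures the load units at s3. An entry is marked
  // committed here only for scalar loads that need no replay and carry a valid
  // updated address; vector loads are marked committed through the vecCommit
  // path above instead.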
  for (i <- 0 until LoadPipelineWidth) {
    // most lq status needs to be updated immediately after load writeback to lq
    // flag bits in lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    when (io.ldin(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep
      val need_valid = io.ldin(i).bits.updateAddrValid

      when (!need_rep && need_valid && !io.ldin(i).bits.isvec) {
        committed(loadWbIndex) := true.B

        // Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr
      }

      XSInfo(!need_rep && need_valid,
        "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x isvec %x\n",
        io.ldin(i).bits.uop.lqIdx.asUInt,
        io.ldin(i).bits.uop.pc,
        io.ldin(i).bits.vaddr,
        io.ldin(i).bits.paddr,
        io.ldin(i).bits.mask,
        io.ldin(i).bits.forwardData.asUInt,
        io.ldin(i).bits.forwardMask.asUInt,
        io.ldin(i).bits.mmio,
        io.ldin(i).bits.isvec
      )
    }
  }

  // perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i))))
  QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue)
  io.lqFull := !allowEnqueue

  def NLoadNotCompleted = 1
  val validCountReg = RegNext(validCount)
  val noUopsIssued = io.noUopsIssued
  val stallLoad = io.noUopsIssued && (validCountReg >= NLoadNotCompleted.U)
  val memStallAnyLoad = RegNext(stallLoad)

  XSPerfAccumulate("mem_stall_anyload", memStallAnyLoad)

  val perfEvents: Seq[(String, UInt)] = Seq(
    ("MEMSTALL_ANY_LOAD", memStallAnyLoad),
  )
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    XSDebug(false, flag, name) // when(flag)
    XSDebug(false, !flag, " ") // otherwise
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && committed(i), "c")
    PrintFlag(allocated(i) && isvec(i), "v")
    XSDebug(false, true.B, "\n")
  }
  // end
}