/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.decode.isa.bitfield.{InstVType, XSInstBitFields}
import xiangshan.backend.fu.FuType

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(Valid(new Redirect))
    val vecCommit = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    // from dispatch
    val enq = new LqEnqIO
    // from ldu s3
    val ldin = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr = Output(new LqPtr)
    // global
    val lqFull = Output(Bool())
    val lqEmpty = Output(Bool())
    // to dispatch
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  //  VirtualLoadQueue field
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated : entry has been allocated already
  //  MicroOp   : inst's microOp
  //  Flags     : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // the control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new DynInst))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid
  // vector load: inst -> uop (pdest register) -> flow (one load operation in the load unit)
  val isvec = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load flow
  val veccommitted = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load uop has committed

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  // maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
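  // Editor's illustrative sketch (hypothetical helper, not part of the original design):
  // LqPtr is assumed to use the usual utility.CircularQueuePtr encoding, i.e. a {flag, value}
  // pair whose flag toggles on every wrap-around. The pure-Scala model below mirrors how
  // distanceBetween (used for validCount further down) interprets that encoding; it elaborates
  // no hardware and exists only as documentation.
  def sketchPtrDistance(enqFlag: Boolean, enqValue: Int, deqFlag: Boolean, deqValue: Int): Int = {
    if (enqFlag == deqFlag) enqValue - deqValue             // same lap: plain difference
    else VirtualLoadQueueSize + enqValue - deqValue         // enq has wrapped once more than deq
  }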
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = GatedValidRegNext(needCancel)
  val enqCancel = canEnqueue.zip(io.enq.req).map{case (v, x) =>
    v && x.bits.robIdx.needFlush(io.redirect)
  }
  val enqCancelNum = enqCancel.zip(io.enq.req).map{case (v, req) =>
    Mux(v, req.bits.numLsElem, 0.U)
  }
  val lastEnqCancel = GatedRegNext(enqCancelNum.reduce(_ + _))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)

  // update enqueue pointer
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem)
  val validVLoadFlow = vLoadFlow.zipWithIndex.map{case (vLoadFlowNumItem, index) => Mux(canEnqueue(index), vLoadFlowNumItem, 0.U)}
  val validVLoadOffset = vLoadFlow.zip(io.enq.needAlloc).map{case (flow, needAllocItem) => Mux(needAllocItem, flow, 0.U)}
  val validVLoadOffsetRShift = 0.U +: validVLoadOffset.take(validVLoadFlow.length - 1)

  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value)
    && ((datavalid(ptr.value) && addrvalid(ptr.value) && !isvec(ptr.value)) || (isvec(ptr.value) && veccommitted(ptr.value)))
    && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = GatedRegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

  io.lqDeq := GatedRegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)
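  // Editor's illustrative sketch (hypothetical helper, not part of the original design):
  // commitCount above counts how many consecutive entries starting at deqPtr can be
  // dequeued this cycle, by computing PopCount(PriorityEncoderOH(~deqCountMask) - 1.U),
  // i.e. the number of contiguous low-order ones in deqCountMask. The pure-Scala model
  // below restates that trick on a plain BigInt mask; it elaborates no hardware.
  def sketchLeadingOnesFromLsb(mask: BigInt, width: Int): Int =
    (0 until width).indexWhere(bit => !mask.testBit(bit)) match {
      case -1  => width  // every looked-up slot is dequeueable this cycle
      case idx => idx    // the first non-dequeueable slot bounds the commit count
    }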
  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries > EnqWidth
   * Entries are allocated dynamically according to each request's numLsElem
   */
  io.enq.canAccept := allowEnqueue
  val enqLowBound = io.enq.req.map(_.bits.lqIdx)
  val enqUpBound = io.enq.req.map(x => x.bits.lqIdx + x.bits.numLsElem)
  val enqCrossLoop = enqLowBound.zip(enqUpBound).map{case (low, up) => low.flag =/= up.flag}

  for(i <- 0 until VirtualLoadQueueSize) {
    val entryCanEnqSeq = (0 until io.enq.req.length).map { j =>
      val entryHitBound = Mux(
        enqCrossLoop(j),
        enqLowBound(j).value <= i.U || i.U < enqUpBound(j).value,
        enqLowBound(j).value <= i.U && i.U < enqUpBound(j).value
      )
      canEnqueue(j) && !enqCancel(j) && entryHitBound
    }
    val entryCanEnq = entryCanEnqSeq.reduce(_ || _)
    val selectBits = ParallelPriorityMux(entryCanEnqSeq, io.enq.req.map(_.bits))
    when (entryCanEnq) {
      uop(i) := selectBits
      allocated(i) := true.B
      datavalid(i) := false.B
      addrvalid(i) := false.B
      isvec(i) := FuType.isVLoad(selectBits.fuType)
      veccommitted(i) := false.B

      debug_mmio(i) := false.B
      debug_paddr(i) := 0.U
    }
  }

  for (i <- 0 until io.enq.req.length) {
    val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
    val index = io.enq.req(i).bits.lqIdx
    XSError(canEnqueue(i) && !enqCancel(i) && (!io.enq.canAccept || !io.enq.sqCanAccept), s"must accept $i\n")
    XSError(canEnqueue(i) && !enqCancel(i) && index.value =/= lqIdx.value, s"must be the same entry $i\n")
    io.enq.resp(i) := lqIdx
  }

  /**
   * Load commits
   *
   * When a load commits, mark it as !allocated and move deqPtr forward.
   */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr + i.U).value) := false.B
    }
    XSError(commitCount > i.U && !allocated((deqPtr + i.U).value), s"why commit invalid entry $i?\n")
  })

  // vector commit or replay
  val vecLdCommittmp = Wire(Vec(VirtualLoadQueueSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCommit = Wire(Vec(VirtualLoadQueueSize, Bool()))
  for (i <- 0 until VirtualLoadQueueSize) {
    val cmt = io.vecCommit
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCommittmp(i)(j) := allocated(i) && cmt(j).valid && uop(i).robIdx === cmt(j).bits.robidx && uop(i).uopIdx === cmt(j).bits.uopidx
    }
    vecLdCommit(i) := vecLdCommittmp(i).reduce(_ || _)

    when (vecLdCommit(i)) {
      veccommitted(i) := true.B
    }
  }

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")
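  // Editor's illustrative sketch (hypothetical helper, not part of the original design):
  // entryHitBound in the enqueue loop above tests whether entry i falls inside a request's
  // allocation window [lqIdx, lqIdx + numLsElem). When that window wraps past the end of
  // the queue (enqCrossLoop, detected by differing flag bits), the half-open interval test
  // becomes a disjunction. The pure-Scala model below restates that check; it elaborates
  // no hardware.
  def sketchEntryHitBound(entry: Int, low: Int, up: Int, crossLoop: Boolean): Boolean =
    if (crossLoop) low <= entry || entry < up   // wrapped window: [low, size) ++ [0, up)
    else           low <= entry && entry < up   // ordinary window: [low, up)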
  /**
   * Writeback load from load units
   *
   * Most load instructions write back to the regfile at the same time.
   * However,
   *   (1) A ready load instruction (no replay needed) writes back to the ROB immediately.
   */
  for(i <- 0 until LoadPipelineWidth) {
    // most lq status bits need to be updated immediately after the load writes back to the lq;
    // the flag bits in the lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
    val need_rep = io.ldin(i).bits.rep_info.need_rep
    val need_valid = io.ldin(i).bits.updateAddrValid
    when (io.ldin(i).valid) {
      when (!need_rep && need_valid) {
        // update control flag
        addrvalid(loadWbIndex) := hasExceptions || !io.ldin(i).bits.tlbMiss || io.ldin(i).bits.isSWPrefetch
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss && // dcache miss
            !io.ldin(i).bits.dcacheRequireReplay || // do not writeback if that inst will be resent from rs
            io.ldin(i).bits.isSWPrefetch
          } else {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss ||
            io.ldin(i).bits.isSWPrefetch
          })

        when (io.ldin(i).bits.data_wen_dup(1)) {
          uop(loadWbIndex) := io.ldin(i).bits.uop
        }
        when (io.ldin(i).bits.data_wen_dup(4)) {
          uop(loadWbIndex).debugInfo := io.ldin(i).bits.uop.debugInfo
        }
        uop(loadWbIndex).debugInfo := io.ldin(i).bits.rep_info.debug

        // Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr
      }
    }

    XSInfo(io.ldin(i).valid && !need_rep && need_valid,
      "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x isvec %x\n",
      io.ldin(i).bits.uop.lqIdx.asUInt,
      io.ldin(i).bits.uop.pc,
      io.ldin(i).bits.vaddr,
      io.ldin(i).bits.paddr,
      io.ldin(i).bits.mask,
      io.ldin(i).bits.forwardData.asUInt,
      io.ldin(i).bits.forwardMask.asUInt,
      io.ldin(i).bits.mmio,
      io.ldin(i).bits.isvec
    )
  }

  // perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i))))
  QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    XSDebug(false, flag, name) // when(flag)
    XSDebug(false, !flag, " ") // otherwise
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(s"$i pc %x pa %x ", uop(i).pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    PrintFlag(allocated(i) && isvec(i), "c")
    XSDebug(false, true.B, "\n")
  }
  // end
}
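// Editor's illustrative sketch (hypothetical object, not part of the original file): a
// pure-Scala restatement of the datavalid condition set in the writeback block above for
// the non-EnableFastForward case, making the operator precedence explicit. Nothing in the
// generated hardware refers to it.
object VirtualLoadQueueWritebackSketch {
  final case class WritebackFlags(hasExceptions: Boolean, mmio: Boolean, miss: Boolean, isSWPrefetch: Boolean)

  // datavalid is set when the load cannot or need not produce data the normal way
  // (exception or mmio), already has its data (no dcache miss), or is a software prefetch.
  def dataValid(f: WritebackFlags): Boolean =
    f.hasExceptions || f.mmio || !f.miss || f.isSWPrefetch
}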