/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.decode.isa.bitfield.{InstVType, XSInstBitFields}

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(Valid(new Redirect))
    val vecCommit = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    // from dispatch
    val enq = new LqEnqIO
    // from ldu s3
    val ldin = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr = Output(new LqPtr)
    // global
    val lqFull = Output(Bool())
    val lqEmpty = Output(Bool())
    // to dispatch
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  // VirtualLoadQueue fields
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated : entry has been allocated already
  //  MicroOp   : inst's microOp
  //  Flags     : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // control signals need an explicit initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new DynInst))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid
  // vector load: inst -> uop (pdest register) -> flow (one load operation in the load unit)
  val isvec = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load flow
  val veccommitted = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load uop has committed

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  // maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
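  // Note on the pointer representation below: LqPtr is a circular queue pointer that carries
  // a wrap `flag` plus an entry `value` (both are used in the debug print at the end of this
  // file), so helpers such as isAfter/distanceBetween can tell a full queue from an empty one
  // when the raw indices are equal. enqPtrExt holds one pointer per enqueue port (initialized
  // to 0, 1, 2, ...), although the allocation logic below derives each port's lqIdx from
  // enqPtrExt(0) plus a running sum of the earlier ports' numLsElem.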
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = GatedValidRegNext(needCancel)
  val enqCancel = canEnqueue.zip(io.enq.req).map { case (v, x) =>
    v && x.bits.robIdx.needFlush(io.redirect)
  }
  val enqCancelNum = enqCancel.zip(io.enq.req).map { case (v, req) =>
    Mux(v, req.bits.numLsElem, 0.U)
  }
  val lastEnqCancel = GatedRegNext(enqCancelNum.reduce(_ + _))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)

  // update enqueue pointer
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem)
  val validVLoadFlow = vLoadFlow.zipWithIndex.map { case (vLoadFlowNumItem, index) =>
    Mux(io.enq.canAccept && io.enq.sqCanAccept && canEnqueue(index), vLoadFlowNumItem, 0.U)
  }
  val validVLoadOffset = vLoadFlow.zip(io.enq.needAlloc).map { case (flow, needAllocItem) =>
    Mux(needAllocItem, flow, 0.U)
  }
  val validVLoadOffsetRShift = 0.U +: validVLoadOffset.take(validVLoadFlow.length - 1)

  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr =>
    allocated(ptr.value)
      && ((datavalid(ptr.value) && addrvalid(ptr.value) && !isvec(ptr.value)) || (isvec(ptr.value) && veccommitted(ptr.value)))
      && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = GatedRegNext(commitCount)
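  // Worked example of the mask arithmetic above (illustrative values, not a fixed
  // configuration): suppose DeqPtrMoveStride = 8 and deqCountMask = b0000_0111, i.e. the
  // entries at deqPtr, deqPtr+1 and deqPtr+2 are ready to leave the queue but deqPtr+3 is
  // not. Then ~deqCountMask = b1111_1000, PriorityEncoderOH selects its lowest set bit
  // (b0000_1000), subtracting 1 gives b0000_0111, and PopCount yields 3: the length of the
  // run of consecutive ready entries starting at deqPtr. If all lookup entries are ready,
  // ~deqCountMask is zero, PriorityEncoderOH returns 0, and the wrapping subtraction makes
  // commitCount equal to DeqPtrMoveStride.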
  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

  io.lqDeq := GatedRegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries > EnqWidth
   */
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until io.enq.req.length) {
    val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
    val index = io.enq.req(i).bits.lqIdx
    val enqInstr = io.enq.req(i).bits.instr.asTypeOf(new XSInstBitFields)
    when (canEnqueue(i) && !enqCancel(i)) {
      // The maximum 'numLsElem' that each dispatch port can emit is:
      //   16, 2, 2, 2, 2, 2.
      // Therefore, VecMemLSQEnqIteratorNumberSeq = Seq(16, 2, 2, 2, 2, 2)
      for (j <- 0 until VecMemLSQEnqIteratorNumberSeq(i)) {
        when (j.U < validVLoadOffset(i)) {
          allocated((index + j.U).value) := true.B
          uop((index + j.U).value) := io.enq.req(i).bits
          uop((index + j.U).value).lqIdx := lqIdx + j.U

          // init
          addrvalid((index + j.U).value) := false.B
          datavalid((index + j.U).value) := false.B
          isvec((index + j.U).value) := enqInstr.isVecLoad
          veccommitted((index + j.U).value) := false.B

          debug_mmio((index + j.U).value) := false.B
          debug_paddr((index + j.U).value) := 0.U

          XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
          XSError(index.value =/= lqIdx.value, s"must be the same entry $i\n")
        }
      }
    }
    io.enq.resp(i) := lqIdx
  }
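  // Illustrative walk-through of the allocation loop above (hypothetical values, not a fixed
  // configuration): say port 0 dispatches a vector load with numLsElem = 4 and port 1 a
  // scalar load with numLsElem = 1, and both are accepted. Then validVLoadOffsetRShift =
  // (0, 4, ...), so port 0 gets lqIdx = enqPtrExt(0) and port 1 gets lqIdx = enqPtrExt(0) + 4.
  // Port 0 marks the four consecutive entries starting at its index as allocated and gives
  // each element flow lqIdx + j, with addrvalid/datavalid cleared and isvec taken from the
  // decoded instruction; enqPtrExt then advances by enqNumber = 5 in total.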
  /**
   * Load commits
   *
   * When a load is committed, mark it as !allocated and move deqPtr forward.
   */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr + i.U).value) := false.B
      XSError(!allocated((deqPtr + i.U).value), s"why commit invalid entry $i?\n")
    }
  })

  // vector commit or replay
  val vecLdCommittmp = Wire(Vec(VirtualLoadQueueSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCommit = Wire(Vec(VirtualLoadQueueSize, Bool()))
  for (i <- 0 until VirtualLoadQueueSize) {
    val cmt = io.vecCommit
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCommittmp(i)(j) := allocated(i) && cmt(j).valid && cmt(j).bits.isCommit &&
        uop(i).robIdx === cmt(j).bits.robidx && uop(i).uopIdx === cmt(j).bits.uopidx
    }
    vecLdCommit(i) := vecLdCommittmp(i).reduce(_ || _)

    when (vecLdCommit(i)) {
      veccommitted(i) := true.B
    }
  }

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
   * Writeback load from load units
   *
   * Most load instructions write back to the regfile at the same time.
   * However,
   *   (1) For a ready load instruction (no replay needed), it writes back to the ROB immediately.
   */
  for (i <- 0 until LoadPipelineWidth) {
    // most lq status needs to be updated immediately after the load writes back to lq
    // flag bits in lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    when (io.ldin(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep

      when (!need_rep) {
        // update control flags
        addrvalid(loadWbIndex) := hasExceptions || !io.ldin(i).bits.tlbMiss
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss && // dcache miss
            !io.ldin(i).bits.dcacheRequireReplay // do not write back if that inst will be re-sent from rs
          } else {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss
          })

        when (io.ldin(i).bits.data_wen_dup(1)) {
          uop(loadWbIndex) := io.ldin(i).bits.uop
        }
        when (io.ldin(i).bits.data_wen_dup(4)) {
          uop(loadWbIndex).debugInfo := io.ldin(i).bits.uop.debugInfo
        }
        uop(loadWbIndex).debugInfo := io.ldin(i).bits.rep_info.debug

        // Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr

        XSInfo(io.ldin(i).valid,
          "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x isvec %x\n",
          io.ldin(i).bits.uop.lqIdx.asUInt,
          io.ldin(i).bits.uop.pc,
          io.ldin(i).bits.vaddr,
          io.ldin(i).bits.paddr,
          io.ldin(i).bits.mask,
          io.ldin(i).bits.forwardData.asUInt,
          io.ldin(i).bits.forwardMask.asUInt,
          io.ldin(i).bits.mmio,
          io.ldin(i).bits.isvec
        )
      }
    }
  }

  // perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i))))
  QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when (flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(s"$i pc %x pa %x ", uop(i).pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    PrintFlag(allocated(i) && isvec(i), "c")
    XSDebug(false, true.B, "\n")
  }
  // end
}