/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.fu.FuConfig.LduCfg
import xiangshan.backend.decode.isa.bitfield.{InstVType, XSInstBitFields}

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(Valid(new Redirect))
    val vecCommit = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    // from dispatch
    val enq = new LqEnqIO
    // from ldu s3
    val ldin = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr = Output(new LqPtr)
    // global
    val lqFull = Output(Bool())
    val lqEmpty = Output(Bool())
    // to dispatch
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  // VirtualLoadQueue field
  // +-----------+---------+-------+
  // | Allocated | MicroOp | Flags |
  // +-----------+---------+-------+
  // Allocated : entry has been allocated already
  // MicroOp   : inst's microOp
  // Flags     : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new DynInst))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid
  // vector load: inst -> uop (pdest register) -> flow (one load operation in loadunit)
  val isvec = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load flow
  val veccommitted = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // vector load uop has committed

  /**
    * used for debug
    */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  // maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)
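
  // Quick illustration of the circular-pointer encoding used here (example values only,
  // assuming a hypothetical 8-entry queue): an LqPtr carries a wrap flag plus an index
  // value, so enqueue/dequeue positions stay comparable across wrap-around.
  //   enqPtr = (flag 0, value 6), deqPtr = (flag 0, value 2) -> distanceBetween = 4
  //   enqPtr = (flag 1, value 1), deqPtr = (flag 0, value 6) -> distanceBetween = 3
  // isAfter and distanceBetween (from HasCircularQueuePtrHelper) rely on this flag bit
  // to stay correct when the enqueue pointer has wrapped past the dequeue pointer.
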
  /**
    * update pointer
    */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = GatedValidRegNext(needCancel)
  val enqCancel = canEnqueue.zip(io.enq.req).map { case (v, x) =>
    v && x.bits.robIdx.needFlush(io.redirect)
  }
  val enqCancelNum = enqCancel.zip(io.enq.req).map { case (v, req) =>
    Mux(v, req.bits.numLsElem, 0.U)
  }
  val lastEnqCancel = GatedRegNext(enqCancelNum.reduce(_ + _))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)

  // update enqueue pointer
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem)
  val validVLoadFlow = vLoadFlow.zipWithIndex.map { case (vLoadFlowNumItem, index) => Mux(io.enq.canAccept && io.enq.sqCanAccept && canEnqueue(index), vLoadFlowNumItem, 0.U) }
  val validVLoadOffset = vLoadFlow.zip(io.enq.needAlloc).map { case (flow, needAllocItem) => Mux(needAllocItem, flow, 0.U) }
  val validVLoadOffsetRShift = 0.U +: validVLoadOffset.take(validVLoadFlow.length - 1)

  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value)
    && ((datavalid(ptr.value) && addrvalid(ptr.value) && !isvec(ptr.value)) || (isvec(ptr.value) && veccommitted(ptr.value)))
    && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = GatedRegNext(commitCount)
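
  // A brief worked example of the commitCount trick above (illustrative only, assuming
  // DeqPtrMoveStride = 8): commitCount is the length of the contiguous run of ready
  // entries starting at deqPtr, i.e. the run of low-order ones in deqCountMask.
  //   deqCountMask                 = b0000_0111  // first three entries dequeueable
  //   ~deqCountMask                = b1111_1000
  //   PriorityEncoderOH(~mask)     = b0000_1000  // one-hot of the first non-ready slot
  //   PriorityEncoderOH(~mask) - 1 = b0000_0111
  //   PopCount(...)                = 3
  // A hole stops the count: mask = b0000_1011 yields 2, because entry deqPtr+2 is not ready.
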
  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

  io.lqDeq := GatedRegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
    * Enqueue at dispatch
    *
    * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries > EnqWidth
    */
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until io.enq.req.length) {
    val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
    val index = io.enq.req(i).bits.lqIdx
    val enqInstr = io.enq.req(i).bits.instr.asTypeOf(new XSInstBitFields)
    when (canEnqueue(i) && !enqCancel(i)) {
      for (j <- 0 until VecMemDispatchMaxNumber) {
        when (j.U < validVLoadOffset(i)) {
          allocated((index + j.U).value) := true.B
          uop((index + j.U).value) := io.enq.req(i).bits
          uop((index + j.U).value).lqIdx := lqIdx + j.U

          // init
          addrvalid((index + j.U).value) := false.B
          datavalid((index + j.U).value) := false.B
          isvec((index + j.U).value) := enqInstr.isVecLoad
          veccommitted((index + j.U).value) := false.B

          debug_mmio((index + j.U).value) := false.B
          debug_paddr((index + j.U).value) := 0.U

          XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
          XSError(index.value =/= lqIdx.value, s"must be the same entry $i\n")
        }
      }
    }
    io.enq.resp(i) := lqIdx
  }

  /**
    * Load commits
    *
    * When a load commits, mark it as !allocated and move deqPtr forward.
    */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr + i.U).value) := false.B
      XSError(!allocated((deqPtr + i.U).value), s"why commit invalid entry $i?\n")
    }
  })

  // vector commit or replay
  val vecLdCommittmp = Wire(Vec(VirtualLoadQueueSize, Vec(VecLoadPipelineWidth, Bool())))
  val vecLdCommit = Wire(Vec(VirtualLoadQueueSize, Bool()))
  for (i <- 0 until VirtualLoadQueueSize) {
    val cmt = io.vecCommit
    for (j <- 0 until VecLoadPipelineWidth) {
      vecLdCommittmp(i)(j) := allocated(i) && cmt(j).valid && cmt(j).bits.isCommit && uop(i).robIdx === cmt(j).bits.robidx && uop(i).uopIdx === cmt(j).bits.uopidx
    }
    vecLdCommit(i) := vecLdCommittmp(i).reduce(_ || _)

    when (vecLdCommit(i)) {
      veccommitted(i) := true.B
    }
  }

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
    * Writeback load from load units
    *
    * Most load instructions write back to the regfile at the same time.
    * However,
    *   (1) a ready load instruction (one that needs no replay) writes back to the ROB immediately.
    */
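  // Rough sketch of the flag protocol implemented below (descriptive only): when a load
  // writes back from ldu s3 and needs no replay, in the default (non-EnableFastForward)
  // case its entry becomes dequeueable once both flags are set:
  //   addrvalid := hasExceptions || !tlbMiss        // address resolved (or faulted)
  //   datavalid := hasExceptions || mmio || !miss   // data ready (mmio data handled elsewhere)
  // A scalar entry leaves the queue when allocated && addrvalid && datavalid; a vector
  // entry instead dequeues once its vecCommit handshake sets veccommitted.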
  for (i <- 0 until LoadPipelineWidth) {
    // most lq status needs to be updated immediately after load writeback to lq
    // flag bits in lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    when (io.ldin(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep

      when (!need_rep) {
        // update control flag
        addrvalid(loadWbIndex) := hasExceptions || !io.ldin(i).bits.tlbMiss
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss && // dcache miss
            !io.ldin(i).bits.dcacheRequireReplay // do not writeback if that inst will be resent from rs
          } else {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss
          })

        when (io.ldin(i).bits.data_wen_dup(1)) {
          uop(loadWbIndex) := io.ldin(i).bits.uop
        }
        when (io.ldin(i).bits.data_wen_dup(4)) {
          uop(loadWbIndex).debugInfo := io.ldin(i).bits.uop.debugInfo
        }
        uop(loadWbIndex).debugInfo := io.ldin(i).bits.rep_info.debug

        // Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr

        when (io.ldin(i).bits.usSecondInv) {
          uop(loadWbIndex + 1.U).robIdx := uop(loadWbIndex).robIdx
          uop(loadWbIndex + 1.U).uopIdx := uop(loadWbIndex).uopIdx
        }

        XSInfo(io.ldin(i).valid,
          "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x isvec %x vec_secondInv %x\n",
          io.ldin(i).bits.uop.lqIdx.asUInt,
          io.ldin(i).bits.uop.pc,
          io.ldin(i).bits.vaddr,
          io.ldin(i).bits.paddr,
          io.ldin(i).bits.mask,
          io.ldin(i).bits.forwardData.asUInt,
          io.ldin(i).bits.forwardMask.asUInt,
          io.ldin(i).bits.mmio,
          io.ldin(i).bits.isvec,
          io.ldin(i).bits.usSecondInv
        )
      }
    }
  }

  // perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  val vecValidVec = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => allocated(i) && isvec(i))))
  QueuePerf(VirtualLoadQueueSize, PopCount(vecValidVec), !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when (flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(i + " pc %x pa %x ", uop(i).pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    PrintFlag(allocated(i) && isvec(i), "c")
    XSDebug(false, true.B, "\n")
  }
  // end
}