/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config._
import xiangshan._
import xiangshan.backend.rob.{RobPtr, RobLsqIO}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(Valid(new Redirect))
    // from dispatch
    val enq = new LqEnqIO
    // from ldu s3
    val ldin = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr = Output(new LqPtr)
    // global
    val lqFull = Output(Bool())
    val lqEmpty = Output(Bool())
    // to dispatch
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  //  VirtualLoadQueue field:
  //  +-----------+---------+-------+
  //  | Allocated | MicroOp | Flags |
  //  +-----------+---------+-------+
  //  Allocated : entry has been allocated already
  //  MicroOp   : inst's microOp
  //  Flags     : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // control signals need an explicit initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new MicroOp))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // whether the inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // the inst's paddr

  // maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LoadPipelineWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = RegNext(needCancel)
  val enqCancel = io.enq.req.map(_.bits.robIdx.needFlush(io.redirect))
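  // Redirect cancellation is resolved over two cycles: needCancel/enqCancel are
  // computed in the redirect cycle, the cancelled entries are counted one cycle
  // later (lastCycleCancelCount + lastEnqCancel below), and the enqueue pointers
  // are rewound one cycle after that, under lastLastCycleRedirect.valid.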
  val lastEnqCancel = PopCount(RegNext(VecInit(canEnqueue.zip(enqCancel).map(x => x._1 && x._2))))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(next = lastCycleCancelCount + lastEnqCancel, init = 0.U, enable = lastCycleRedirect.valid)

  // update enqueue pointer
  val enqNumber = Mux(io.enq.canAccept && io.enq.sqCanAccept, PopCount(io.enq.req.map(_.valid)), 0.U)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    // the queue is empty after recovery: realign enqPtr with deqPtr
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value) && datavalid(ptr.value) && addrvalid(ptr.value) && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & ~deqInSameRedirectCycle.asUInt
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = RegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(next = deqPtrNext, init = 0.U.asTypeOf(new LqPtr), enable = deqPtrUpdateEna)

  io.lqDeq := RegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only accepts enqueue when #emptyEntries >= LoadPipelineWidth
   */
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until io.enq.req.length) {
    val offset = PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = io.enq.req(i).bits.lqIdx.value
    when (canEnqueue(i) && !enqCancel(i)) {
      allocated(index) := true.B
      uop(index) := io.enq.req(i).bits
      uop(index).lqIdx := lqIdx

      // init
      addrvalid(index) := false.B
      datavalid(index) := false.B

      debug_mmio(index) := false.B
      debug_paddr(index) := 0.U

      XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
      XSError(index =/= lqIdx.value, s"must be the same entry $i\n")
    }
    io.enq.resp(i) := lqIdx
  }

  /**
   * Load commits
   *
   * When a load commits, mark it as !allocated and move deqPtr forward.
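   *
   * As a worked example of the deqCountMask / commitCount computation above
   * (illustrative values only): with DeqPtrMoveStride = 6 and
   * deqCountMask = b001011, the two oldest entries are ready but the third
   * is not, so:
   *   ~deqCountMask                = b110100
   *   PriorityEncoderOH(~mask)     = b000100  (one-hot of the first non-ready entry)
   *   PriorityEncoderOH(~mask) - 1 = b000011  (mask of all ready entries below it)
   *   PopCount(...)                = 2        (commitCount: two entries dequeue)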
   */
  (0 until DeqPtrMoveStride).foreach(i => {
    when (commitCount > i.U) {
      allocated((deqPtr + i.U).value) := false.B
      XSError(!allocated((deqPtr + i.U).value), s"why commit invalid entry $i?\n")
    }
  })

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
   * Writeback load from load units
   *
   * Most load instructions write back to the regfile at the same time.
   * However,
   *   (1) For a ready load instruction (no replay needed), it writes back to the ROB immediately.
   */
  for (i <- 0 until LoadPipelineWidth) {
    // most lq status needs to be updated immediately after load writeback to lq;
    // flag bits in lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value

    when (io.ldin(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.cf.exceptionVec, lduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep

      when (!need_rep) {
        // update control flags
        addrvalid(loadWbIndex) := hasExceptions || !io.ldin(i).bits.tlbMiss
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss && // not a dcache miss
            !io.ldin(i).bits.dcacheRequireReplay // do not writeback if that inst will be re-sent from rs
          } else {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss
          })

        // update uop fields through duplicated write enables to reduce fanout
        when (io.ldin(i).bits.data_wen_dup(1)) {
          uop(loadWbIndex).pdest := io.ldin(i).bits.uop.pdest
        }
        when (io.ldin(i).bits.data_wen_dup(2)) {
          uop(loadWbIndex).cf := io.ldin(i).bits.uop.cf
        }
        when (io.ldin(i).bits.data_wen_dup(3)) {
          uop(loadWbIndex).ctrl := io.ldin(i).bits.uop.ctrl
        }
        when (io.ldin(i).bits.data_wen_dup(4)) {
          uop(loadWbIndex).debugInfo := io.ldin(i).bits.uop.debugInfo
        }
        // last connection wins: replay debug info overrides the dup write above
        uop(loadWbIndex).debugInfo := io.ldin(i).bits.rep_info.debug

        // Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr

        XSInfo(io.ldin(i).valid, "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.ldin(i).bits.uop.lqIdx.asUInt,
          io.ldin(i).bits.uop.cf.pc,
          io.ldin(i).bits.vaddr,
          io.ldin(i).bits.paddr,
          io.ldin(i).bits.mask,
          io.ldin(i).bits.forwardData.asUInt,
          io.ldin(i).bits.forwardMask.asUInt,
          io.ldin(i).bits.mmio
        )
      }
    }
  }

  // perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when (flag) {
      XSDebug(false, true.B, name)
    } .otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(i + " pc %x pa %x ", uop(i).cf.pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    XSDebug(false, true.B, "\n")
  }
  // end
}