/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import chipsalliance.rocketchip.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.DynInst
import xiangshan.backend.fu.FuConfig.LduCfg

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val loadIn = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    val ldWbPtr = Output(new LqPtr)
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  // VirtualLoadQueue field
  // +-----------+---------+-------+
  // | Allocated | MicroOp | Flags |
  // +-----------+---------+-------+
  // Allocated : entry has been allocated already
  // MicroOp   : inst's microOp
  // Flags     : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // the control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new DynInst))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  // maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LoadPipelineWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = RegNext(needCancel)
  val enqCancel = io.enq.req.map(_.bits.robIdx.needFlush(io.redirect))
  val lastEnqCancel = PopCount(RegNext(VecInit(canEnqueue.zip(enqCancel).map(x => x._1 && x._2))))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
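  // Note on redirect recovery (explanatory comment, not from the original source):
  // a flush cancels two kinds of entries at once:
  //   * entries already allocated in the queue whose robIdx needs to be flushed
  //     (counted one cycle later as lastCycleCancelCount), and
  //   * requests that were being enqueued in the very cycle the redirect arrived
  //     (counted one cycle later as lastEnqCancel).
  // One cycle after the redirect, every enqueue pointer lane is rewound by the sum of
  // the two counts (and never moved behind the dequeue pointer). For example, if 3
  // allocated entries are flushed and 1 in-flight enqueue request is squashed, each
  // enqPtrExt lane steps back by 4 in the following cycle.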
  // update enqueue pointer
  val enqCount = Mux(io.enq.canAccept && io.enq.sqCanAccept, PopCount(io.enq.req.map(_.valid)), 0.U)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - (lastCycleCancelCount + lastEnqCancel)))
  }.otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqCount))
  }

  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  }.otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext

  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value) && datavalid(ptr.value) && addrvalid(ptr.value) && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
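  // How commitCount is derived (explanatory comment, not from the original source):
  // deqCountMask(i) is set when entry deqPtr + i is allocated, has both its address and
  // data valid, is not the current enqueue position, and is not being cancelled by a
  // redirect in this cycle. commitCount is the length of the run of consecutive set bits
  // starting at bit 0, i.e. the number of head entries that can retire together:
  //   PriorityEncoderOH(~mask) is a one-hot of the first not-ready position,
  //   subtracting 1 turns it into a mask covering the ready run, and PopCount measures it.
  // Example with DeqPtrMoveStride = 4: mask = b0011 -> ~mask = b1100 ->
  // PriorityEncoderOH = b0100 -> minus 1 = b0011 -> PopCount = 2.
  // When every scanned entry is ready, the subtraction wraps to all ones and the full
  // stride is committed.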
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & ~deqInSameRedirectCycle.asUInt
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = RegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(next = deqPtrNext, init = 0.U.asTypeOf(new LqPtr), enable = deqPtrUpdateEna)

  io.lqDeq := RegNext(lastCommitCount)
  io.lqCancelCnt := RegNext(lastCycleCancelCount + lastEnqCancel)
  io.ldWbPtr := deqPtr

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries >= LoadPipelineWidth
   */
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until io.enq.req.length) {
    val offset = PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(offset)
    val index = io.enq.req(i).bits.lqIdx.value
    when (canEnqueue(i) && !enqCancel(i)) {
      allocated(index) := true.B
      uop(index) := io.enq.req(i).bits
      uop(index).lqIdx := lqIdx

      // init
      addrvalid(index) := false.B
      datavalid(index) := false.B

      debug_mmio(index) := false.B
      debug_paddr(index) := 0.U

      XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
      XSError(index =/= lqIdx.value, s"must be the same entry $i\n")
    }
    io.enq.resp(i) := lqIdx
  }

  /**
   * Load commits
   *
   * When a load is committed, mark it as !allocated and move deqPtr forward.
   */
  (0 until DeqPtrMoveStride).foreach(i => {
    when (commitCount > i.U) {
      allocated((deqPtr + i.U).value) := false.B
      XSError(!allocated((deqPtr + i.U).value), s"why commit invalid entry $i?\n")
    }
  })

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
   * Writeback load from load units
   *
   * Most load instructions write back to the regfile at the same time.
   * However,
   * (1) a load that does not need to be replayed writes back to the ROB immediately.
   */
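  // Flag update at writeback (explanatory comment, not from the original source):
  // for a load that does not need replay, addrvalid is set once address translation has
  // finished (no TLB miss) or an exception has been recorded, and datavalid is set once
  // the data is certain to arrive without another pipeline pass (exception, mmio, or a
  // dcache hit; with EnableFastForward also no pending dcache replay). An entry only
  // becomes eligible for commit in deqLookup once both flags are set.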
  for (i <- 0 until LoadPipelineWidth) {
    // most lq status fields need to be updated immediately after load writeback to lq;
    // the flag bits in lq need to be updated accurately
    io.loadIn(i).ready := true.B
    val loadWbIndex = io.loadIn(i).bits.uop.lqIdx.value

    when (io.loadIn(i).valid) {
      val hasExceptions = ExceptionNO.selectByFu(io.loadIn(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val needReplay = io.loadIn(i).bits.replayInfo.needReplay()

      when (!needReplay) {
        // update control flag
        addrvalid(loadWbIndex) := hasExceptions || !io.loadIn(i).bits.tlbMiss
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
            hasExceptions ||
            io.loadIn(i).bits.mmio ||
            !io.loadIn(i).bits.miss && // dcache miss
            !io.loadIn(i).bits.dcacheRequireReplay // do not writeback if that inst will be resent from rs
          } else {
            hasExceptions ||
            io.loadIn(i).bits.mmio ||
            !io.loadIn(i).bits.miss
          })

        when (io.loadIn(i).bits.lqDataWenDup(1)) {
          uop(loadWbIndex) := io.loadIn(i).bits.uop
        }
        when (io.loadIn(i).bits.lqDataWenDup(4)) {
          uop(loadWbIndex).debugInfo := io.loadIn(i).bits.uop.debugInfo
        }
        uop(loadWbIndex).debugInfo := io.loadIn(i).bits.replayInfo.debug

        // Debug info
        debug_mmio(loadWbIndex) := io.loadIn(i).bits.mmio
        debug_paddr(loadWbIndex) := io.loadIn(i).bits.paddr

        XSInfo(io.loadIn(i).valid, "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.loadIn(i).bits.uop.lqIdx.asUInt,
          io.loadIn(i).bits.uop.pc,
          io.loadIn(i).bits.vaddr,
          io.loadIn(i).bits.paddr,
          io.loadIn(i).bits.mask,
          io.loadIn(i).bits.forwardData.asUInt,
          io.loadIn(i).bits.forwardMask.asUInt,
          io.loadIn(i).bits.mmio
        )
      }
    }
  }

  if (env.EnableTopDown) {
    val stall_loads_bound = WireDefault(0.B)
    ExcitingUtils.addSink(stall_loads_bound, "stall_loads_bound", ExcitingUtils.Perf)
    val have_miss_entry = (allocated zip datavalid).map(x => x._1 && !x._2).reduce(_ || _)
    val l1d_loads_bound = stall_loads_bound && !have_miss_entry
    ExcitingUtils.addSource(l1d_loads_bound, "l1d_loads_bound", ExcitingUtils.Perf)
    XSPerfAccumulate("l1d_loads_bound", l1d_loads_bound)
    val stall_l1d_load_miss = stall_loads_bound && have_miss_entry
    ExcitingUtils.addSource(stall_l1d_load_miss, "stall_l1d_load_miss", ExcitingUtils.Perf)
    ExcitingUtils.addSink(WireInit(0.U), "stall_l1d_load_miss", ExcitingUtils.Perf)
  }

  // perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when (flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  // per-entry dump: v = allocated, d = data valid, a = addr valid, w = addr and data valid
  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(s"$i pc %x pa %x ", uop(i).pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    XSDebug(false, true.B, "\n")
  }
  // end
}
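// Connection sketch (illustrative only; `dispatch`, `loadUnits` and their port names are
// assumed for the example and are not part of this file). A parent module would typically
// do something like:
//   val virtualLoadQueue = Module(new VirtualLoadQueue)
//   virtualLoadQueue.io.redirect <> io.redirect            // flush on mispredict/exception
//   virtualLoadQueue.io.enq      <> dispatch.io.lqEnq      // allocate entries at dispatch
//   for ((ld, w) <- loadUnits.zipWithIndex) {
//     virtualLoadQueue.io.loadIn(w) <> ld.io.lqWrite       // per-port load writeback
//   }
//   // ldWbPtr / lqDeq / lqCancelCnt feed whatever consumes the dequeue/cancel counts.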