/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
package xiangshan.mem

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config._
import xiangshan._
import xiangshan.backend.rob.{RobLsqIO, RobPtr}
import xiangshan.ExceptionNO._
import xiangshan.cache._
import utils._
import utility._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput}
import xiangshan.backend.fu.FuConfig.LduCfg

class VirtualLoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
  with HasVLSUParameters {
  val io = IO(new Bundle() {
    // control
    val redirect = Flipped(Valid(new Redirect))
    // from dispatch
    val enq = new LqEnqIO
    // from ldu s3
    val ldin = Vec(LoadPipelineWidth, Flipped(DecoupledIO(new LqWriteBundle)))
    // to LoadQueueReplay and LoadQueueRAR
    val ldWbPtr = Output(new LqPtr)
    // global
    val lqFull = Output(Bool())
    val lqEmpty = Output(Bool())
    // to dispatch
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
    // vector load writeback
    val vecWriteback = Flipped(ValidIO(new MemExuOutput(isVector = true)))
  })

  println("VirtualLoadQueue: size: " + VirtualLoadQueueSize)
  // VirtualLoadQueue field
  //   +-----------+---------+-------+
  //   | Allocated | MicroOp | Flags |
  //   +-----------+---------+-------+
  //   Allocated : entry has been allocated already
  //   MicroOp   : inst's microOp
  //   Flags     : load flags
  val allocated = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // The control signals need to explicitly indicate the initial value
  val uop = Reg(Vec(VirtualLoadQueueSize, new DynInst))
  val addrvalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio addr is valid
  val datavalid = RegInit(VecInit(List.fill(VirtualLoadQueueSize)(false.B))) // non-mmio data is valid

  /**
   * used for debug
   */
  val debug_mmio = Reg(Vec(VirtualLoadQueueSize, Bool())) // mmio: inst is an mmio inst
  val debug_paddr = Reg(Vec(VirtualLoadQueueSize, UInt(PAddrBits.W))) // mmio: inst's paddr

  // maintain pointers
  val enqPtrExt = RegInit(VecInit((0 until io.enq.req.length).map(_.U.asTypeOf(new LqPtr))))
  val enqPtr = enqPtrExt(0).value
  val deqPtr = Wire(new LqPtr)
  val deqPtrNext = Wire(new LqPtr)

  /**
   * update pointer
   */
  val lastCycleRedirect = RegNext(io.redirect)
  val lastLastCycleRedirect = RegNext(lastCycleRedirect)

  val validCount = distanceBetween(enqPtrExt(0), deqPtr)
  val allowEnqueue = validCount <= (VirtualLoadQueueSize - LSQLdEnqWidth).U
  val canEnqueue = io.enq.req.map(_.valid)
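  // Redirect cancellation: needCancel marks every allocated entry whose robIdx is flushed by
  // the incoming redirect, while enqCancel marks enqueue requests that are flushed in the same
  // cycle they enqueue. Both are registered for one cycle and summed into redirectCancelCount,
  // which is reported to dispatch via io.lqCancelCnt and used below to roll the enqueue
  // pointers back after a redirect.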
  val needCancel = WireInit(VecInit((0 until VirtualLoadQueueSize).map(i => {
    uop(i).robIdx.needFlush(io.redirect) && allocated(i)
  })))
  val lastNeedCancel = RegNext(needCancel)
  val enqCancel = io.enq.req.map(_.bits.robIdx.needFlush(io.redirect))
  val lastEnqCancel = PopCount(RegNext(VecInit(canEnqueue.zip(enqCancel).map(x => x._1 && x._2))))
  val lastCycleCancelCount = PopCount(lastNeedCancel)
  val redirectCancelCount = RegEnable(lastCycleCancelCount + lastEnqCancel, 0.U, lastCycleRedirect.valid)

  // update enqueue pointer
  val vLoadFlow = io.enq.req.map(_.bits.numLsElem)
  val validVLoadFlow = vLoadFlow.zipWithIndex.map{case (vLoadFlowNum_Item, index) => Mux(io.enq.canAccept && io.enq.sqCanAccept && canEnqueue(index), vLoadFlowNum_Item, 0.U)}
  val validVLoadOffset = 0.U +: vLoadFlow.zip(io.enq.needAlloc)
    .map{case (flow, needAlloc_Item) => Mux(needAlloc_Item, flow, 0.U)}
    .slice(0, validVLoadFlow.length - 1)
  val enqNumber = validVLoadFlow.reduce(_ + _)
  val enqPtrExtNextVec = Wire(Vec(io.enq.req.length, new LqPtr))
  val enqPtrExtNext = Wire(Vec(io.enq.req.length, new LqPtr))
  when (lastLastCycleRedirect.valid) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ - redirectCancelCount))
  } .otherwise {
    enqPtrExtNextVec := VecInit(enqPtrExt.map(_ + enqNumber))
  }
  assert(!(lastCycleRedirect.valid && enqNumber =/= 0.U))

  when (isAfter(enqPtrExtNextVec(0), deqPtrNext)) {
    enqPtrExtNext := enqPtrExtNextVec
  } .otherwise {
    enqPtrExtNext := VecInit((0 until io.enq.req.length).map(i => deqPtrNext + i.U))
  }
  enqPtrExt := enqPtrExtNext
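
  // Dequeue counting: an entry may leave the queue once it is allocated, its address and data
  // are both valid, and it has not caught up with the enqueue pointer. deqLookup scans
  // DeqPtrMoveStride entries starting at deqPtr, entries cancelled by a redirect in the same
  // cycle are masked off, and PopCount(PriorityEncoderOH(~deqCountMask) - 1.U) counts the
  // low-order run of ready entries, i.e. how many consecutive entries can be dequeued this cycle.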
  // update dequeue pointer
  val DeqPtrMoveStride = CommitWidth
  require(DeqPtrMoveStride == CommitWidth, "DeqPtrMoveStride must be equal to CommitWidth!")
  val deqLookupVec = VecInit((0 until DeqPtrMoveStride).map(deqPtr + _.U))
  val deqLookup = VecInit(deqLookupVec.map(ptr => allocated(ptr.value) && datavalid(ptr.value) && addrvalid(ptr.value) && ptr =/= enqPtrExt(0)))
  val deqInSameRedirectCycle = VecInit(deqLookupVec.map(ptr => needCancel(ptr.value)))
  // make chisel happy
  val deqCountMask = Wire(UInt(DeqPtrMoveStride.W))
  deqCountMask := deqLookup.asUInt & (~deqInSameRedirectCycle.asUInt).asUInt
  val commitCount = PopCount(PriorityEncoderOH(~deqCountMask) - 1.U)
  val lastCommitCount = RegNext(commitCount)

  // update deqPtr
  // cycle 1: generate deqPtrNext
  // cycle 2: update deqPtr
  val deqPtrUpdateEna = lastCommitCount =/= 0.U
  deqPtrNext := deqPtr + lastCommitCount
  deqPtr := RegEnable(deqPtrNext, 0.U.asTypeOf(new LqPtr), deqPtrUpdateEna)

  io.lqDeq := RegNext(lastCommitCount)
  io.lqCancelCnt := redirectCancelCount
  io.ldWbPtr := deqPtr
  io.lqEmpty := RegNext(validCount === 0.U)

  /**
   * Enqueue at dispatch
   *
   * Currently, VirtualLoadQueue only allows enqueue when #emptyEntries > EnqWidth
   */
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until io.enq.req.length) {
    val offset = PopCount(io.enq.needAlloc.take(i))
    val lqIdx = enqPtrExt(0) + validVLoadOffset.take(i + 1).reduce(_ + _)
    // val lqIdx = 0.U.asTypeOf(new LqPtr)
    val index = io.enq.req(i).bits.lqIdx.value
    when (canEnqueue(i) && !enqCancel(i)) {
      allocated(index) := true.B
      uop(index) := io.enq.req(i).bits
      uop(index).lqIdx := lqIdx

      // init
      addrvalid(index) := false.B
      datavalid(index) := false.B

      debug_mmio(index) := false.B
      debug_paddr(index) := 0.U

      XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
      XSError(index =/= lqIdx.value, s"must be the same entry $i\n")
    }
    io.enq.resp(i) := lqIdx
  }

  /**
   * Load commits
   *
   * When a load commits, mark it as !allocated and move deqPtr forward.
   */
  (0 until DeqPtrMoveStride).map(i => {
    when (commitCount > i.U) {
      allocated((deqPtr + i.U).value) := false.B
      XSError(!allocated((deqPtr + i.U).value), s"why commit invalid entry $i?\n")
    }
  })

  // misprediction recovery / exception redirect
  // invalidate lq entries using robIdx
  for (i <- 0 until VirtualLoadQueueSize) {
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }

  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
   * Writeback load from load units
   *
   * Most load instructions write back to the regfile at the same time.
   * However,
   *   (1) a ready load instruction (one that needs no replay) writes back to the ROB immediately.
   */
  for (i <- 0 until LoadPipelineWidth) {
    // most lq status needs to be updated immediately after load writeback to lq
    // flag bits in lq need to be updated accurately
    io.ldin(i).ready := true.B
    val loadWbIndex = io.ldin(i).bits.uop.lqIdx.value
    val isvec = io.ldin(i).bits.isvec // vector loads are written back from the uop queue instead of the ldus

    when (io.ldin(i).valid && !isvec) {
      val hasExceptions = ExceptionNO.selectByFu(io.ldin(i).bits.uop.exceptionVec, LduCfg).asUInt.orR
      val need_rep = io.ldin(i).bits.rep_info.need_rep

      when (!need_rep) {
        // update control flag
        addrvalid(loadWbIndex) := hasExceptions || !io.ldin(i).bits.tlbMiss
        datavalid(loadWbIndex) :=
          (if (EnableFastForward) {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss && // dcache miss
            !io.ldin(i).bits.dcacheRequireReplay // do not write back if that inst will be re-sent from rs
          } else {
            hasExceptions ||
            io.ldin(i).bits.mmio ||
            !io.ldin(i).bits.miss
          })

        //
        when (io.ldin(i).bits.data_wen_dup(1)) {
          uop(loadWbIndex) := io.ldin(i).bits.uop
        }
        when (io.ldin(i).bits.data_wen_dup(4)) {
          uop(loadWbIndex).debugInfo := io.ldin(i).bits.uop.debugInfo
        }
        uop(loadWbIndex).debugInfo := io.ldin(i).bits.rep_info.debug

        // Debug info
        debug_mmio(loadWbIndex) := io.ldin(i).bits.mmio
        debug_paddr(loadWbIndex) := io.ldin(i).bits.paddr

        XSInfo(io.ldin(i).valid, "load hit write to lq idx %d pc 0x%x vaddr %x paddr %x mask %x forwardData %x forwardMask: %x mmio %x\n",
          io.ldin(i).bits.uop.lqIdx.asUInt,
          io.ldin(i).bits.uop.pc,
          io.ldin(i).bits.vaddr,
          io.ldin(i).bits.paddr,
          io.ldin(i).bits.mask,
          io.ldin(i).bits.forwardData.asUInt,
          io.ldin(i).bits.forwardMask.asUInt,
          io.ldin(i).bits.mmio
        )
      }
    }
  }

  XSError(io.vecWriteback.valid && !allocated(io.vecWriteback.bits.uop.lqIdx.value),
    "wb lqIdx should be allocated at dispatch stage")
  when (io.vecWriteback.valid) {
    val vecWbIndex = io.vecWriteback.bits.uop.lqIdx.value
    addrvalid(vecWbIndex) := true.B
    datavalid(vecWbIndex) := true.B
  }

  // perf counter
  QueuePerf(VirtualLoadQueueSize, validCount, !allowEnqueue)
  io.lqFull := !allowEnqueue
  val perfEvents: Seq[(String, UInt)] = Seq()
  generatePerfEvent()
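
  // Per-entry dump legend: "v" = allocated, "d" = data valid, "a" = address valid,
  // "w" = allocated with both address and data valid.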
  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtr.flag, deqPtr.value)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until VirtualLoadQueueSize) {
    XSDebug(i + " pc %x pa %x ", uop(i).pc, debug_paddr(i))
    PrintFlag(allocated(i), "v")
    PrintFlag(allocated(i) && datavalid(i), "d")
    PrintFlag(allocated(i) && addrvalid(i), "a")
    PrintFlag(allocated(i) && addrvalid(i) && datavalid(i), "w")
    XSDebug(false, true.B, "\n")
  }
  // end
}