/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend._
import xiangshan.backend.fu.fpu._
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.mem.mdp._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
import xiangshan.backend.rob.RobPtr

/**
  * Circular-queue pointer for the load queue.
  * The queue size is taken from `VirtualLoadQueueSize` in the core parameters.
  */
class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  /**
    * Builds an [[LqPtr]] wire driven by the given flag and value.
    *
    * @param f signal assigned to the pointer's `flag`
    * @param v signal assigned to the pointer's `value`
    * @return a new `Wire(new LqPtr)` with both fields connected
    */
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

/** Helpers that format raw load data according to the load type. */
trait HasLoadHelper { this: XSModule =>
  /**
    * Extends raw scalar load data to XLEN according to `uop.fuOpType`.
    *
    * Signed loads are sign-extended and unsigned loads are zero-extended.
    * For `lw` / `ld` with `uop.fpWen` set, the data goes through `FPU.box` instead.
    *
    * @param uop   micro-op providing `fuOpType` and `fpWen`
    * @param rdata raw load data
    * @return the formatted load result
    */
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    val fpWen = uop.fpWen
    LookupTree(uop.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw   -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }

  /**
    * Zero-extends the low 8 / 16 / 32 / 64 bits of `rdata` to VLEN,
    * selected by `alignedType` ("b00" / "b01" / "b10" / "b11" respectively).
    *
    * @param alignedType 2-bit element-size selector
    * @param rdata       raw load data
    * @return the selected low bits of `rdata`, zero-extended to VLEN
    */
  def rdataVecHelper(alignedType: UInt, rdata: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> ZeroExt(rdata(7, 0), VLEN),
      "b01".U -> ZeroExt(rdata(15, 0), VLEN),
      "b10".U -> ZeroExt(rdata(31, 0), VLEN),
      "b11".U -> ZeroExt(rdata(63, 0), VLEN)
    ))
  }
}

/** Enqueue interface of the load queue: `LSQEnqWidth` request slots, each answered with an [[LqPtr]]. */
class LqEnqIO(implicit p: Parameters) extends MemBlockBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(LSQEnqWidth, Input(Bool()))
  val req = Vec(LSQEnqWidth, Flipped(ValidIO(new DynInst)))
  val resp = Vec(LSQEnqWidth, Output(new LqPtr))
}

/** Load-address trigger hit vectors, one bit per trigger (`TriggerNum`). */
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(TriggerNum, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(TriggerNum, Bool()))
}

/** Top-down debug signals about the ROB-head load; forwarded to `LoadQueueReplay` by [[LoadQueue]]. */
class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}

/**
  * Load queue top level.
  *
  * Instantiates the RAR / RAW violation queues, the replay queue, the virtual load
  * queue, the exception buffer and the uncache buffer, and wires them to `io`.
  * Apart from the rollback selection and the perf counters, this module only
  * contains connections between `io` and the sub-modules.
  */
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
        val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
        val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
        val ldin            = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
      val vecStoreAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput))) // from store_s0, store data, send to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec   = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr       = Input(new SqPtr)
      val sqEmpty          = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val lqEmpty = Output(Bool())

    val vecWriteback = Flipped(ValidIO(new MemExuOutput(isVector = true)))
    val lqDeqPtr = Output(new LqPtr)

    val debugTopDown = new LoadQueueTopDownIO
  })

  val loadQueueRAR = Module(new LoadQueueRAR)  //  read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW)  //  read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay)  //  enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  //  control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer) // uncache buffer

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release  <> io.release
  loadQueueRAR.io.ldWbPtr  <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.vecStoreIn       <> io.sta.vecStoreAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect    <> io.redirect
  virtualLoadQueue.io.enq         <> io.enq
  virtualLoadQueue.io.ldin        <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull      <> io.lqFull
  virtualLoadQueue.io.lqDeq       <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty     <> io.lqEmpty
  virtualLoadQueue.io.vecWriteback <> io.vecWriteback
  virtualLoadQueue.io.ldWbPtr     <> io.lqDeqPtr

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect    <> io.redirect
  uncacheBuffer.io.ldout       <> io.ldout
  uncacheBuffer.io.ld_raw_data <> io.ld_raw_data
  uncacheBuffer.io.rob         <> io.rob
  uncacheBuffer.io.uncache     <> io.uncache
  uncacheBuffer.io.trigger     <> io.trigger
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits // from load_s3
  }

  // rollback
  /**
    * Reduces a list of candidate redirects to at most one.
    *
    * Inputs of length 0 or 1 are returned unchanged. For two candidates that are
    * both valid, candidate 1 is chosen when `isAfter(bits(0).robIdx, bits(1).robIdx)`
    * holds and candidate 0 otherwise; if only one is valid, that one is chosen.
    * Longer inputs are split in half, reduced recursively and then merged.
    *
    * @param valid valid flag of each candidate
    * @param bits  payload of each candidate; must have the same length as `valid`
    * @return a pair of sequences, each of length at most one: the selected valid
    *         flag and the selected payload
    */
  def selectOldest[T <: Redirect](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    // Elaboration-time precondition on the arguments (plain Scala check, no hardware).
    require(valid.length == bits.length,
      s"selectOldest: valid.length (${valid.length}) must equal bits.length (${bits.length})")
    if (valid.length <= 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      // Bundle each candidate as a ValidIO so a single Mux tree can pick valid and bits together.
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(
        valid(0) && valid(1),
        Mux(isAfter(bits(0).robIdx, bits(1).robIdx), res(1), res(0)),
        Mux(valid(0) && !valid(1), res(0), res(1))
      )
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      // Same split as take(n / 2) and takeRight(n - n / 2).
      val (leftValid, rightValid) = valid.splitAt(valid.length / 2)
      val (leftBits, rightBits) = bits.splitAt(bits.length / 2)
      val left = selectOldest(leftValid, leftBits)
      val right = selectOldest(rightValid, rightBits)
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  // Pick between the RAW-violation rollback and the uncache-buffer rollback.
  val (rollbackSelV, rollbackSelBits) = selectOldest(
    Seq(loadQueueRAW.io.rollback.valid, uncacheBuffer.io.rollback.valid),
    Seq(loadQueueRAW.io.rollback.bits, uncacheBuffer.io.rollback.bits)
  )
  io.rollback.valid := rollbackSelV.head
  io.rollback.bits  := rollbackSelBits.head

  /* <------- DANGEROUS: Don't change sequence here ! -------> */
  // NOTE(review): both virtualLoadQueue.io.ldin (above) and loadQueueReplay.io.enq (below)
  // are bulk-connected to io.ldu.ldin. Presumably the order matters because the later
  // connection takes effect for any signal both of them drive -- confirm before reordering.

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect         <> io.redirect
  loadQueueReplay.io.enq              <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn      <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn      <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay           <> io.replay
  loadQueueReplay.io.refill           <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

  // Bit 2: RAR queue full, bit 1: RAW queue full, bit 0: replay queue full.
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)

  // One event per full_mask value, named "full_mask_000" .. "full_mask_111".
  // Defined once so the accumulators and the perfEvents list cannot drift apart.
  val fullMaskPerfEvents: Seq[(String, Bool)] = (0 until 8).map { i =>
    val pattern = ("00" + i.toBinaryString).takeRight(3)
    (s"full_mask_$pattern", full_mask === i.U)
  }
  val localPerfEvents: Seq[(String, Bool)] =
    fullMaskPerfEvents ++ Seq(("rollback", io.rollback.valid))

  localPerfEvents.foreach { case (name, cond) => XSPerfAccumulate(name, cond) }

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
    localPerfEvents
  generatePerfEvent()
  // end
}