/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend._
import xiangshan.backend.fu.fpu._
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.cache.mmu._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.mem.mdp._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
import xiangshan.backend.rob.RobPtr

class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}
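
// A minimal sketch of the flag/value convention behind LqPtr (and the other
// CircularQueuePtr subclasses), assuming the usual comparison semantics from
// HasCircularQueuePtrHelper; enqPtr/deqPtr below are illustrative names, not
// signals in this file:
//
//   val empty = enqPtr === deqPtr                 // same flag, same value
//   val full  = enqPtr.value === deqPtr.value &&
//               enqPtr.flag =/= deqPtr.flag       // wrapped around once
//
// The extra flag bit lets a queue of depth N tell "full" apart from "empty"
// without reserving an entry.
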
trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    val fpWen = uop.fpWen
    LookupTree(uop.fuOpType, List(
      LSUOpType.lb  -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh  -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw  -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld  -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu -> ZeroExt(rdata(31, 0), XLEN),

      // hypervisor
      LSUOpType.hlvb   -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.hlvh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.hlvw   -> SignExt(rdata(31, 0), XLEN),
      LSUOpType.hlvd   -> SignExt(rdata(63, 0), XLEN),
      LSUOpType.hlvbu  -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.hlvhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvwu  -> ZeroExt(rdata(31, 0), XLEN),
      LSUOpType.hlvxhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvxwu -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }

  def rdataVecHelper(alignedType: UInt, rdata: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> ZeroExt(rdata(7, 0), VLEN),
      "b01".U -> ZeroExt(rdata(15, 0), VLEN),
      "b10".U -> ZeroExt(rdata(31, 0), VLEN),
      "b11".U -> ZeroExt(rdata(63, 0), VLEN)
    ))
  }
}
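
// Worked example of the NaN-boxing rule quoted in rdataHelper, assuming
// XLEN = FLEN = 64, so a single-precision result must have its upper 32 bits
// forced to all ones. The expression below is an illustrative sketch of what
// FPU.box(rdata, FPU.S) must produce, not its actual implementation:
//
//   flw returns rdata = 0x3F80_0000 (1.0f)
//   boxed = Cat(~0.U(32.W), rdata(31, 0)) // = 0xFFFF_FFFF_3F80_0000
//
// Any 64-bit pattern whose upper 32 bits are not all ones is treated as a
// canonical NaN when consumed as a single-precision operand.
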
class LqEnqIO(implicit p: Parameters) extends MemBlockBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(LSQEnqWidth, Input(Bool()))
  val req = Vec(LSQEnqWidth, Flipped(ValidIO(new DynInst)))
  val resp = Vec(LSQEnqWidth, Output(new LqPtr))
}

class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(TriggerNum, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(TriggerNum, Bool()))
}

class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}

class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldin = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
      val vecStoreAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput))) // from store_s0: store data, sent to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr = Input(new SqPtr)
      val sqEmpty = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    // val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val nuke_rollback = Output(Valid(new Redirect))
    val nack_rollback = Output(Valid(new Redirect))
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val tlb_hint = Flipped(new TlbHintIO)
    val lqEmpty = Output(Bool())

    val vecWriteback = Flipped(ValidIO(new MemExuOutput(isVector = true)))
    val lqDeqPtr = Output(new LqPtr)
    val vecMMIOReplay = Vec(VecLoadPipelineWidth, DecoupledIO(new LsPipelineBundle()))

    val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)

    val debugTopDown = new LoadQueueTopDownIO
  })

  val loadQueueRAR = Module(new LoadQueueRAR) // read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW) // read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay) // holds loads that need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue) // control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer) // uncache buffer

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release  <> io.release
  loadQueueRAR.io.ldWbPtr  <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req    <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp   <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect         <> io.redirect
  loadQueueRAW.io.storeIn          <> io.sta.storeAddrIn
  loadQueueRAW.io.vecStoreIn       <> io.sta.vecStoreAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr       <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req    <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp   <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect     <> io.redirect
  virtualLoadQueue.io.enq          <> io.enq
  virtualLoadQueue.io.ldin         <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull       <> io.lqFull
  virtualLoadQueue.io.lqDeq        <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt  <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty      <> io.lqEmpty
  virtualLoadQueue.io.vecWriteback <> io.vecWriteback
  virtualLoadQueue.io.ldWbPtr      <> io.lqDeqPtr

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits  := io.ldu.ldin(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect    <> io.redirect
  uncacheBuffer.io.ldout       <> io.ldout
  uncacheBuffer.io.ld_raw_data <> io.ld_raw_data
  uncacheBuffer.io.rob         <> io.rob
  uncacheBuffer.io.uncache     <> io.uncache
  uncacheBuffer.io.trigger     <> io.trigger
  uncacheBuffer.io.vecReplay   <> io.vecMMIOReplay
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits  := io.ldu.ldin(w).bits  // from load_s3
  }

  io.nuke_rollback := loadQueueRAW.io.rollback
  io.nack_rollback := uncacheBuffer.io.rollback

  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect <> io.redirect
  loadQueueReplay.io.enq      <> io.ldu.ldin // from load_s3
  // only scalar loads enter the replay queue; vector loads are filtered out here
  loadQueueReplay.io.enq.zip(io.ldu.ldin).foreach { case (sink, source) =>
    sink.valid   := source.valid && !source.bits.isvec
    source.ready := sink.ready && !source.bits.isvec
  }
  loadQueueReplay.io.storeAddrIn <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay      <> io.replay
  //loadQueueReplay.io.refill    <> io.refill
  loadQueueReplay.io.tl_d_channel     <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec   <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec   <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty          <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull           <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr          <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull          <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull          <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint          <> io.l2_hint
  loadQueueReplay.io.tlb_hint         <> io.tlb_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  loadQueueReplay.io.debugTopDown <> io.debugTopDown
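
  // full_mask packs the three fullness flags as {RAR, RAW, replay} from MSB
  // to LSB, so e.g. full_mask_100 counts cycles where only loadQueueRAR is
  // full, and full_mask_001 counts cycles where only loadQueueReplay is full.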
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("nuke_rollback", io.nuke_rollback.valid)
  XSPerfAccumulate("nack_rollback", io.nack_rollback.valid)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
    Seq(
      ("full_mask_000", full_mask === 0.U),
      ("full_mask_001", full_mask === 1.U),
      ("full_mask_010", full_mask === 2.U),
      ("full_mask_011", full_mask === 3.U),
      ("full_mask_100", full_mask === 4.U),
      ("full_mask_101", full_mask === 5.U),
      ("full_mask_110", full_mask === 6.U),
      ("full_mask_111", full_mask === 7.U),
      ("nuke_rollback", io.nuke_rollback.valid),
      ("nack_rollback", io.nack_rollback.valid)
    )
  generatePerfEvent()
  // end
}
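
// A minimal elaboration sketch for this module, illustrative only: the real
// instantiation lives in MemBlock, and a concrete XiangShan configuration
// must supply the implicit Parameters.
//
//   import circt.stage.ChiselStage
//   implicit val p: Parameters = ... // a concrete XSCoreParameters config
//   ChiselStage.emitSystemVerilog(new LoadQueue())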