/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend.fu.fpu.FPU
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.mem.mdp._
import xiangshan.backend.rob.RobPtr

/**
 * Circular pointer into the virtual load queue.
 *
 * Depth is `VirtualLoadQueueSize`; the inherited `flag`/`value` pair
 * (from CircularQueuePtr) disambiguates full vs. empty when head and
 * tail share the same index.
 */
class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  /** Builds an LqPtr wire from an explicit wrap flag `f` and index `v`. */
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

/**
 * Helper mixin that widens raw load data to XLEN according to the
 * load opcode: sign-extension for lb/lh/lw/ld, zero-extension for
 * lbu/lhu/lwu, and NaN-boxing when the destination is an FP register.
 */
trait HasLoadHelper { this: XSModule =>
  /**
   * Selects and extends the relevant bytes of `rdata` for the load
   * micro-op `uop`. Returns an XLEN-wide result.
   */
  def rdataHelper(uop: MicroOp, rdata: UInt): UInt = {
    val fpWen = uop.ctrl.fpWen
    LookupTree(uop.ctrl.fuOpType, List(
      LSUOpType.lb   -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh   -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      // flw (fpWen) must NaN-box the 32-bit value; integer lw sign-extends.
      LSUOpType.lw   -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld   -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu  -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu  -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }
}

/**
 * Dispatch-stage enqueue interface of the load queue.
 *
 * Enqueue succeeds only when both this queue (`canAccept`) and the
 * store queue (`sqCanAccept`) have room, since LSQ entries are
 * allocated in lockstep at dispatch.
 */
class LqEnqIO(implicit p: Parameters) extends XSBundle {
  val canAccept = Output(Bool())                                   // load queue has space for this dispatch group
  val sqCanAccept = Input(Bool())                                  // store queue's accept signal, needed for joint allocation
  val needAlloc = Vec(exuParameters.LsExuCnt, Input(Bool()))       // per-slot: this uop needs a load queue entry
  val req = Vec(exuParameters.LsExuCnt, Flipped(ValidIO(new MicroOp))) // uops requesting allocation
  val resp = Vec(exuParameters.LsExuCnt, Output(new LqPtr))        // allocated queue pointer for each slot
}

/**
 * Debug-trigger interface between the load pipeline and the load queue
 * (3 address-trigger hit bits in each direction).
 */
class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(3, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(3, Bool()))
}

/**
 * Top-down performance-analysis interface: classifies why the load at
 * the ROB head is stalled (TLB replay/miss, load violation, MSHR, etc.).
 */
class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}

/**
 * Top-level load queue.
 *
 * Pure structural wrapper that composes and wires five sub-queues:
 *  - LoadQueueRAR:     detects read-after-read ordering violations
 *  - LoadQueueRAW:     detects read-after-write (store-load) violations
 *  - LoadQueueReplay:  holds loads that must replay (TLB miss, bank
 *                      conflict, forward fail, ...)
 *  - VirtualLoadQueue: tracks per-load control state / commit order
 *  - LqExceptionBuffer / UncacheBuffer: exception address capture and
 *                      uncached (MMIO) load handling
 * It contains no datapath logic of its own beyond rollback arbitration.
 */
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))          // branch-misprediction / exception flush
    val enq = new LqEnqIO                                // allocation from dispatch
    val ldu = new Bundle() {
        val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
        val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
        val ldin = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle)))   // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new ExuOutput))) // from store_s0, store data, send to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)            // oldest store whose address is not yet ready
      val stAddrReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)            // oldest store whose data is not yet ready
      val stDataReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr = Input(new SqPtr)
      val sqEmpty = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new ExuOutput))          // writeback (uncache loads)
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))    // re-issue replayed loads to the pipeline
    val refill = Flipped(ValidIO(new Refill))                               // dcache refill wakeup
    val tl_d_channel = Input(new DcacheToLduForwardIO)                      // TileLink D-channel forward
    val release = Flipped(Valid(new Release))                               // cache-line release, feeds RAR checking
    val rollback = Output(Valid(new Redirect))                              // memory-ordering violation flush request
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val trigger = Vec(LoadPipelineWidth, new LqTriggerIO)
    val exceptionAddr = new ExceptionAddrIO
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))                     // number of loads dequeued this cycle
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize+1).W))        // entries cancelled by redirect
    val lq_rep_full = Output(Bool())                                       // replay queue full
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))        // configurable TLB-replay delays
    val l2_hint = Input(Valid(new L2ToL1Hint()))                            // early L2 refill hint for replay scheduling
    val lqEmpty = Output(Bool())
    val debugTopDown = new LoadQueueTopDownIO
  })

  // Sub-queue instantiation; each handles one concern (see class ScalaDoc).
  val loadQueueRAR = Module(new LoadQueueRAR)  // read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW)  // read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay)  // enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue)  // control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val uncacheBuffer = Module(new UncacheBuffer) // uncache buffer

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.release <> io.release
  loadQueueRAR.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect <> io.redirect
  loadQueueRAW.io.storeIn <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect <> io.redirect
  virtualLoadQueue.io.enq <> io.enq
  virtualLoadQueue.io.ldin <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull <> io.lqFull
  virtualLoadQueue.io.lqDeq <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty <> io.lqEmpty

  /**
   * Load queue exception buffer
   */
  // Snoops every load writeback so the precise faulting address can be
  // reported through io.exceptionAddr at commit time.
  exceptionBuffer.io.redirect <> io.redirect
  for ((buff, w) <- exceptionBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits
  }
  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  uncacheBuffer.io.redirect <> io.redirect
  uncacheBuffer.io.ldout <> io.ldout
  uncacheBuffer.io.ld_raw_data <> io.ld_raw_data
  uncacheBuffer.io.rob <> io.rob
  uncacheBuffer.io.uncache <> io.uncache
  uncacheBuffer.io.trigger <> io.trigger
  for ((buff, w) <- uncacheBuffer.io.req.zipWithIndex) {
    buff.valid := io.ldu.ldin(w).valid // from load_s3
    buff.bits := io.ldu.ldin(w).bits // from load_s3
  }

  // rollback
  /**
   * Tournament reduction that keeps only the oldest valid rollback
   * request (smallest robIdx). Pairwise `Mux` selection: when both
   * halves are valid the one that is NOT `isAfter` wins; otherwise
   * the valid one wins. Recurses until a single (valid, bits) pair
   * remains. Returns 1-element Seqs for length >= 2 inputs.
   */
  def selectOldest[T <: Redirect](valid: Seq[Bool], bits: Seq[T]): (Seq[Bool], Seq[T]) = {
    assert(valid.length == bits.length)
    if (valid.length == 0 || valid.length == 1) {
      (valid, bits)
    } else if (valid.length == 2) {
      val res = Seq.fill(2)(Wire(ValidIO(chiselTypeOf(bits(0)))))
      for (i <- res.indices) {
        res(i).valid := valid(i)
        res(i).bits := bits(i)
      }
      val oldest = Mux(valid(0) && valid(1), Mux(isAfter(bits(0).robIdx, bits(1).robIdx), res(1), res(0)), Mux(valid(0) && !valid(1), res(0), res(1)))
      (Seq(oldest.valid), Seq(oldest.bits))
    } else {
      val left = selectOldest(valid.take(valid.length / 2), bits.take(bits.length / 2))
      val right = selectOldest(valid.takeRight(valid.length - (valid.length / 2)), bits.takeRight(bits.length - (bits.length / 2)))
      selectOldest(left._1 ++ right._1, left._2 ++ right._2)
    }
  }

  // Arbitrate rollback requests from the RAW queue and the uncache
  // buffer: the one bound to the older instruction wins.
  val (rollbackSelV, rollbackSelBits) = selectOldest(
    Seq(loadQueueRAW.io.rollback.valid, uncacheBuffer.io.rollback.valid),
    Seq(loadQueueRAW.io.rollback.bits, uncacheBuffer.io.rollback.bits)
  )
  io.rollback.valid := rollbackSelV.head
  io.rollback.bits := rollbackSelBits.head

  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect <> io.redirect
  loadQueueReplay.io.enq <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay <> io.replay
  loadQueueReplay.io.refill <> io.refill
  loadQueueReplay.io.tl_d_channel <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint <> io.l2_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl

  loadQueueReplay.io.debugTopDown <> io.debugTopDown

  // Perf counters: a 3-bit occupancy fingerprint {RAR full, RAW full,
  // replay full} so each of the 8 fullness combinations is counted.
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("rollback", io.rollback.valid)

  // perf cnt
  // Aggregate sub-queue perf events plus the local fullness/rollback
  // counters for the HasPerfEvents infrastructure.
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
  Seq(
    ("full_mask_000", full_mask === 0.U),
    ("full_mask_001", full_mask === 1.U),
    ("full_mask_010", full_mask === 2.U),
    ("full_mask_011", full_mask === 3.U),
    ("full_mask_100", full_mask === 4.U),
    ("full_mask_101", full_mask === 5.U),
    ("full_mask_110", full_mask === 6.U),
    ("full_mask_111", full_mask === 7.U),
    ("rollback", io.rollback.valid)
  )
  generatePerfEvent()
  // end
}