/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import utility._
import xiangshan._
import xiangshan.backend._
import xiangshan.backend.fu.fpu._
import xiangshan.backend.rob.RobLsqIO
import xiangshan.cache._
import xiangshan.cache.mmu._
import xiangshan.frontend.FtqPtr
import xiangshan.ExceptionNO._
import xiangshan.mem.mdp._
import xiangshan.backend.Bundles.{DynInst, MemExuOutput, MemMicroOpRbExt}
import xiangshan.backend.rob.RobPtr

class LqPtr(implicit p: Parameters) extends CircularQueuePtr[LqPtr](
  p => p(XSCoreParamsKey).VirtualLoadQueueSize
){
}

object LqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): LqPtr = {
    val ptr = Wire(new LqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}
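
// Usage sketch (illustration only): an LqPtr is a circular-queue pointer over
// VirtualLoadQueueSize entries; `flag` flips on each wrap-around so that age
// comparisons between two pointers stay correct after wrapping.
//   val headPtr = LqPtr(false.B, 0.U) // hypothetical: entry 0 on the first lap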
trait HasLoadHelper { this: XSModule =>
  def rdataHelper(uop: DynInst, rdata: UInt): UInt = {
    val fpWen = uop.fpWen
    LookupTree(uop.fuOpType, List(
      LSUOpType.lb  -> SignExt(rdata(7, 0) , XLEN),
      LSUOpType.lh  -> SignExt(rdata(15, 0), XLEN),
      /*
          riscv-spec-20191213: 12.2 NaN Boxing of Narrower Values
          Any operation that writes a narrower result to an f register must write
          all 1s to the uppermost FLEN−n bits to yield a legal NaN-boxed value.
      */
      LSUOpType.lw  -> Mux(fpWen, FPU.box(rdata, FPU.S), SignExt(rdata(31, 0), XLEN)),
      LSUOpType.ld  -> Mux(fpWen, FPU.box(rdata, FPU.D), SignExt(rdata(63, 0), XLEN)),
      LSUOpType.lbu -> ZeroExt(rdata(7, 0) , XLEN),
      LSUOpType.lhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.lwu -> ZeroExt(rdata(31, 0), XLEN),

      // hypervisor
      LSUOpType.hlvb   -> SignExt(rdata(7, 0), XLEN),
      LSUOpType.hlvh   -> SignExt(rdata(15, 0), XLEN),
      LSUOpType.hlvw   -> SignExt(rdata(31, 0), XLEN),
      LSUOpType.hlvd   -> SignExt(rdata(63, 0), XLEN),
      LSUOpType.hlvbu  -> ZeroExt(rdata(7, 0), XLEN),
      LSUOpType.hlvhu  -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvwu  -> ZeroExt(rdata(31, 0), XLEN),
      LSUOpType.hlvxhu -> ZeroExt(rdata(15, 0), XLEN),
      LSUOpType.hlvxwu -> ZeroExt(rdata(31, 0), XLEN),
    ))
  }

  // Generates the one-hot data selector consumed by newRdataHelper. Bit order,
  // LSB to MSB, matches the selData sequence there: lbu, lhu, lwu, ld, lb, lh,
  // lw, fp half (NaN-boxed), fp single (NaN-boxed).
  def genRdataOH(uop: DynInst): UInt = {
    val fuOpType = uop.fuOpType
    val fpWen = uop.fpWen
    val result = Cat(
      (fuOpType === LSUOpType.lw && fpWen),
      (fuOpType === LSUOpType.lh && fpWen),
      (fuOpType === LSUOpType.lw && !fpWen) || (fuOpType === LSUOpType.hlvw),
      (fuOpType === LSUOpType.lh && !fpWen) || (fuOpType === LSUOpType.hlvh),
      (fuOpType === LSUOpType.lb)  || (fuOpType === LSUOpType.hlvb),
      (fuOpType === LSUOpType.ld)  || (fuOpType === LSUOpType.hlvd),
      (fuOpType === LSUOpType.lwu) || (fuOpType === LSUOpType.hlvwu) || (fuOpType === LSUOpType.hlvxwu),
      (fuOpType === LSUOpType.lhu) || (fuOpType === LSUOpType.hlvhu) || (fuOpType === LSUOpType.hlvxhu),
      (fuOpType === LSUOpType.lbu) || (fuOpType === LSUOpType.hlvbu),
    )
    result
  }

  def newRdataHelper(select: UInt, rdata: UInt): UInt = {
    XSError(PopCount(select) > 1.U, "data selector must be One-Hot!\n")
    val selData = Seq(
      ZeroExt(rdata(7, 0), XLEN),
      ZeroExt(rdata(15, 0), XLEN),
      ZeroExt(rdata(31, 0), XLEN),
      rdata(63, 0),
      SignExt(rdata(7, 0) , XLEN),
      SignExt(rdata(15, 0) , XLEN),
      SignExt(rdata(31, 0) , XLEN),
      FPU.box(rdata, FPU.H),
      FPU.box(rdata, FPU.S)
    )
    Mux1H(select, selData)
  }

  def genDataSelectByOffset(addrOffset: UInt): Vec[Bool] = {
    require(addrOffset.getWidth == 3)
    VecInit((0 until 8).map{ case i =>
      addrOffset === i.U
    })
  }

  def rdataVecHelper(alignedType: UInt, rdata: UInt): UInt = {
    LookupTree(alignedType, List(
      "b00".U -> ZeroExt(rdata(7, 0), VLEN),
      "b01".U -> ZeroExt(rdata(15, 0), VLEN),
      "b10".U -> ZeroExt(rdata(31, 0), VLEN),
      "b11".U -> ZeroExt(rdata(63, 0), VLEN)
    ))
  }
}

class LqEnqIO(implicit p: Parameters) extends MemBlockBundle {
  val canAccept = Output(Bool())
  val sqCanAccept = Input(Bool())
  val needAlloc = Vec(LSQEnqWidth, Input(Bool()))
  val req = Vec(LSQEnqWidth, Flipped(ValidIO(new DynInst)))
  val resp = Vec(LSQEnqWidth, Output(new LqPtr))
}

class LqTriggerIO(implicit p: Parameters) extends XSBundle {
  val hitLoadAddrTriggerHitVec = Input(Vec(TriggerNum, Bool()))
  val lqLoadAddrTriggerHitVec = Output(Vec(TriggerNum, Bool()))
}

class LoadQueueTopDownIO(implicit p: Parameters) extends XSBundle {
  val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  val robHeadTlbReplay = Output(Bool())
  val robHeadTlbMiss = Output(Bool())
  val robHeadLoadVio = Output(Bool())
  val robHeadLoadMSHR = Output(Bool())
  val robHeadMissInDTlb = Input(Bool())
  val robHeadOtherReplay = Output(Bool())
}
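
// LoadQueue is a composition layer: it instantiates the virtual load queue
// (control state), the RAR/RAW violation-check queues, the replay queue, the
// exception buffer and the two uncache buffers (MMIO and NC), then wires them
// to the load/store pipelines, the store queue and the ROB.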
class LoadQueue(implicit p: Parameters) extends XSModule
  with HasDCacheParameters
  with HasCircularQueuePtrHelper
  with HasLoadHelper
  with HasPerfEvents
{
  val io = IO(new Bundle() {
    val redirect = Flipped(Valid(new Redirect))
    val vecFeedback = Vec(VecLoadPipelineWidth, Flipped(ValidIO(new FeedbackToLsqIO)))
    val enq = new LqEnqIO
    val ldu = new Bundle() {
      val stld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldld_nuke_query = Vec(LoadPipelineWidth, Flipped(new LoadNukeQueryIO)) // from load_s2
      val ldin = Vec(LoadPipelineWidth, Flipped(Decoupled(new LqWriteBundle))) // from load_s3
    }
    val sta = new Bundle() {
      val storeAddrIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle))) // from store_s1
    }
    val std = new Bundle() {
      val storeDataIn = Vec(StorePipelineWidth, Flipped(Valid(new MemExuOutput(isVector = true)))) // from store_s0, store data, send to sq from rs
    }
    val sq = new Bundle() {
      val stAddrReadySqPtr = Input(new SqPtr)
      val stAddrReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stDataReadySqPtr = Input(new SqPtr)
      val stDataReadyVec = Input(Vec(StoreQueueSize, Bool()))
      val stIssuePtr = Input(new SqPtr)
      val sqEmpty = Input(Bool())
    }
    val ldout = Vec(LoadPipelineWidth, DecoupledIO(new MemExuOutput))
    val ld_raw_data = Vec(LoadPipelineWidth, Output(new LoadDataFromLQBundle))
    val ncOut = Vec(LoadPipelineWidth, DecoupledIO(new LsPipelineBundle))
    val replay = Vec(LoadPipelineWidth, Decoupled(new LsPipelineBundle))
    // val refill = Flipped(ValidIO(new Refill))
    val tl_d_channel = Input(new DcacheToLduForwardIO)
    val release = Flipped(Valid(new Release))
    val nuke_rollback = Vec(StorePipelineWidth, Output(Valid(new Redirect)))
    val nack_rollback = Vec(2, Output(Valid(new Redirect))) // mmio, nc
    val rob = Flipped(new RobLsqIO)
    val uncache = new UncacheWordIO
    val exceptionAddr = new ExceptionAddrIO
    val flushFrmMaBuf = Input(Bool())
    val lqFull = Output(Bool())
    val lqDeq = Output(UInt(log2Up(CommitWidth + 1).W))
    val lqCancelCnt = Output(UInt(log2Up(VirtualLoadQueueSize + 1).W))
    val lq_rep_full = Output(Bool())
    val tlbReplayDelayCycleCtrl = Vec(4, Input(UInt(ReSelectLen.W)))
    val l2_hint = Input(Valid(new L2ToL1Hint()))
    val tlb_hint = Flipped(new TlbHintIO)
    val lqEmpty = Output(Bool())

    val lqDeqPtr = Output(new LqPtr)

    val debugTopDown = new LoadQueueTopDownIO
  })

  val loadQueueRAR = Module(new LoadQueueRAR) // read-after-read violation
  val loadQueueRAW = Module(new LoadQueueRAW) // read-after-write violation
  val loadQueueReplay = Module(new LoadQueueReplay) // enqueue if need replay
  val virtualLoadQueue = Module(new VirtualLoadQueue) // control state
  val exceptionBuffer = Module(new LqExceptionBuffer) // exception buffer
  val ioBuffer = Module(new IOBuffer) // uncache io buffer
  val ncBuffer = Module(new NCBuffer) // uncache nc buffer

  /**
   * LoadQueueRAR
   */
  loadQueueRAR.io.redirect <> io.redirect
  loadQueueRAR.io.vecFeedback <> io.vecFeedback
  loadQueueRAR.io.release <> io.release
  loadQueueRAR.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAR.io.query(w).req <> io.ldu.ldld_nuke_query(w).req // from load_s1
    loadQueueRAR.io.query(w).resp <> io.ldu.ldld_nuke_query(w).resp // to load_s2
    loadQueueRAR.io.query(w).revoke := io.ldu.ldld_nuke_query(w).revoke // from load_s3
  }
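
  // Both nuke-query paths (ld-ld above, st-ld below) share the same three-step
  // handshake: req from load_s1, resp back to load_s2, revoke from load_s3.
  // In addition, RAW detection drives a pipeline rollback: io.nuke_rollback is
  // wired from loadQueueRAW.io.rollback further down.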

  /**
   * LoadQueueRAW
   */
  loadQueueRAW.io.redirect <> io.redirect
  loadQueueRAW.io.vecFeedback <> io.vecFeedback
  loadQueueRAW.io.storeIn <> io.sta.storeAddrIn
  loadQueueRAW.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueRAW.io.stIssuePtr <> io.sq.stIssuePtr
  for (w <- 0 until LoadPipelineWidth) {
    loadQueueRAW.io.query(w).req <> io.ldu.stld_nuke_query(w).req // from load_s1
    loadQueueRAW.io.query(w).resp <> io.ldu.stld_nuke_query(w).resp // to load_s2
    loadQueueRAW.io.query(w).revoke := io.ldu.stld_nuke_query(w).revoke // from load_s3
  }

  /**
   * VirtualLoadQueue
   */
  virtualLoadQueue.io.redirect <> io.redirect
  virtualLoadQueue.io.vecCommit <> io.vecFeedback
  virtualLoadQueue.io.enq <> io.enq
  virtualLoadQueue.io.ldin <> io.ldu.ldin // from load_s3
  virtualLoadQueue.io.lqFull <> io.lqFull
  virtualLoadQueue.io.lqDeq <> io.lqDeq
  virtualLoadQueue.io.lqCancelCnt <> io.lqCancelCnt
  virtualLoadQueue.io.lqEmpty <> io.lqEmpty
  virtualLoadQueue.io.ldWbPtr <> io.lqDeqPtr

  /**
   * Load queue exception buffer
   */
  exceptionBuffer.io.redirect <> io.redirect
  for (i <- 0 until LoadPipelineWidth) {
    exceptionBuffer.io.req(i).valid := io.ldu.ldin(i).valid && !io.ldu.ldin(i).bits.isvec // from load_s3
    exceptionBuffer.io.req(i).bits := io.ldu.ldin(i).bits
  }
  // vlsu exception!
  for (i <- 0 until VecLoadPipelineWidth) {
    exceptionBuffer.io.req(LoadPipelineWidth + i).valid := io.vecFeedback(i).valid && io.vecFeedback(i).bits.feedback(VecFeedbacks.FLUSH) // have exception
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits := DontCare
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.vaddr := io.vecFeedback(i).bits.vaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.fullva := io.vecFeedback(i).bits.vaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.vaNeedExt := io.vecFeedback(i).bits.vaNeedExt
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.gpaddr := io.vecFeedback(i).bits.gpaddr
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.uopIdx := io.vecFeedback(i).bits.uopidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.robIdx := io.vecFeedback(i).bits.robidx
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vstart := io.vecFeedback(i).bits.vstart
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.vpu.vl := io.vecFeedback(i).bits.vl
    exceptionBuffer.io.req(LoadPipelineWidth + i).bits.uop.exceptionVec := io.vecFeedback(i).bits.exceptionVec
  }
  // mmio non-data error exception
  exceptionBuffer.io.req(LoadPipelineWidth + VecLoadPipelineWidth) := ioBuffer.io.exception
  exceptionBuffer.io.req(LoadPipelineWidth + VecLoadPipelineWidth).bits.vaNeedExt := true.B
  exceptionBuffer.io.req.last := ncBuffer.io.exception
  exceptionBuffer.io.req.last.bits.vaNeedExt := true.B
  exceptionBuffer.io.flushFrmMaBuf := io.flushFrmMaBuf

  io.exceptionAddr <> exceptionBuffer.io.exceptionAddr

  /**
   * Load uncache buffer
   */
  // mmio
  ioBuffer.io.redirect <> io.redirect
  ioBuffer.io.ldout <> io.ldout
  ioBuffer.io.ld_raw_data <> io.ld_raw_data
  ioBuffer.io.rob <> io.rob
  for ((mmio, w) <- ioBuffer.io.req.zipWithIndex) {
    mmio.valid := io.ldu.ldin(w).valid // from load_s3
    mmio.bits := io.ldu.ldin(w).bits // from load_s3
  }
  ioBuffer.io.uncache.resp.valid := io.uncache.resp.valid && !io.uncache.resp.bits.nc
  ioBuffer.io.uncache.resp.bits := io.uncache.resp.bits
  // nc
  ncBuffer.io.redirect <> io.redirect
  ncBuffer.io.ncOut <> io.ncOut
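  // As with the MMIO buffer above, the NC buffer snoops every load_s3 writeback
  // port; both buffers see identical valid/bits here, so each is presumably
  // expected to latch only the uncache accesses it owns (MMIO vs. NC).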
  for ((nc, w) <- ncBuffer.io.req.zipWithIndex) {
    nc.valid := io.ldu.ldin(w).valid // from load_s3
    nc.bits := io.ldu.ldin(w).bits // from load_s3
  }
  ncBuffer.io.uncache.resp.valid := io.uncache.resp.valid && io.uncache.resp.bits.nc
  ncBuffer.io.uncache.resp.bits := io.uncache.resp.bits
  // uncache request arbiter: MMIO requests take fixed priority over NC requests
  ioBuffer.io.uncache.req.ready := io.uncache.req.ready
  ncBuffer.io.uncache.req.ready := io.uncache.req.ready && !ioBuffer.io.uncache.req.valid
  when(ioBuffer.io.uncache.req.valid) {
    io.uncache.req.valid := ioBuffer.io.uncache.req.valid
    io.uncache.req.bits := ioBuffer.io.uncache.req.bits
  }.otherwise {
    io.uncache.req.valid := ncBuffer.io.uncache.req.valid
    io.uncache.req.bits := ncBuffer.io.uncache.req.bits
  }
  io.uncache.resp.ready := true.B
  // TODO lyq: uncache resp ready arbiter? always true?
  // when(io.uncache.resp.bits.nc){
  //   io.uncache.resp.ready := ncBuffer.io.uncache.resp.ready
  // }.otherwise{
  //   io.uncache.resp.ready := ioBuffer.io.uncache.resp.ready
  // }

  io.nuke_rollback := loadQueueRAW.io.rollback
  io.nack_rollback(0) := ioBuffer.io.rollback
  io.nack_rollback(1) := ncBuffer.io.rollback

  /* <------- DANGEROUS: Don't change sequence here ! -------> */

  /**
   * LoadQueueReplay
   */
  loadQueueReplay.io.redirect <> io.redirect
  loadQueueReplay.io.enq <> io.ldu.ldin // from load_s3
  loadQueueReplay.io.storeAddrIn <> io.sta.storeAddrIn // from store_s1
  loadQueueReplay.io.storeDataIn <> io.std.storeDataIn // from store_s0
  loadQueueReplay.io.replay <> io.replay
  // loadQueueReplay.io.refill <> io.refill
  loadQueueReplay.io.tl_d_channel <> io.tl_d_channel
  loadQueueReplay.io.stAddrReadySqPtr <> io.sq.stAddrReadySqPtr
  loadQueueReplay.io.stAddrReadyVec <> io.sq.stAddrReadyVec
  loadQueueReplay.io.stDataReadySqPtr <> io.sq.stDataReadySqPtr
  loadQueueReplay.io.stDataReadyVec <> io.sq.stDataReadyVec
  loadQueueReplay.io.sqEmpty <> io.sq.sqEmpty
  loadQueueReplay.io.lqFull <> io.lq_rep_full
  loadQueueReplay.io.ldWbPtr <> virtualLoadQueue.io.ldWbPtr
  loadQueueReplay.io.rarFull <> loadQueueRAR.io.lqFull
  loadQueueReplay.io.rawFull <> loadQueueRAW.io.lqFull
  loadQueueReplay.io.l2_hint <> io.l2_hint
  loadQueueReplay.io.tlb_hint <> io.tlb_hint
  loadQueueReplay.io.tlbReplayDelayCycleCtrl <> io.tlbReplayDelayCycleCtrl
  // TODO: implement it!
  loadQueueReplay.io.vecFeedback := io.vecFeedback

  loadQueueReplay.io.debugTopDown <> io.debugTopDown
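
  // full_mask encodes which of the three queues are full in a given cycle:
  // bit 2 = RAR queue, bit 1 = RAW queue, bit 0 = replay queue; e.g.
  // full_mask_101 counts cycles where RAR and replay are full but RAW is not.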
  val full_mask = Cat(loadQueueRAR.io.lqFull, loadQueueRAW.io.lqFull, loadQueueReplay.io.lqFull)
  XSPerfAccumulate("full_mask_000", full_mask === 0.U)
  XSPerfAccumulate("full_mask_001", full_mask === 1.U)
  XSPerfAccumulate("full_mask_010", full_mask === 2.U)
  XSPerfAccumulate("full_mask_011", full_mask === 3.U)
  XSPerfAccumulate("full_mask_100", full_mask === 4.U)
  XSPerfAccumulate("full_mask_101", full_mask === 5.U)
  XSPerfAccumulate("full_mask_110", full_mask === 6.U)
  XSPerfAccumulate("full_mask_111", full_mask === 7.U)
  XSPerfAccumulate("nuke_rollback", io.nuke_rollback.map(_.valid).reduce(_ || _).asUInt)
  XSPerfAccumulate("nack_rollback", io.nack_rollback.map(_.valid).reduce(_ || _).asUInt)

  // perf cnt
  val perfEvents = Seq(virtualLoadQueue, loadQueueRAR, loadQueueRAW, loadQueueReplay).flatMap(_.getPerfEvents) ++
  Seq(
    ("full_mask_000", full_mask === 0.U),
    ("full_mask_001", full_mask === 1.U),
    ("full_mask_010", full_mask === 2.U),
    ("full_mask_011", full_mask === 3.U),
    ("full_mask_100", full_mask === 4.U),
    ("full_mask_101", full_mask === 5.U),
    ("full_mask_110", full_mask === 6.U),
    ("full_mask_111", full_mask === 7.U),
    ("nuke_rollback", io.nuke_rollback.map(_.valid).reduce(_ || _).asUInt),
    ("nack_rollback", io.nack_rollback.map(_.valid).reduce(_ || _).asUInt)
  )
  generatePerfEvent()
  // end
}