package xiangshan.mem

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import utils._
import xiangshan._
import xiangshan.cache._
import xiangshan.cache.{DCacheWordIO, DCacheLineIO, TlbRequestIO, MemoryOpConstants}
import xiangshan.backend.roq.RoqLsqIO
import difftest._

class SqPtr(implicit p: Parameters) extends CircularQueuePtr[SqPtr](
  p => p(XSCoreParamsKey).StoreQueueSize
){
  override def cloneType = (new SqPtr).asInstanceOf[this.type]
}

object SqPtr {
  def apply(f: Bool, v: UInt)(implicit p: Parameters): SqPtr = {
    val ptr = Wire(new SqPtr)
    ptr.flag := f
    ptr.value := v
    ptr
  }
}

class SqEnqIO(implicit p: Parameters) extends XSBundle {
  val canAccept = Output(Bool())
  val lqCanAccept = Input(Bool())
  val needAlloc = Vec(RenameWidth, Input(Bool()))
  val req = Vec(RenameWidth, Flipped(ValidIO(new MicroOp)))
  val resp = Vec(RenameWidth, Output(new SqPtr))
}

// Store Queue
class StoreQueue(implicit p: Parameters) extends XSModule with HasDCacheParameters with HasCircularQueuePtrHelper {
  val io = IO(new Bundle() {
    val enq = new SqEnqIO
    val brqRedirect = Flipped(ValidIO(new Redirect))
    val flush = Input(Bool())
    val storeIn = Vec(StorePipelineWidth, Flipped(Valid(new LsPipelineBundle)))
    val sbuffer = Vec(StorePipelineWidth, Decoupled(new DCacheWordReq))
    val mmioStout = DecoupledIO(new ExuOutput) // writeback uncached store
    val forward = Vec(LoadPipelineWidth, Flipped(new MaskedLoadForwardQueryIO))
    val roq = Flipped(new RoqLsqIO)
    val uncache = new DCacheWordIO
    // val refill = Flipped(Valid(new DCacheLineReq ))
    val exceptionAddr = new ExceptionAddrIO
    val sqempty = Output(Bool())
    val issuePtrExt = Output(new SqPtr)
    val storeIssue = Vec(StorePipelineWidth, Flipped(Valid(new ExuInput)))
    val sqFull = Output(Bool())
  })

  // data modules
  val uop = Reg(Vec(StoreQueueSize, new MicroOp))
  // val data = Reg(Vec(StoreQueueSize, new LsqEntry))
  val dataModule = Module(new StoreQueueData(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth))
  dataModule.io := DontCare
  val paddrModule = Module(new SQPaddrModule(StoreQueueSize, numRead = StorePipelineWidth, numWrite = StorePipelineWidth, numForward = StorePipelineWidth))
  paddrModule.io := DontCare
  val vaddrModule = Module(new SyncDataModuleTemplate(UInt(VAddrBits.W), StoreQueueSize, numRead = 1, numWrite = StorePipelineWidth))
  vaddrModule.io := DontCare

  // state & misc
  val allocated = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // sq entry has been allocated
  val datavalid = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // non-mmio data is valid
  val writebacked = RegInit(VecInit(List.fill(StoreQueueSize)(false.B))) // inst has been written back to CDB
  val issued = Reg(Vec(StoreQueueSize, Bool())) // inst has been issued by rs
  val commited = Reg(Vec(StoreQueueSize, Bool())) // inst has been committed by roq
  val pending = Reg(Vec(StoreQueueSize, Bool())) // mmio pending: inst is an mmio inst, it will not be executed until it reaches the end of roq
  val mmio = Reg(Vec(StoreQueueSize, Bool())) // mmio: inst is an mmio inst

  // ptr
  require(StoreQueueSize > RenameWidth)
  val enqPtrExt = RegInit(VecInit((0 until RenameWidth).map(_.U.asTypeOf(new SqPtr))))
  val deqPtrExt = RegInit(VecInit((0 until StorePipelineWidth).map(_.U.asTypeOf(new SqPtr))))
  val cmtPtrExt = RegInit(VecInit((0 until CommitWidth).map(_.U.asTypeOf(new SqPtr))))
  val issuePtrExt = RegInit(0.U.asTypeOf(new SqPtr))
  val validCounter = RegInit(0.U(log2Ceil(StoreQueueSize + 1).W))
  val allowEnqueue = RegInit(true.B)
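  // A note on the extended pointers (a sketch, assuming StoreQueueSize = 8):
  // an SqPtr carries a wrap flag in addition to the queue index, so full and
  // empty can be told apart without sacrificing an entry. Equal value and
  // equal flag (e.g. enq = (0, 2), deq = (0, 2)) means the queue is empty;
  // equal value but different flags (e.g. enq = (1, 2), deq = (0, 2)) means
  // all 8 entries are occupied. enqPtrExt/deqPtrExt/cmtPtrExt each hold
  // several consecutive copies of the pointer so that multiple entries per
  // cycle can be indexed (e.g. enqPtrExt(offset) below) without recomputing
  // pointer offsets in the same cycle.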
  val enqPtr = enqPtrExt(0).value
  val deqPtr = deqPtrExt(0).value
  val cmtPtr = cmtPtrExt(0).value

  val deqMask = UIntToMask(deqPtr, StoreQueueSize)
  val enqMask = UIntToMask(enqPtr, StoreQueueSize)

  val commitCount = RegNext(io.roq.scommit)

  // Read dataModule
  // The entries at deqPtrExtNext and deqPtrExtNext+1 will be read from dataModule
  // if !sbuffer.fire(), read the same ptr
  // if sbuffer.fire(), read next
  val deqPtrExtNext = WireInit(Mux(io.sbuffer(1).fire(),
    VecInit(deqPtrExt.map(_ + 2.U)),
    Mux(io.sbuffer(0).fire() || io.mmioStout.fire(),
      VecInit(deqPtrExt.map(_ + 1.U)),
      deqPtrExt
    )
  ))
  for (i <- 0 until StorePipelineWidth) {
    dataModule.io.raddr(i) := deqPtrExtNext(i).value
    paddrModule.io.raddr(i) := deqPtrExtNext(i).value
  }

  // no inst will be committed 1 cycle before tval update
  vaddrModule.io.raddr(0) := (cmtPtrExt(0) + commitCount).value

  /**
   * Enqueue at dispatch
   *
   * Currently, StoreQueue only allows enqueue when #emptyEntries > RenameWidth (EnqWidth)
   */
  io.enq.canAccept := allowEnqueue
  for (i <- 0 until RenameWidth) {
    val offset = if (i == 0) 0.U else PopCount(io.enq.needAlloc.take(i))
    val sqIdx = enqPtrExt(offset)
    val index = sqIdx.value
    when (io.enq.req(i).valid && io.enq.canAccept && io.enq.lqCanAccept && !(io.brqRedirect.valid || io.flush)) {
      uop(index) := io.enq.req(i).bits
      allocated(index) := true.B
      datavalid(index) := false.B
      writebacked(index) := false.B
      issued(index) := false.B
      commited(index) := false.B
      pending(index) := false.B
    }
    io.enq.resp(i) := sqIdx
  }
  XSDebug(p"(ready, valid): ${io.enq.canAccept}, ${Binary(Cat(io.enq.req.map(_.valid)))}\n")

  /**
   * Update issuePtr when issue from rs
   */

  // update state bit issued
  for (i <- 0 until StorePipelineWidth) {
    when (io.storeIssue(i).valid) {
      issued(io.storeIssue(i).bits.uop.sqIdx.value) := true.B
    }
  }

  // update issuePtr
  val IssuePtrMoveStride = 4
  require(IssuePtrMoveStride >= 2)

  val issueLookupVec = (0 until IssuePtrMoveStride).map(issuePtrExt + _.U)
  val issueLookup = issueLookupVec.map(ptr => allocated(ptr.value) && issued(ptr.value) && ptr =/= enqPtrExt(0))
  val nextIssuePtr = issuePtrExt + PriorityEncoder(VecInit(issueLookup.map(!_) :+ true.B))
  issuePtrExt := nextIssuePtr

  when (io.brqRedirect.valid || io.flush) {
    issuePtrExt := Mux(
      isAfter(cmtPtrExt(0), deqPtrExt(0)),
      cmtPtrExt(0),
      deqPtrExtNext(0) // for mmio insts, deqPtr may be ahead of cmtPtr
    )
  }
  // send issuePtrExt to rs
  // io.issuePtrExt := cmtPtrExt(0)
  io.issuePtrExt := issuePtrExt
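  // How the lookup above moves issuePtrExt (a worked example with a
  // hypothetical queue state): the IssuePtrMoveStride entries starting at
  // issuePtrExt are scanned, and the priority encoder counts the leading run
  // of entries that are allocated and issued. If issueLookup = (1, 1, 0, 1),
  // the pointer advances by 2, stopping at the first unissued entry; the
  // appended true.B caps the advance at IssuePtrMoveStride per cycle.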
  /**
   * Writeback store from store units
   *
   * Most store instructions write back to the regfile in the previous cycle.
   * However,
   * (1) For an mmio instruction with exceptions, we need to mark it as datavalid
   *     (in this way it will trigger an exception when it reaches ROB's head)
   *     instead of pending, to avoid sending it to the lower level.
   * (2) For an mmio instruction without exceptions, we mark it as pending.
   *     When the instruction reaches ROB's head, StoreQueue sends it to the uncache channel.
   *     Upon receiving the response, StoreQueue writes the instruction back
   *     through an arbiter shared with the store units. It will later commit as normal.
   */
  for (i <- 0 until StorePipelineWidth) {
    dataModule.io.wen(i) := false.B
    paddrModule.io.wen(i) := false.B
    val stWbIndex = io.storeIn(i).bits.uop.sqIdx.value
    when (io.storeIn(i).fire()) {
      datavalid(stWbIndex) := !io.storeIn(i).bits.mmio
      writebacked(stWbIndex) := !io.storeIn(i).bits.mmio
      pending(stWbIndex) := io.storeIn(i).bits.mmio

      val storeWbData = Wire(new SQDataEntry)
      storeWbData := DontCare
      storeWbData.mask := io.storeIn(i).bits.mask
      storeWbData.data := io.storeIn(i).bits.data

      dataModule.io.waddr(i) := stWbIndex
      dataModule.io.wdata(i) := storeWbData
      dataModule.io.wen(i) := true.B

      paddrModule.io.waddr(i) := stWbIndex
      paddrModule.io.wdata(i) := io.storeIn(i).bits.paddr
      paddrModule.io.wen(i) := true.B

      mmio(stWbIndex) := io.storeIn(i).bits.mmio

      XSInfo("store write to sq idx %d pc 0x%x vaddr %x paddr %x data %x mmio %x\n",
        io.storeIn(i).bits.uop.sqIdx.value,
        io.storeIn(i).bits.uop.cf.pc,
        io.storeIn(i).bits.vaddr,
        io.storeIn(i).bits.paddr,
        io.storeIn(i).bits.data,
        io.storeIn(i).bits.mmio
      )
    }
    // vaddrModule write is delayed, as vaddrModule will not be read right after write
    vaddrModule.io.waddr(i) := RegNext(stWbIndex)
    vaddrModule.io.wdata(i) := RegNext(io.storeIn(i).bits.vaddr)
    vaddrModule.io.wen(i) := RegNext(io.storeIn(i).fire())
  }

  /**
   * load forward query
   *
   * Check store queue for instructions that are older than the load.
   * The response will be valid at the next cycle after req.
   */
  // check over all sq entries and forward data from the first matched store
  for (i <- 0 until LoadPipelineWidth) {
    io.forward(i).forwardMask := 0.U(8.W).asBools
    io.forward(i).forwardData := DontCare

    // Compare deqPtr and forward.sqIdx, we have two cases:
    // (1) if they have the same flag, we need to check range(tail, sqIdx)
    // (2) if they have different flags, we need to check range(tail, StoreQueueSize) and range(0, sqIdx)
    // Forward1: Mux(same_flag, range(tail, sqIdx), range(tail, StoreQueueSize))
    // Forward2: Mux(same_flag, 0.U,                range(0, sqIdx)            )
    // i.e. forward1 covers the target entries whose flag matches deqPtr's, and forward2 covers the rest
    val differentFlag = deqPtrExt(0).flag =/= io.forward(i).sqIdx.flag
    val forwardMask = io.forward(i).sqIdxMask
    val storeWritebackedVec = WireInit(VecInit(Seq.fill(StoreQueueSize)(false.B)))
    for (j <- 0 until StoreQueueSize) {
      storeWritebackedVec(j) := datavalid(j) && allocated(j) // all datavalid terms need to be checked
    }
    val needForward1 = Mux(differentFlag, ~deqMask, deqMask ^ forwardMask) & storeWritebackedVec.asUInt
    val needForward2 = Mux(differentFlag, forwardMask, 0.U(StoreQueueSize.W)) & storeWritebackedVec.asUInt

    XSDebug(p"$i f1 ${Binary(needForward1)} f2 ${Binary(needForward2)} " +
      p"sqIdx ${io.forward(i).sqIdx} pa ${Hexadecimal(io.forward(i).paddr)}\n"
    )

    // do real fwd query
    dataModule.io.needForward(i)(0) := needForward1 & paddrModule.io.forwardMmask(i).asUInt
    dataModule.io.needForward(i)(1) := needForward2 & paddrModule.io.forwardMmask(i).asUInt

    paddrModule.io.forwardMdata(i) := io.forward(i).paddr

    io.forward(i).forwardMask := dataModule.io.forwardMask(i)
    io.forward(i).forwardData := dataModule.io.forwardData(i)
  }
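  // The forward query above is a two-step CAM lookup (a sketch; the per-byte
  // selection itself lives in StoreQueueData / SQPaddrModule): paddrModule
  // matches the load paddr against all entries (forwardMmask), and dataModule
  // then selects forward data from the matching stores older than the load,
  // bounded by needForward1/needForward2. For the age window, assuming
  // StoreQueueSize = 8, deqPtr = 2 and sqIdx = 5 with equal flags:
  // deqMask = 0b00000011, sqIdxMask = 0b00011111, so
  // deqMask ^ sqIdxMask = 0b00011100, i.e. entries 2..4 are candidates.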
  /**
   * Memory mapped IO / other uncached operations
   *
   * States:
   * (1) writeback from store units: mark as pending
   * (2) when they reach ROB's head, they can be sent to uncache channel
   * (3) response from uncache channel: mark as datavalid
   * (4) writeback to ROB (and other units): mark as writebacked
   * (5) ROB commits the instruction: same as normal instructions
   */
  // (2) when they reach ROB's head, they can be sent to uncache channel
  val s_idle :: s_req :: s_resp :: s_wb :: s_wait :: Nil = Enum(5)
  val uncacheState = RegInit(s_idle)
  switch(uncacheState) {
    is(s_idle) {
      when(io.roq.pendingst && pending(deqPtr) && allocated(deqPtr)) {
        uncacheState := s_req
      }
    }
    is(s_req) {
      when(io.uncache.req.fire()) {
        uncacheState := s_resp
      }
    }
    is(s_resp) {
      when(io.uncache.resp.fire()) {
        uncacheState := s_wb
      }
    }
    is(s_wb) {
      when (io.mmioStout.fire()) {
        uncacheState := s_wait
      }
    }
    is(s_wait) {
      when(io.roq.commit) {
        uncacheState := s_idle // ready for next mmio
      }
    }
  }
  io.uncache.req.valid := uncacheState === s_req

  io.uncache.req.bits.cmd  := MemoryOpConstants.M_XWR
  io.uncache.req.bits.addr := paddrModule.io.rdata(0) // data(deqPtr) -> rdata(0)
  io.uncache.req.bits.data := dataModule.io.rdata(0).data
  io.uncache.req.bits.mask := dataModule.io.rdata(0).mask

  io.uncache.req.bits.id := DontCare

  when(io.uncache.req.fire()){
    pending(deqPtr) := false.B

    XSDebug(
      p"uncache req: pc ${Hexadecimal(uop(deqPtr).cf.pc)} " +
      p"addr ${Hexadecimal(io.uncache.req.bits.addr)} " +
      p"data ${Hexadecimal(io.uncache.req.bits.data)} " +
      p"op ${Hexadecimal(io.uncache.req.bits.cmd)} " +
      p"mask ${Hexadecimal(io.uncache.req.bits.mask)}\n"
    )
  }

  // (3) response from uncache channel: mark as datavalid
  io.uncache.resp.ready := true.B
  when (io.uncache.resp.fire()) {
    datavalid(deqPtr) := true.B
  }
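  // Note: as written, the state machine above serializes uncached stores. At
  // most one mmio store (the one at deqPtr) is in flight at a time, it is only
  // launched once io.roq.pendingst indicates it has reached ROB's head, and
  // s_wait holds the machine until the ROB commits it, so an uncached access
  // is never issued speculatively.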
  // (4) writeback to ROB (and other units): mark as writebacked
  io.mmioStout.valid := uncacheState === s_wb // allocated(deqPtr) && datavalid(deqPtr) && !writebacked(deqPtr)
  io.mmioStout.bits.uop := uop(deqPtr)
  io.mmioStout.bits.uop.sqIdx := deqPtrExt(0)
  io.mmioStout.bits.data := dataModule.io.rdata(0).data // dataModule.io.rdata.read(deqPtr)
  io.mmioStout.bits.redirectValid := false.B
  io.mmioStout.bits.redirect := DontCare
  io.mmioStout.bits.debug.isMMIO := true.B
  io.mmioStout.bits.debug.paddr := DontCare
  io.mmioStout.bits.debug.isPerfCnt := false.B
  io.mmioStout.bits.fflags := DontCare
  when (io.mmioStout.fire()) {
    writebacked(deqPtr) := true.B
    allocated(deqPtr) := false.B
  }

  /**
   * ROB commits store instructions (mark them as committed)
   *
   * (1) When a store commits, mark it as committed.
   * (2) Committed stores will not be cancelled and can be sent to the lower level.
   */
  for (i <- 0 until CommitWidth) {
    when (commitCount > i.U) {
      commited(cmtPtrExt(i).value) := true.B
    }
  }
  cmtPtrExt := cmtPtrExt.map(_ + commitCount)

  // Committed stores will not be cancelled and can be sent to lower level.
  // remove retired insts from sq, add retired store to sbuffer
  for (i <- 0 until StorePipelineWidth) {
    // data is read one cycle in advance (raddr is driven by deqPtrExtNext) to prepare data for sbuffer
    val ptr = deqPtrExt(i).value
    // if !sbuffer.fire(), read the same ptr
    // if sbuffer.fire(), read next
    io.sbuffer(i).valid := allocated(ptr) && commited(ptr) && !mmio(ptr)
    io.sbuffer(i).bits.cmd  := MemoryOpConstants.M_XWR
    io.sbuffer(i).bits.addr := paddrModule.io.rdata(i)
    io.sbuffer(i).bits.data := dataModule.io.rdata(i).data
    io.sbuffer(i).bits.mask := dataModule.io.rdata(i).mask
    io.sbuffer(i).bits.id := DontCare

    when (io.sbuffer(i).fire()) {
      allocated(ptr) := false.B
      XSDebug("sbuffer "+i+" fire: ptr %d\n", ptr)
    }
  }
  when (io.sbuffer(1).fire()) {
    assert(io.sbuffer(0).fire())
  }

  if (!env.FPGAPlatform) {
    for (i <- 0 until StorePipelineWidth) {
      val storeCommit = io.sbuffer(i).fire()
      val waddr = SignExt(io.sbuffer(i).bits.addr, 64)
      val wdata = io.sbuffer(i).bits.data & MaskExpand(io.sbuffer(i).bits.mask)
      val wmask = io.sbuffer(i).bits.mask

      val difftest = Module(new DifftestStoreEvent)
      difftest.io.clock := clock
      difftest.io.coreid := 0.U
      difftest.io.index := i.U
      difftest.io.valid := storeCommit
      difftest.io.storeAddr := waddr
      difftest.io.storeData := wdata
      difftest.io.storeMask := wmask
    }
  }

  // Read vaddr for mem exception
  io.exceptionAddr.vaddr := vaddrModule.io.rdata(0)

  // misprediction recovery / exception redirect
  // invalidate sq entries using roqIdx
  val needCancel = Wire(Vec(StoreQueueSize, Bool()))
  for (i <- 0 until StoreQueueSize) {
    needCancel(i) := uop(i).roqIdx.needFlush(io.brqRedirect, io.flush) && allocated(i) && !commited(i)
    when (needCancel(i)) {
      allocated(i) := false.B
    }
  }
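  // The cancel logic above squashes, in a single cycle, every allocated entry
  // whose roqIdx is flushed by the redirect; committed entries are protected
  // by !commited(i), since they must still drain to the sbuffer. The popcount
  // of needCancel is what enqPtrExt is rolled back by in the pointer update
  // below.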
  /**
   * update pointers
   */
  val lastCycleRedirect = RegNext(io.brqRedirect.valid)
  val lastCycleFlush = RegNext(io.flush)
  val lastCycleCancelCount = PopCount(RegNext(needCancel))
  // when io.brqRedirect.valid, we don't allow enqueue even though it may fire.
  val enqNumber = Mux(io.enq.canAccept && io.enq.lqCanAccept && !(io.brqRedirect.valid || io.flush), PopCount(io.enq.req.map(_.valid)), 0.U)
  when (lastCycleRedirect || lastCycleFlush) {
    // we recover the pointers in the next cycle after redirect
    enqPtrExt := VecInit(enqPtrExt.map(_ - lastCycleCancelCount))
  }.otherwise {
    enqPtrExt := VecInit(enqPtrExt.map(_ + enqNumber))
  }

  deqPtrExt := deqPtrExtNext

  val dequeueCount = Mux(io.sbuffer(1).fire(), 2.U, Mux(io.sbuffer(0).fire() || io.mmioStout.fire(), 1.U, 0.U))
  val validCount = distanceBetween(enqPtrExt(0), deqPtrExt(0))

  allowEnqueue := validCount + enqNumber <= (StoreQueueSize - RenameWidth).U

  // io.sqempty will be used by sbuffer
  // We delay it for 1 cycle for better timing
  // When sbuffer needs to check whether it is empty, the pipeline is blocked, so delaying
  // io.sqempty by 1 cycle still guarantees that sq is empty in that cycle
  io.sqempty := RegNext(enqPtrExt(0).value === deqPtrExt(0).value && enqPtrExt(0).flag === deqPtrExt(0).flag)

  // perf counter
  QueuePerf(StoreQueueSize, validCount, !allowEnqueue)
  io.sqFull := !allowEnqueue
  XSPerfAccumulate("mmioCycle", uncacheState =/= s_idle) // sq is busy dealing with uncache req
  XSPerfAccumulate("mmioCnt", io.uncache.req.fire())
  XSPerfAccumulate("mmio_wb_success", io.mmioStout.fire())
  XSPerfAccumulate("mmio_wb_blocked", io.mmioStout.valid && !io.mmioStout.ready)
  XSPerfAccumulate("validEntryCnt", distanceBetween(enqPtrExt(0), deqPtrExt(0)))
  XSPerfAccumulate("cmtEntryCnt", distanceBetween(cmtPtrExt(0), deqPtrExt(0)))
  XSPerfAccumulate("nCmtEntryCnt", distanceBetween(enqPtrExt(0), cmtPtrExt(0)))

  // debug info
  XSDebug("enqPtrExt %d:%d deqPtrExt %d:%d\n", enqPtrExt(0).flag, enqPtr, deqPtrExt(0).flag, deqPtr)

  def PrintFlag(flag: Bool, name: String): Unit = {
    when(flag) {
      XSDebug(false, true.B, name)
    }.otherwise {
      XSDebug(false, true.B, " ")
    }
  }

  for (i <- 0 until StoreQueueSize) {
    if (i % 4 == 0) XSDebug("")
    XSDebug(false, true.B, "%x ", uop(i).cf.pc)
    PrintFlag(allocated(i), "a")
    PrintFlag(allocated(i) && datavalid(i), "v")
    PrintFlag(allocated(i) && writebacked(i), "w")
    PrintFlag(allocated(i) && commited(i), "c")
    PrintFlag(allocated(i) && pending(i), "p")
    XSDebug(false, true.B, " ")
    if (i % 4 == 3 || i == StoreQueueSize - 1) XSDebug(false, true.B, "\n")
  }

}