1/*************************************************************************************** 2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences 3* Copyright (c) 2020-2021 Peng Cheng Laboratory 4* 5* XiangShan is licensed under Mulan PSL v2. 6* You can use this software according to the terms and conditions of the Mulan PSL v2. 7* You may obtain a copy of Mulan PSL v2 at: 8* http://license.coscl.org.cn/MulanPSL2 9* 10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 13* 14* See the Mulan PSL v2 for more details. 15***************************************************************************************/ 16 17package xiangshan.mem 18 19import chipsalliance.rocketchip.config.Parameters 20import chisel3._ 21import chisel3.util._ 22import xiangshan._ 23import utils._ 24import xiangshan.cache._ 25import difftest._ 26 27class SbufferFlushBundle extends Bundle { 28 val valid = Output(Bool()) 29 val empty = Input(Bool()) 30} 31 32trait HasSbufferConst extends HasXSParameter { 33 val EvictCycles = 1 << 20 34 val SbufferReplayDelayCycles = 16 35 require(isPow2(EvictCycles)) 36 val EvictCountBits = log2Up(EvictCycles+1) 37 val MissqReplayCountBits = log2Up(SbufferReplayDelayCycles) + 1 38 39 val SbufferIndexWidth: Int = log2Up(StoreBufferSize) 40 // paddr = ptag + offset 41 val CacheLineBytes: Int = CacheLineSize / 8 42 val CacheLineWords: Int = CacheLineBytes / DataBytes 43 val OffsetWidth: Int = log2Up(CacheLineBytes) 44 val WordsWidth: Int = log2Up(CacheLineWords) 45 val PTagWidth: Int = PAddrBits - OffsetWidth 46 val VTagWidth: Int = VAddrBits - OffsetWidth 47 val WordOffsetWidth: Int = PAddrBits - WordsWidth 48} 49 50class SbufferEntryState (implicit p: Parameters) extends SbufferBundle { 51 val state_valid = Bool() // this entry is active 52 val state_inflight = Bool() // sbuffer is trying to write this entry to dcache 53 val w_timeout = Bool() // with timeout resp, waiting for resend store pipeline req timeout 54 val w_sameblock_inflight = Bool() // same cache block dcache req is inflight 55 val s_recheck_inflight = Bool() // recheck if same cache block dcache req is inflight 56 57 def isInvalid(): Bool = !state_valid 58 def isValid(): Bool = state_valid 59 def isActive(): Bool = state_valid && !state_inflight 60 def isInflight(): Bool = state_inflight 61 def isDcacheReqCandidate(): Bool = state_valid && !state_inflight && !w_sameblock_inflight 62} 63 64class SbufferBundle(implicit p: Parameters) extends XSBundle with HasSbufferConst 65 66class DataWriteReq(implicit p: Parameters) extends SbufferBundle { 67 // val idx = UInt(SbufferIndexWidth.W) 68 val wvec = UInt(StoreBufferSize.W) 69 val mask = UInt((DataBits/8).W) 70 val data = UInt(DataBits.W) 71 val wordOffset = UInt(WordOffsetWidth.W) 72 val wline = Bool() 73} 74 75class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst { 76 val io = IO(new Bundle(){ 77 val writeReq = Vec(StorePipelineWidth, Flipped(ValidIO(new DataWriteReq))) 78 val dataOut = Output(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))) 79 }) 80 81 val data = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))) 82 83 val req = io.writeReq 84 85 for(i <- 0 until StorePipelineWidth) { 86 when(req(i).valid){ 87 for(line <- 0 until StoreBufferSize){ 88 for(word <- 0 until CacheLineWords){ 89 for(byte <- 0 until DataBytes){ 90 when( 91 req(i).bits.wvec(line) && ( 92 req(i).bits.mask(byte) && (req(i).bits.wordOffset(WordsWidth-1, 0) === word.U) || 93 req(i).bits.wline 94 ) 95 ){ 96 data(line)(word)(byte) := req(i).bits.data(byte*8+7, byte*8) 97 } 98 } 99 } 100 } 101 } 102 } 103 104 io.dataOut := data 105} 106 107class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst with HasPerfEvents { 108 val io = IO(new Bundle() { 109 val hartId = Input(UInt(8.W)) 110 val in = Vec(StorePipelineWidth, Flipped(Decoupled(new DCacheWordReqWithVaddr))) //Todo: store logic only support Width == 2 now 111 val dcache = Flipped(new DCacheToSbufferIO) 112 val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) 113 val sqempty = Input(Bool()) 114 val flush = Flipped(new SbufferFlushBundle) 115 val csrCtrl = Flipped(new CustomCSRCtrlIO) 116 }) 117 118 val dataModule = Module(new SbufferData) 119 dataModule.io.writeReq <> DontCare 120 val writeReq = dataModule.io.writeReq 121 122 val ptag = Reg(Vec(StoreBufferSize, UInt(PTagWidth.W))) 123 val vtag = Reg(Vec(StoreBufferSize, UInt(VTagWidth.W))) 124 val mask = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool())))) 125 val waitInflightMask = Reg(Vec(StoreBufferSize, UInt(StoreBufferSize.W))) 126 val data = dataModule.io.dataOut 127 val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U.asTypeOf(new SbufferEntryState)))) 128 val cohCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(EvictCountBits.W)))) 129 val missqReplayCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(MissqReplayCountBits.W)))) 130 131 val willSendDcacheReq = Wire(Bool()) 132 133 /* 134 idle --[flush] --> drain --[buf empty]--> idle 135 --[buf full]--> replace --[dcache resp]--> idle 136 */ 137 // x_drain_all: drain store queue and sbuffer 138 // x_drain_sbuffer: drain sbuffer only, block store queue to sbuffer write 139 val x_idle :: x_replace :: x_drain_all :: x_drain_sbuffer :: Nil = Enum(4) 140 def needDrain(state: UInt): Bool = 141 state(1) 142 val sbuffer_state = RegInit(x_idle) 143 144 // ---------------------- Store Enq Sbuffer --------------------- 145 146 def getPTag(pa: UInt): UInt = 147 pa(PAddrBits - 1, PAddrBits - PTagWidth) 148 149 def getVTag(va: UInt): UInt = 150 va(VAddrBits - 1, VAddrBits - VTagWidth) 151 152 def getWord(pa: UInt): UInt = 153 pa(PAddrBits-1, 3) 154 155 def getWordOffset(pa: UInt): UInt = 156 pa(OffsetWidth-1, 3) 157 158 def getAddr(ptag: UInt): UInt = 159 Cat(ptag, 0.U((PAddrBits - PTagWidth).W)) 160 161 def getByteOffset(offect: UInt): UInt = 162 Cat(offect(OffsetWidth - 1, 3), 0.U(3.W)) 163 164 def isOneOf(key: UInt, seq: Seq[UInt]): Bool = 165 if(seq.isEmpty) false.B else Cat(seq.map(_===key)).orR() 166 167 def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f 168 169 // sbuffer entry count 170 171 val plru = new PseudoLRU(StoreBufferSize) 172 val accessIdx = Wire(Vec(StorePipelineWidth + 1, Valid(UInt(SbufferIndexWidth.W)))) 173 174 val replaceIdx = plru.way 175 plru.access(accessIdx) 176 177 //-------------------------cohCount----------------------------- 178 // insert and merge: cohCount=0 179 // every cycle cohCount+=1 180 // if cohCount(EvictCountBits-1)==1, evict 181 val cohTimeOutMask = VecInit(widthMap(i => cohCount(i)(EvictCountBits - 1) && stateVec(i).isActive())) 182 val (cohTimeOutIdx, cohHasTimeOut) = PriorityEncoderWithFlag(cohTimeOutMask) 183 val missqReplayTimeOutMask = VecInit(widthMap(i => missqReplayCount(i)(MissqReplayCountBits - 1) && stateVec(i).w_timeout)) 184 val (missqReplayTimeOutIdx, missqReplayMayHasTimeOut) = PriorityEncoderWithFlag(missqReplayTimeOutMask) 185 val missqReplayHasTimeOut = RegNext(missqReplayMayHasTimeOut) && !RegNext(willSendDcacheReq) 186 val missqReplayTimeOutIdxReg = RegEnable(missqReplayTimeOutIdx, missqReplayMayHasTimeOut) 187 188 val activeMask = VecInit(stateVec.map(s => s.isActive())) 189 val drainIdx = PriorityEncoder(activeMask) 190 191 val inflightMask = VecInit(stateVec.map(s => s.isInflight())) 192 193 val inptags = io.in.map(in => getPTag(in.bits.addr)) 194 val invtags = io.in.map(in => getVTag(in.bits.vaddr)) 195 val sameTag = inptags(0) === inptags(1) 196 val firstWord = getWord(io.in(0).bits.addr) 197 val secondWord = getWord(io.in(1).bits.addr) 198 val sameWord = firstWord === secondWord 199 200 // merge condition 201 val mergeMask = Wire(Vec(StorePipelineWidth, Vec(StoreBufferSize, Bool()))) 202 val mergeIdx = mergeMask.map(PriorityEncoder(_)) // avoid using mergeIdx for better timing 203 val canMerge = mergeMask.map(ParallelOR(_)) 204 val mergeVec = mergeMask.map(_.asUInt) 205 206 for(i <- 0 until StorePipelineWidth){ 207 mergeMask(i) := widthMap(j => 208 inptags(i) === ptag(j) && activeMask(j) 209 ) 210 assert(!(PopCount(mergeMask(i).asUInt) > 1.U && io.in(i).fire())) 211 } 212 213 // insert condition 214 // firstInsert: the first invalid entry 215 // if first entry canMerge or second entry has the same ptag with the first entry, 216 // secondInsert equal the first invalid entry, otherwise, the second invalid entry 217 val invalidMask = VecInit(stateVec.map(s => s.isInvalid())) 218 val evenInvalidMask = GetEvenBits(invalidMask.asUInt) 219 val oddInvalidMask = GetOddBits(invalidMask.asUInt) 220 221 def getFirstOneOH(input: UInt): UInt = { 222 assert(input.getWidth > 1) 223 val output = WireInit(VecInit(input.asBools)) 224 (1 until input.getWidth).map(i => { 225 output(i) := !input(i - 1, 0).orR && input(i) 226 }) 227 output.asUInt 228 } 229 230 val evenRawInsertVec = getFirstOneOH(evenInvalidMask) 231 val oddRawInsertVec = getFirstOneOH(oddInvalidMask) 232 val (evenRawInsertIdx, evenCanInsert) = PriorityEncoderWithFlag(evenInvalidMask) 233 val (oddRawInsertIdx, oddCanInsert) = PriorityEncoderWithFlag(oddInvalidMask) 234 val evenInsertIdx = Cat(evenRawInsertIdx, 0.U(1.W)) // slow to generate, for debug only 235 val oddInsertIdx = Cat(oddRawInsertIdx, 1.U(1.W)) // slow to generate, for debug only 236 val evenInsertVec = GetEvenBits.reverse(evenRawInsertVec) 237 val oddInsertVec = GetOddBits.reverse(oddRawInsertVec) 238 239 val enbufferSelReg = RegInit(false.B) 240 when(io.in(0).valid) { 241 enbufferSelReg := ~enbufferSelReg 242 } 243 244 val firstInsertIdx = Mux(enbufferSelReg, evenInsertIdx, oddInsertIdx) // slow to generate, for debug only 245 val secondInsertIdx = Mux(sameTag, 246 firstInsertIdx, 247 Mux(~enbufferSelReg, evenInsertIdx, oddInsertIdx) 248 ) // slow to generate, for debug only 249 val firstInsertVec = Mux(enbufferSelReg, evenInsertVec, oddInsertVec) 250 val secondInsertVec = Mux(sameTag, 251 firstInsertVec, 252 Mux(~enbufferSelReg, evenInsertVec, oddInsertVec) 253 ) // slow to generate, for debug only 254 val firstCanInsert = sbuffer_state =/= x_drain_sbuffer && Mux(enbufferSelReg, evenCanInsert, oddCanInsert) 255 val secondCanInsert = sbuffer_state =/= x_drain_sbuffer && Mux(sameTag, 256 firstCanInsert, 257 Mux(~enbufferSelReg, evenCanInsert, oddCanInsert) 258 ) 259 val forward_need_uarch_drain = WireInit(false.B) 260 val merge_need_uarch_drain = WireInit(false.B) 261 val do_uarch_drain = RegNext(forward_need_uarch_drain) || RegNext(RegNext(merge_need_uarch_drain)) 262 XSPerfAccumulate("do_uarch_drain", do_uarch_drain) 263 264 io.in(0).ready := firstCanInsert 265 io.in(1).ready := secondCanInsert && !sameWord && io.in(0).ready 266 267 def wordReqToBufLine(req: DCacheWordReq, reqptag: UInt, reqvtag: UInt, insertIdx: UInt, insertVec: UInt, wordOffset: UInt, flushMask: Bool): Unit = { 268 assert(UIntToOH(insertIdx) === insertVec) 269 val sameBlockInflightMask = genSameBlockInflightMask(reqptag) 270 (0 until StoreBufferSize).map(entryIdx => { 271 when(insertVec(entryIdx)){ 272 stateVec(entryIdx).state_valid := true.B 273 stateVec(entryIdx).w_sameblock_inflight := sameBlockInflightMask.orR // set w_sameblock_inflight when a line is first allocated 274 when(sameBlockInflightMask.orR){ 275 waitInflightMask(entryIdx) := sameBlockInflightMask 276 } 277 cohCount(entryIdx) := 0.U 278 // missqReplayCount(insertIdx) := 0.U 279 ptag(entryIdx) := reqptag 280 vtag(entryIdx) := reqvtag // update vtag iff a new sbuffer line is allocated 281 when(flushMask){ 282 for(j <- 0 until CacheLineWords){ 283 for(i <- 0 until DataBytes){ 284 mask(entryIdx)(j)(i) := false.B 285 } 286 } 287 } 288 for(i <- 0 until DataBytes){ 289 when(req.mask(i)){ 290 mask(entryIdx)(wordOffset)(i) := true.B 291 } 292 } 293 } 294 }) 295 } 296 297 def mergeWordReq(req: DCacheWordReq, reqptag: UInt, reqvtag: UInt, mergeIdx: UInt, mergeVec: UInt, wordOffset: UInt): Unit = { 298 assert(UIntToOH(mergeIdx) === mergeVec) 299 (0 until StoreBufferSize).map(entryIdx => { 300 when(mergeVec(entryIdx)) { 301 cohCount(entryIdx) := 0.U 302 // missqReplayCount(entryIdx) := 0.U 303 for(i <- 0 until DataBytes){ 304 when(req.mask(i)){ 305 mask(entryIdx)(wordOffset)(i) := true.B 306 // data(entryIdx)(wordOffset)(i) := req.data(i*8+7, i*8) 307 } 308 } 309 // check if vtag is the same, if not, trigger sbuffer flush 310 when(reqvtag =/= vtag(entryIdx)) { 311 XSDebug("reqvtag =/= sbufvtag req(vtag %x ptag %x) sbuffer(vtag %x ptag %x)\n", 312 reqvtag << OffsetWidth, 313 reqptag << OffsetWidth, 314 vtag(entryIdx) << OffsetWidth, 315 ptag(entryIdx) << OffsetWidth 316 ) 317 merge_need_uarch_drain := true.B 318 } 319 } 320 }) 321 } 322 323 for(((in, wordOffset), i) <- io.in.zip(Seq(firstWord, secondWord)).zipWithIndex){ 324 writeReq(i).valid := in.fire() 325 writeReq(i).bits.wordOffset := wordOffset 326 writeReq(i).bits.mask := in.bits.mask 327 writeReq(i).bits.data := in.bits.data 328 writeReq(i).bits.wline := in.bits.wline 329 val debug_insertIdx = if(i == 0) firstInsertIdx else secondInsertIdx 330 val insertVec = if(i == 0) firstInsertVec else secondInsertVec 331 assert(!((PopCount(insertVec) > 1.U) && in.fire())) 332 val insertIdx = OHToUInt(insertVec) 333 val flushMask = if(i == 0) true.B else !sameTag 334 accessIdx(i).valid := RegNext(in.fire()) 335 accessIdx(i).bits := RegNext(Mux(canMerge(i), mergeIdx(i), insertIdx)) 336 when(in.fire()){ 337 when(canMerge(i)){ 338 // writeReq(i).bits.idx := mergeIdx(i) 339 writeReq(i).bits.wvec := mergeVec(i) 340 mergeWordReq(in.bits, inptags(i), invtags(i), mergeIdx(i), mergeVec(i), wordOffset) 341 XSDebug(p"merge req $i to line [${mergeIdx(i)}]\n") 342 }.otherwise({ 343 // writeReq(i).bits.idx := insertIdx 344 writeReq(i).bits.wvec := insertVec 345 wordReqToBufLine(in.bits, inptags(i), invtags(i), insertIdx, insertVec, wordOffset, flushMask) 346 XSDebug(p"insert req $i to line[$insertIdx]\n") 347 assert(debug_insertIdx === insertIdx) 348 }) 349 } 350 } 351 352 353 for(i <- 0 until StoreBufferSize){ 354 XSDebug(stateVec(i).isValid(), 355 p"[$i] timeout:${cohCount(i)(EvictCountBits-1)} state:${stateVec(i)}\n" 356 ) 357 } 358 359 for((req, i) <- io.in.zipWithIndex){ 360 XSDebug(req.fire(), 361 p"accept req [$i]: " + 362 p"addr:${Hexadecimal(req.bits.addr)} " + 363 p"mask:${Binary(req.bits.mask)} " + 364 p"data:${Hexadecimal(req.bits.data)}\n" 365 ) 366 XSDebug(req.valid && !req.ready, 367 p"req [$i] blocked by sbuffer\n" 368 ) 369 } 370 371 // ---------------------- Send Dcache Req --------------------- 372 373 val sbuffer_empty = Cat(invalidMask).andR() 374 val sq_empty = !Cat(io.in.map(_.valid)).orR() 375 val empty = sbuffer_empty && sq_empty 376 val threshold = RegNext(io.csrCtrl.sbuffer_threshold +& 1.U) 377 val validCount = PopCount(activeMask) 378 val do_eviction = RegNext(validCount >= threshold || validCount === (StoreBufferSize-1).U, init = false.B) 379 require((StoreBufferThreshold + 1) <= StoreBufferSize) 380 381 XSDebug(p"validCount[$validCount]\n") 382 383 io.flush.empty := RegNext(empty && io.sqempty) 384 // lru.io.flush := sbuffer_state === x_drain_all && empty 385 switch(sbuffer_state){ 386 is(x_idle){ 387 when(io.flush.valid){ 388 sbuffer_state := x_drain_all 389 }.elsewhen(do_uarch_drain){ 390 sbuffer_state := x_drain_sbuffer 391 }.elsewhen(do_eviction){ 392 sbuffer_state := x_replace 393 } 394 } 395 is(x_drain_all){ 396 when(empty){ 397 sbuffer_state := x_idle 398 } 399 } 400 is(x_drain_sbuffer){ 401 when(io.flush.valid){ 402 sbuffer_state := x_drain_all 403 }.elsewhen(sbuffer_empty){ 404 sbuffer_state := x_idle 405 } 406 } 407 is(x_replace){ 408 when(io.flush.valid){ 409 sbuffer_state := x_drain_all 410 }.elsewhen(do_uarch_drain){ 411 sbuffer_state := x_drain_sbuffer 412 }.elsewhen(!do_eviction){ 413 sbuffer_state := x_idle 414 } 415 } 416 } 417 XSDebug(p"sbuffer state:${sbuffer_state} do eviction:${do_eviction} empty:${empty}\n") 418 419 def noSameBlockInflight(idx: UInt): Bool = { 420 // stateVec(idx) itself must not be s_inflight 421 !Cat(widthMap(i => inflightMask(i) && ptag(idx) === ptag(i))).orR() 422 } 423 424 def genSameBlockInflightMask(ptag_in: UInt): UInt = { 425 val mask = VecInit(widthMap(i => inflightMask(i) && ptag_in === ptag(i))).asUInt // quite slow, use it with care 426 assert(!(PopCount(mask) > 1.U)) 427 mask 428 } 429 430 def haveSameBlockInflight(ptag_in: UInt): Bool = { 431 genSameBlockInflightMask(ptag_in).orR 432 } 433 434 val need_drain = needDrain(sbuffer_state) 435 val need_replace = do_eviction || (sbuffer_state === x_replace) 436 val evictionIdx = Mux(missqReplayHasTimeOut, 437 missqReplayTimeOutIdxReg, 438 Mux(need_drain, 439 drainIdx, 440 Mux(cohHasTimeOut, cohTimeOutIdx, replaceIdx) 441 ) 442 ) 443 444 /* 445 If there is a inflight dcache req which has same ptag with evictionIdx's ptag, 446 current eviction should be blocked. 447 */ 448 val prepareValid = missqReplayHasTimeOut || 449 stateVec(evictionIdx).isDcacheReqCandidate() && (need_drain || cohHasTimeOut || need_replace) 450 assert(!(stateVec(evictionIdx).isDcacheReqCandidate && !noSameBlockInflight(evictionIdx))) 451 val prepareValidReg = RegInit(false.B) 452 // when canSendDcacheReq, send dcache req stored in pipeline reg to dcache 453 val canSendDcacheReq = io.dcache.req.ready || !prepareValidReg 454 // when willSendDcacheReq, read dcache req data and store them in a pipeline reg 455 willSendDcacheReq := prepareValid && canSendDcacheReq 456 when(io.dcache.req.fire()){ 457 prepareValidReg := false.B 458 } 459 when(canSendDcacheReq){ 460 prepareValidReg := prepareValid 461 } 462 when(willSendDcacheReq){ 463 stateVec(evictionIdx).state_inflight := true.B 464 stateVec(evictionIdx).w_timeout := false.B 465 // stateVec(evictionIdx).s_pipe_req := true.B 466 XSDebug(p"$evictionIdx will be sent to Dcache\n") 467 } 468 XSDebug(p"need drain:$need_drain cohHasTimeOut: $cohHasTimeOut need replace:$need_replace\n") 469 XSDebug(p"drainIdx:$drainIdx tIdx:$cohTimeOutIdx replIdx:$replaceIdx " + 470 p"blocked:${!noSameBlockInflight(evictionIdx)} v:${activeMask(evictionIdx)}\n") 471 XSDebug(p"prepareValid:$prepareValid evictIdx:$evictionIdx dcache ready:${io.dcache.req.ready}\n") 472 // Note: if other dcache req in the same block are inflight, 473 // the lru update may not accurate 474 accessIdx(StorePipelineWidth).valid := invalidMask(replaceIdx) || ( 475 need_replace && !need_drain && !cohHasTimeOut && !missqReplayHasTimeOut && canSendDcacheReq && activeMask(replaceIdx)) 476 accessIdx(StorePipelineWidth).bits := replaceIdx 477 val evictionIdxReg = RegEnable(evictionIdx, enable = willSendDcacheReq) 478 val evictionPTag = RegEnable(ptag(evictionIdx), enable = willSendDcacheReq) 479 val evictionVTag = RegEnable(vtag(evictionIdx), enable = willSendDcacheReq) 480 481 io.dcache.req.valid := prepareValidReg 482 io.dcache.req.bits := DontCare 483 io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR 484 io.dcache.req.bits.addr := getAddr(evictionPTag) 485 io.dcache.req.bits.vaddr := getAddr(evictionVTag) 486 io.dcache.req.bits.data := data(evictionIdxReg).asUInt 487 io.dcache.req.bits.mask := mask(evictionIdxReg).asUInt 488 io.dcache.req.bits.id := evictionIdxReg 489 490 when (io.dcache.req.fire()) { 491 assert(!(io.dcache.req.bits.vaddr === 0.U)) 492 assert(!(io.dcache.req.bits.addr === 0.U)) 493 } 494 495 XSDebug(io.dcache.req.fire(), 496 p"send buf [$evictionIdxReg] to Dcache, req fire\n" 497 ) 498 499 // update sbuffer status according to dcache resp source 500 501 def id_to_sbuffer_id(id: UInt): UInt = { 502 require(id.getWidth >= log2Up(StoreBufferSize)) 503 id(log2Up(StoreBufferSize)-1, 0) 504 } 505 506 // hit resp 507 io.dcache.hit_resps.map(resp => { 508 val dcache_resp_id = resp.bits.id 509 when (resp.fire()) { 510 stateVec(dcache_resp_id).state_inflight := false.B 511 stateVec(dcache_resp_id).state_valid := false.B 512 assert(!resp.bits.replay) 513 assert(!resp.bits.miss) // not need to resp if miss, to be opted 514 assert(stateVec(dcache_resp_id).state_inflight === true.B) 515 } 516 517 // Update w_sameblock_inflight flag is delayed for 1 cycle 518 // 519 // When a new req allocate a new line in sbuffer, sameblock_inflight check will ignore 520 // current dcache.hit_resps. Then, in the next cycle, we have plenty of time to check 521 // if the same block is still inflight 522 (0 until StoreBufferSize).map(i => { 523 when( 524 stateVec(i).w_sameblock_inflight && 525 stateVec(i).state_valid && 526 RegNext(resp.fire()) && 527 waitInflightMask(i) === UIntToOH(RegNext(id_to_sbuffer_id(dcache_resp_id))) 528 ){ 529 stateVec(i).w_sameblock_inflight := false.B 530 } 531 }) 532 }) 533 534 535 // replay resp 536 val replay_resp_id = io.dcache.replay_resp.bits.id 537 when (io.dcache.replay_resp.fire()) { 538 missqReplayCount(replay_resp_id) := 0.U 539 stateVec(replay_resp_id).w_timeout := true.B 540 // waiting for timeout 541 assert(io.dcache.replay_resp.bits.replay) 542 assert(stateVec(replay_resp_id).state_inflight === true.B) 543 } 544 545 // TODO: reuse cohCount 546 (0 until StoreBufferSize).map(i => { 547 when(stateVec(i).w_timeout && stateVec(i).state_inflight && !missqReplayCount(i)(MissqReplayCountBits-1)) { 548 missqReplayCount(i) := missqReplayCount(i) + 1.U 549 } 550 when(activeMask(i) && !cohTimeOutMask(i)){ 551 cohCount(i) := cohCount(i)+1.U 552 } 553 }) 554 555 if (env.EnableDifftest) { 556 // hit resp 557 io.dcache.hit_resps.zipWithIndex.map{case (resp, index) => { 558 val difftest = Module(new DifftestSbufferEvent) 559 val dcache_resp_id = resp.bits.id 560 difftest.io.clock := clock 561 difftest.io.coreid := io.hartId 562 difftest.io.index := index.U 563 difftest.io.sbufferResp := RegNext(resp.fire()) 564 difftest.io.sbufferAddr := RegNext(getAddr(ptag(dcache_resp_id))) 565 difftest.io.sbufferData := RegNext(data(dcache_resp_id).asTypeOf(Vec(CacheLineBytes, UInt(8.W)))) 566 difftest.io.sbufferMask := RegNext(mask(dcache_resp_id).asUInt) 567 }} 568 } 569 570 // ---------------------- Load Data Forward --------------------- 571 val mismatch = Wire(Vec(LoadPipelineWidth, Bool())) 572 XSPerfAccumulate("vaddr_match_failed", mismatch(0) || mismatch(1)) 573 for ((forward, i) <- io.forward.zipWithIndex) { 574 val vtag_matches = VecInit(widthMap(w => vtag(w) === getVTag(forward.vaddr))) 575 val ptag_matches = VecInit(widthMap(w => ptag(w) === getPTag(forward.paddr))) 576 val tag_matches = vtag_matches 577 val tag_mismatch = RegNext(forward.valid) && VecInit(widthMap(w => 578 RegNext(vtag_matches(w)) =/= RegNext(ptag_matches(w)) && RegNext((activeMask(w) || inflightMask(w))) 579 )).asUInt.orR 580 mismatch(i) := tag_mismatch 581 when (tag_mismatch) { 582 XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n", 583 RegNext(ptag_matches.asUInt), 584 RegNext(vtag_matches.asUInt), 585 RegNext(forward.vaddr), 586 RegNext(forward.paddr) 587 ) 588 forward_need_uarch_drain := true.B 589 } 590 val valid_tag_matches = widthMap(w => tag_matches(w) && activeMask(w)) 591 val inflight_tag_matches = widthMap(w => tag_matches(w) && inflightMask(w)) 592 val line_offset_mask = UIntToOH(getWordOffset(forward.paddr)) 593 594 val valid_tag_match_reg = valid_tag_matches.map(RegNext(_)) 595 val inflight_tag_match_reg = inflight_tag_matches.map(RegNext(_)) 596 val line_offset_reg = RegNext(line_offset_mask) 597 val forward_mask_candidate_reg = RegEnable( 598 VecInit(mask.map(entry => entry(getWordOffset(forward.paddr)))), 599 forward.valid 600 ) 601 val forward_data_candidate_reg = RegEnable( 602 VecInit(data.map(entry => entry(getWordOffset(forward.paddr)))), 603 forward.valid 604 ) 605 606 val selectedValidMask = Mux1H(valid_tag_match_reg, forward_mask_candidate_reg) 607 val selectedValidData = Mux1H(valid_tag_match_reg, forward_data_candidate_reg) 608 selectedValidMask.suggestName("selectedValidMask_"+i) 609 selectedValidData.suggestName("selectedValidData_"+i) 610 611 val selectedInflightMask = Mux1H(inflight_tag_match_reg, forward_mask_candidate_reg) 612 val selectedInflightData = Mux1H(inflight_tag_match_reg, forward_data_candidate_reg) 613 selectedInflightMask.suggestName("selectedInflightMask_"+i) 614 selectedInflightData.suggestName("selectedInflightData_"+i) 615 616 // currently not being used 617 val selectedInflightMaskFast = Mux1H(line_offset_mask, Mux1H(inflight_tag_matches, mask).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool())))) 618 val selectedValidMaskFast = Mux1H(line_offset_mask, Mux1H(valid_tag_matches, mask).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool())))) 619 620 forward.dataInvalid := false.B // data in store line merge buffer is always ready 621 forward.matchInvalid := tag_mismatch // paddr / vaddr cam result does not match 622 for (j <- 0 until DataBytes) { 623 forward.forwardMask(j) := false.B 624 forward.forwardData(j) := DontCare 625 626 // valid entries have higher priority than inflight entries 627 when(selectedInflightMask(j)) { 628 forward.forwardMask(j) := true.B 629 forward.forwardData(j) := selectedInflightData(j) 630 } 631 when(selectedValidMask(j)) { 632 forward.forwardMask(j) := true.B 633 forward.forwardData(j) := selectedValidData(j) 634 } 635 636 forward.forwardMaskFast(j) := selectedInflightMaskFast(j) || selectedValidMaskFast(j) 637 } 638 } 639 640 for (i <- 0 until StoreBufferSize) { 641 XSDebug("sbf entry " + i + " : ptag %x vtag %x valid %x active %x inflight %x w_timeout %x\n", 642 ptag(i) << OffsetWidth, 643 vtag(i) << OffsetWidth, 644 stateVec(i).isValid(), 645 activeMask(i), 646 inflightMask(i), 647 stateVec(i).w_timeout 648 ) 649 } 650 651 val perf_valid_entry_count = PopCount(VecInit(stateVec.map(s => !s.isInvalid())).asUInt) 652 XSPerfHistogram("util", perf_valid_entry_count, true.B, 0, StoreBufferSize, 1) 653 XSPerfAccumulate("sbuffer_req_valid", PopCount(VecInit(io.in.map(_.valid)).asUInt)) 654 XSPerfAccumulate("sbuffer_req_fire", PopCount(VecInit(io.in.map(_.fire())).asUInt)) 655 XSPerfAccumulate("sbuffer_merge", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && canMerge(i)})).asUInt)) 656 XSPerfAccumulate("sbuffer_newline", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && !canMerge(i)})).asUInt)) 657 XSPerfAccumulate("dcache_req_valid", io.dcache.req.valid) 658 XSPerfAccumulate("dcache_req_fire", io.dcache.req.fire()) 659 XSPerfAccumulate("sbuffer_idle", sbuffer_state === x_idle) 660 XSPerfAccumulate("sbuffer_flush", sbuffer_state === x_drain_sbuffer) 661 XSPerfAccumulate("sbuffer_replace", sbuffer_state === x_replace) 662 XSPerfAccumulate("evenCanInsert", evenCanInsert) 663 XSPerfAccumulate("oddCanInsert", oddCanInsert) 664 XSPerfAccumulate("mainpipe_resp_valid", io.dcache.main_pipe_hit_resp.fire()) 665 XSPerfAccumulate("refill_resp_valid", io.dcache.refill_hit_resp.fire()) 666 XSPerfAccumulate("replay_resp_valid", io.dcache.replay_resp.fire()) 667 XSPerfAccumulate("coh_timeout", cohHasTimeOut) 668 669 // val (store_latency_sample, store_latency) = TransactionLatencyCounter(io.lsu.req.fire(), io.lsu.resp.fire()) 670 // XSPerfHistogram("store_latency", store_latency, store_latency_sample, 0, 100, 10) 671 // XSPerfAccumulate("store_req", io.lsu.req.fire()) 672 673 val perfEvents = Seq( 674 ("sbuffer_req_valid ", PopCount(VecInit(io.in.map(_.valid)).asUInt) ), 675 ("sbuffer_req_fire ", PopCount(VecInit(io.in.map(_.fire())).asUInt) ), 676 ("sbuffer_merge ", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && canMerge(i)})).asUInt) ), 677 ("sbuffer_newline ", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && !canMerge(i)})).asUInt) ), 678 ("dcache_req_valid ", io.dcache.req.valid ), 679 ("dcache_req_fire ", io.dcache.req.fire() ), 680 ("sbuffer_idle ", sbuffer_state === x_idle ), 681 ("sbuffer_flush ", sbuffer_state === x_drain_sbuffer ), 682 ("sbuffer_replace ", sbuffer_state === x_replace ), 683 ("mpipe_resp_valid ", io.dcache.main_pipe_hit_resp.fire() ), 684 ("refill_resp_valid ", io.dcache.refill_hit_resp.fire() ), 685 ("replay_resp_valid ", io.dcache.replay_resp.fire() ), 686 ("coh_timeout ", cohHasTimeOut ), 687 ("sbuffer_1_4_valid ", (perf_valid_entry_count < (StoreBufferSize.U/4.U)) ), 688 ("sbuffer_2_4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/4.U)) & (perf_valid_entry_count <= (StoreBufferSize.U/2.U)) ), 689 ("sbuffer_3_4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/2.U)) & (perf_valid_entry_count <= (StoreBufferSize.U*3.U/4.U))), 690 ("sbuffer_full_valid", (perf_valid_entry_count > (StoreBufferSize.U*3.U/4.U))) 691 ) 692 generatePerfEvent() 693 694} 695