1/*************************************************************************************** 2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences 3* Copyright (c) 2020-2021 Peng Cheng Laboratory 4* 5* XiangShan is licensed under Mulan PSL v2. 6* You can use this software according to the terms and conditions of the Mulan PSL v2. 7* You may obtain a copy of Mulan PSL v2 at: 8* http://license.coscl.org.cn/MulanPSL2 9* 10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, 11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, 12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE. 13* 14* See the Mulan PSL v2 for more details. 15***************************************************************************************/ 16 17package xiangshan.mem 18 19import chipsalliance.rocketchip.config.Parameters 20import chisel3._ 21import chisel3.util._ 22import xiangshan._ 23import utils._ 24import xiangshan.cache._ 25import difftest._ 26import freechips.rocketchip.util._ 27 28class SbufferFlushBundle extends Bundle { 29 val valid = Output(Bool()) 30 val empty = Input(Bool()) 31} 32 33trait HasSbufferConst extends HasXSParameter { 34 val EvictCycles = 1 << 20 35 val SbufferReplayDelayCycles = 16 36 require(isPow2(EvictCycles)) 37 val EvictCountBits = log2Up(EvictCycles+1) 38 val MissqReplayCountBits = log2Up(SbufferReplayDelayCycles) + 1 39 40 val SbufferIndexWidth: Int = log2Up(StoreBufferSize) 41 // paddr = ptag + offset 42 val CacheLineBytes: Int = CacheLineSize / 8 43 val CacheLineWords: Int = CacheLineBytes / DataBytes 44 val OffsetWidth: Int = log2Up(CacheLineBytes) 45 val WordsWidth: Int = log2Up(CacheLineWords) 46 val PTagWidth: Int = PAddrBits - OffsetWidth 47 val VTagWidth: Int = VAddrBits - OffsetWidth 48 val WordOffsetWidth: Int = PAddrBits - WordsWidth 49} 50 51class SbufferEntryState (implicit p: Parameters) extends SbufferBundle { 52 val state_valid = Bool() // this entry is active 53 val state_inflight = Bool() // sbuffer is trying to write this entry to dcache 54 val w_timeout = Bool() // with timeout resp, waiting for resend store pipeline req timeout 55 val w_sameblock_inflight = Bool() // same cache block dcache req is inflight 56 val s_recheck_inflight = Bool() // recheck if same cache block dcache req is inflight 57 58 def isInvalid(): Bool = !state_valid 59 def isValid(): Bool = state_valid 60 def isActive(): Bool = state_valid && !state_inflight 61 def isInflight(): Bool = state_inflight 62 def isDcacheReqCandidate(): Bool = state_valid && !state_inflight && !w_sameblock_inflight 63} 64 65class SbufferBundle(implicit p: Parameters) extends XSBundle with HasSbufferConst 66 67class DataWriteReq(implicit p: Parameters) extends SbufferBundle { 68 // val idx = UInt(SbufferIndexWidth.W) 69 val wvec = UInt(StoreBufferSize.W) 70 val mask = UInt((DataBits/8).W) 71 val data = UInt(DataBits.W) 72 val wordOffset = UInt(WordOffsetWidth.W) 73 val wline = Bool() 74} 75 76class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst { 77 val io = IO(new Bundle(){ 78 val writeReq = Vec(EnsbufferWidth, Flipped(ValidIO(new DataWriteReq))) 79 val dataOut = Output(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))) 80 }) 81 82 val data = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W))))) 83 84 val req = io.writeReq 85 86 for(i <- 0 until EnsbufferWidth) { 87 when(req(i).valid){ 88 for(line <- 0 until StoreBufferSize){ 89 for(word <- 0 until CacheLineWords){ 90 for(byte <- 0 until DataBytes){ 91 when( 92 req(i).bits.wvec(line) && ( 93 req(i).bits.mask(byte) && (req(i).bits.wordOffset(WordsWidth-1, 0) === word.U) || 94 req(i).bits.wline 95 ) 96 ){ 97 data(line)(word)(byte) := req(i).bits.data(byte*8+7, byte*8) 98 } 99 } 100 } 101 } 102 } 103 } 104 105 io.dataOut := data 106} 107 108class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst with HasPerfEvents { 109 val io = IO(new Bundle() { 110 val hartId = Input(UInt(8.W)) 111 val in = Vec(EnsbufferWidth, Flipped(Decoupled(new DCacheWordReqWithVaddr))) 112 val dcache = Flipped(new DCacheToSbufferIO) 113 val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO)) 114 val sqempty = Input(Bool()) 115 val flush = Flipped(new SbufferFlushBundle) 116 val csrCtrl = Flipped(new CustomCSRCtrlIO) 117 }) 118 119 val dataModule = Module(new SbufferData) 120 dataModule.io.writeReq <> DontCare 121 val writeReq = dataModule.io.writeReq 122 123 val ptag = Reg(Vec(StoreBufferSize, UInt(PTagWidth.W))) 124 val vtag = Reg(Vec(StoreBufferSize, UInt(VTagWidth.W))) 125 val mask = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool())))) 126 val waitInflightMask = Reg(Vec(StoreBufferSize, UInt(StoreBufferSize.W))) 127 val data = dataModule.io.dataOut 128 val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U.asTypeOf(new SbufferEntryState)))) 129 val cohCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(EvictCountBits.W)))) 130 val missqReplayCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(MissqReplayCountBits.W)))) 131 132 val willSendDcacheReq = Wire(Bool()) 133 134 /* 135 idle --[flush] --> drain --[buf empty]--> idle 136 --[buf full]--> replace --[dcache resp]--> idle 137 */ 138 // x_drain_all: drain store queue and sbuffer 139 // x_drain_sbuffer: drain sbuffer only, block store queue to sbuffer write 140 val x_idle :: x_replace :: x_drain_all :: x_drain_sbuffer :: Nil = Enum(4) 141 def needDrain(state: UInt): Bool = 142 state(1) 143 val sbuffer_state = RegInit(x_idle) 144 145 // ---------------------- Store Enq Sbuffer --------------------- 146 147 def getPTag(pa: UInt): UInt = 148 pa(PAddrBits - 1, PAddrBits - PTagWidth) 149 150 def getVTag(va: UInt): UInt = 151 va(VAddrBits - 1, VAddrBits - VTagWidth) 152 153 def getWord(pa: UInt): UInt = 154 pa(PAddrBits-1, 3) 155 156 def getWordOffset(pa: UInt): UInt = 157 pa(OffsetWidth-1, 3) 158 159 def getAddr(ptag: UInt): UInt = 160 Cat(ptag, 0.U((PAddrBits - PTagWidth).W)) 161 162 def getByteOffset(offect: UInt): UInt = 163 Cat(offect(OffsetWidth - 1, 3), 0.U(3.W)) 164 165 def isOneOf(key: UInt, seq: Seq[UInt]): Bool = 166 if(seq.isEmpty) false.B else Cat(seq.map(_===key)).orR() 167 168 def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f 169 170 // sbuffer entry count 171 172 val plru = new PseudoLRU(StoreBufferSize) 173 val accessIdx = Wire(Vec(EnsbufferWidth + 1, Valid(UInt(SbufferIndexWidth.W)))) 174 175 val replaceIdx = plru.way 176 plru.access(accessIdx) 177 178 //-------------------------cohCount----------------------------- 179 // insert and merge: cohCount=0 180 // every cycle cohCount+=1 181 // if cohCount(EvictCountBits-1)==1, evict 182 val cohTimeOutMask = VecInit(widthMap(i => cohCount(i)(EvictCountBits - 1) && stateVec(i).isActive())) 183 val (cohTimeOutIdx, cohHasTimeOut) = PriorityEncoderWithFlag(cohTimeOutMask) 184 val missqReplayTimeOutMask = VecInit(widthMap(i => missqReplayCount(i)(MissqReplayCountBits - 1) && stateVec(i).w_timeout)) 185 val (missqReplayTimeOutIdx, missqReplayMayHasTimeOut) = PriorityEncoderWithFlag(missqReplayTimeOutMask) 186 val missqReplayHasTimeOut = RegNext(missqReplayMayHasTimeOut) && !RegNext(willSendDcacheReq) 187 val missqReplayTimeOutIdxReg = RegEnable(missqReplayTimeOutIdx, missqReplayMayHasTimeOut) 188 189 val activeMask = VecInit(stateVec.map(s => s.isActive())) 190 val drainIdx = PriorityEncoder(activeMask) 191 192 val inflightMask = VecInit(stateVec.map(s => s.isInflight())) 193 194 val inptags = io.in.map(in => getPTag(in.bits.addr)) 195 val invtags = io.in.map(in => getVTag(in.bits.vaddr)) 196 val sameTag = Seq.tabulate(io.in.length)(x => Seq.tabulate(io.in.length)(y => inptags(x) === inptags(y))) 197 val words = (0 until EnsbufferWidth).map(i => getWord(io.in(i).bits.addr)) 198 val sameWord = Seq.tabulate(EnsbufferWidth)(x => Seq.tabulate(EnsbufferWidth)(y => words(x) === words(y))) 199 200 // merge condition 201 val mergeMask = Wire(Vec(EnsbufferWidth, Vec(StoreBufferSize, Bool()))) 202 val mergeIdx = mergeMask.map(PriorityEncoder(_)) // avoid using mergeIdx for better timing 203 val canMerge = mergeMask.map(ParallelOR(_)) 204 val mergeVec = mergeMask.map(_.asUInt) 205 206 for(i <- 0 until EnsbufferWidth){ 207 mergeMask(i) := widthMap(j => 208 inptags(i) === ptag(j) && activeMask(j) 209 ) 210 assert(!(PopCount(mergeMask(i).asUInt) > 1.U && io.in(i).fire())) 211 } 212 213 // insert condition 214 // firstInsert: the first invalid entry 215 // if first entry canMerge or second entry has the same ptag with the first entry, 216 // secondInsert equal the first invalid entry, otherwise, the second invalid entry 217 val invalidMask = VecInit(stateVec.map(s => s.isInvalid())) 218 val remInvalidMask = GetRemBits(EnsbufferWidth)(invalidMask.asUInt) 219 220 def getFirstOneOH(input: UInt): UInt = { 221 assert(input.getWidth > 1) 222 val output = WireInit(VecInit(input.asBools)) 223 (1 until input.getWidth).map(i => { 224 output(i) := !input(i - 1, 0).orR && input(i) 225 }) 226 output.asUInt 227 } 228 229 val remRawInsertVec = remInvalidMask.map(getFirstOneOH(_)) 230 val remRawInsert = remInvalidMask.map(PriorityEncoderWithFlag(_)).unzip 231 val (remRawInsertIdx, remCanInsert) = (remRawInsert._1, VecInit(remRawInsert._2)) 232 val remInsertIdx = VecInit(remRawInsertIdx.zipWithIndex.map { case (raw, idx) => 233 if (EnsbufferWidth > 1) Cat(raw, idx.U(log2Ceil(EnsbufferWidth).W)) 234 else raw 235 }) // slow to generate, for debug only 236 val remInsertVec = VecInit(GetRemBits.reverse(EnsbufferWidth)(remRawInsertVec)) 237 238 val enbufferSelReg = RegInit(0.U(log2Up(EnsbufferWidth).W)) 239 if (EnsbufferWidth > 1) when(io.in(0).valid) { 240 enbufferSelReg := enbufferSelReg + 1.U 241 } 242 243 val insertIdxs = (0 until EnsbufferWidth).map(i => 244 PriorityMuxDefault(if (i == 0) Seq(0.B -> 0.U) else (0 until i).map(j => sameTag(i)(j) -> remInsertIdx(enbufferSelReg + j.U)), remInsertIdx(enbufferSelReg + i.U)) 245 ) // slow to generate, for debug only 246 val insertVecs = (0 until EnsbufferWidth).map(i => 247 PriorityMuxDefault(if (i == 0) Seq(0.B -> 0.U) else (0 until i).map(j => sameTag(i)(j) -> remInsertVec(enbufferSelReg + j.U)), remInsertVec(enbufferSelReg + i.U)) 248 ) // slow to generate, for debug only 249 val canInserts = (0 until EnsbufferWidth).map(i => 250 PriorityMuxDefault(if (i == 0) Seq(0.B -> 0.B) else (0 until i).map(j => sameTag(i)(j) -> remCanInsert(enbufferSelReg + j.U)), remCanInsert(enbufferSelReg + i.U)) 251 ).map(_ && sbuffer_state =/= x_drain_sbuffer) 252 val forward_need_uarch_drain = WireInit(false.B) 253 val merge_need_uarch_drain = WireInit(false.B) 254 val do_uarch_drain = RegNext(forward_need_uarch_drain) || RegNext(RegNext(merge_need_uarch_drain)) 255 XSPerfAccumulate("do_uarch_drain", do_uarch_drain) 256 257 (0 until EnsbufferWidth).foreach(i => 258 io.in(i).ready := canInserts(i) && (if (i == 0) 1.B else !sameWord(0)(i) && io.in(i - 1).ready) 259 ) 260 261 def wordReqToBufLine(req: DCacheWordReq, reqptag: UInt, reqvtag: UInt, insertIdx: UInt, insertVec: UInt, wordOffset: UInt, flushMask: Bool): Unit = { 262 assert(UIntToOH(insertIdx) === insertVec) 263 val sameBlockInflightMask = genSameBlockInflightMask(reqptag) 264 (0 until StoreBufferSize).map(entryIdx => { 265 when(insertVec(entryIdx)){ 266 stateVec(entryIdx).state_valid := true.B 267 stateVec(entryIdx).w_sameblock_inflight := sameBlockInflightMask.orR // set w_sameblock_inflight when a line is first allocated 268 when(sameBlockInflightMask.orR){ 269 waitInflightMask(entryIdx) := sameBlockInflightMask 270 } 271 cohCount(entryIdx) := 0.U 272 // missqReplayCount(insertIdx) := 0.U 273 ptag(entryIdx) := reqptag 274 vtag(entryIdx) := reqvtag // update vtag iff a new sbuffer line is allocated 275 when(flushMask){ 276 for(j <- 0 until CacheLineWords){ 277 for(i <- 0 until DataBytes){ 278 mask(entryIdx)(j)(i) := false.B 279 } 280 } 281 } 282 for(i <- 0 until DataBytes){ 283 when(req.mask(i)){ 284 mask(entryIdx)(wordOffset)(i) := true.B 285 } 286 } 287 } 288 }) 289 } 290 291 def mergeWordReq(req: DCacheWordReq, reqptag: UInt, reqvtag: UInt, mergeIdx: UInt, mergeVec: UInt, wordOffset: UInt): Unit = { 292 assert(UIntToOH(mergeIdx) === mergeVec) 293 (0 until StoreBufferSize).map(entryIdx => { 294 when(mergeVec(entryIdx)) { 295 cohCount(entryIdx) := 0.U 296 // missqReplayCount(entryIdx) := 0.U 297 for(i <- 0 until DataBytes){ 298 when(req.mask(i)){ 299 mask(entryIdx)(wordOffset)(i) := true.B 300 // data(entryIdx)(wordOffset)(i) := req.data(i*8+7, i*8) 301 } 302 } 303 // check if vtag is the same, if not, trigger sbuffer flush 304 when(reqvtag =/= vtag(entryIdx)) { 305 XSDebug("reqvtag =/= sbufvtag req(vtag %x ptag %x) sbuffer(vtag %x ptag %x)\n", 306 reqvtag << OffsetWidth, 307 reqptag << OffsetWidth, 308 vtag(entryIdx) << OffsetWidth, 309 ptag(entryIdx) << OffsetWidth 310 ) 311 merge_need_uarch_drain := true.B 312 } 313 } 314 }) 315 } 316 317 for(((in, wordOffset), i) <- io.in.zip(words).zipWithIndex){ 318 writeReq(i).valid := in.fire() 319 writeReq(i).bits.wordOffset := wordOffset 320 writeReq(i).bits.mask := in.bits.mask 321 writeReq(i).bits.data := in.bits.data 322 writeReq(i).bits.wline := in.bits.wline 323 val debug_insertIdx = insertIdxs(i) 324 val insertVec = insertVecs(i) 325 assert(!((PopCount(insertVec) > 1.U) && in.fire())) 326 val insertIdx = OHToUInt(insertVec) 327 val flushMask = if(i == 0) true.B else (0 until i).map(j => !sameTag(i)(j)).reduce(_ && _) 328 flushMask.suggestName(s"flushMask_${i}") 329 accessIdx(i).valid := RegNext(in.fire()) 330 accessIdx(i).bits := RegNext(Mux(canMerge(i), mergeIdx(i), insertIdx)) 331 when(in.fire()){ 332 when(canMerge(i)){ 333 // writeReq(i).bits.idx := mergeIdx(i) 334 writeReq(i).bits.wvec := mergeVec(i) 335 mergeWordReq(in.bits, inptags(i), invtags(i), mergeIdx(i), mergeVec(i), wordOffset) 336 XSDebug(p"merge req $i to line [${mergeIdx(i)}]\n") 337 }.otherwise({ 338 // writeReq(i).bits.idx := insertIdx 339 writeReq(i).bits.wvec := insertVec 340 wordReqToBufLine(in.bits, inptags(i), invtags(i), insertIdx, insertVec, wordOffset, flushMask) 341 XSDebug(p"insert req $i to line[$insertIdx]\n") 342 assert(debug_insertIdx === insertIdx) 343 }) 344 } 345 } 346 347 348 for(i <- 0 until StoreBufferSize){ 349 XSDebug(stateVec(i).isValid(), 350 p"[$i] timeout:${cohCount(i)(EvictCountBits-1)} state:${stateVec(i)}\n" 351 ) 352 } 353 354 for((req, i) <- io.in.zipWithIndex){ 355 XSDebug(req.fire(), 356 p"accept req [$i]: " + 357 p"addr:${Hexadecimal(req.bits.addr)} " + 358 p"mask:${Binary(req.bits.mask)} " + 359 p"data:${Hexadecimal(req.bits.data)}\n" 360 ) 361 XSDebug(req.valid && !req.ready, 362 p"req [$i] blocked by sbuffer\n" 363 ) 364 } 365 366 // ---------------------- Send Dcache Req --------------------- 367 368 val sbuffer_empty = Cat(invalidMask).andR() 369 val sq_empty = !Cat(io.in.map(_.valid)).orR() 370 val empty = sbuffer_empty && sq_empty 371 val threshold = RegNext(io.csrCtrl.sbuffer_threshold +& 1.U) 372 val validCount = PopCount(activeMask) 373 val do_eviction = RegNext(validCount >= threshold || validCount === (StoreBufferSize-1).U, init = false.B) 374 require((StoreBufferThreshold + 1) <= StoreBufferSize) 375 376 XSDebug(p"validCount[$validCount]\n") 377 378 io.flush.empty := RegNext(empty && io.sqempty) 379 // lru.io.flush := sbuffer_state === x_drain_all && empty 380 switch(sbuffer_state){ 381 is(x_idle){ 382 when(io.flush.valid){ 383 sbuffer_state := x_drain_all 384 }.elsewhen(do_uarch_drain){ 385 sbuffer_state := x_drain_sbuffer 386 }.elsewhen(do_eviction){ 387 sbuffer_state := x_replace 388 } 389 } 390 is(x_drain_all){ 391 when(empty){ 392 sbuffer_state := x_idle 393 } 394 } 395 is(x_drain_sbuffer){ 396 when(io.flush.valid){ 397 sbuffer_state := x_drain_all 398 }.elsewhen(sbuffer_empty){ 399 sbuffer_state := x_idle 400 } 401 } 402 is(x_replace){ 403 when(io.flush.valid){ 404 sbuffer_state := x_drain_all 405 }.elsewhen(do_uarch_drain){ 406 sbuffer_state := x_drain_sbuffer 407 }.elsewhen(!do_eviction){ 408 sbuffer_state := x_idle 409 } 410 } 411 } 412 XSDebug(p"sbuffer state:${sbuffer_state} do eviction:${do_eviction} empty:${empty}\n") 413 414 def noSameBlockInflight(idx: UInt): Bool = { 415 // stateVec(idx) itself must not be s_inflight 416 !Cat(widthMap(i => inflightMask(i) && ptag(idx) === ptag(i))).orR() 417 } 418 419 def genSameBlockInflightMask(ptag_in: UInt): UInt = { 420 val mask = VecInit(widthMap(i => inflightMask(i) && ptag_in === ptag(i))).asUInt // quite slow, use it with care 421 assert(!(PopCount(mask) > 1.U)) 422 mask 423 } 424 425 def haveSameBlockInflight(ptag_in: UInt): Bool = { 426 genSameBlockInflightMask(ptag_in).orR 427 } 428 429 val need_drain = needDrain(sbuffer_state) 430 val need_replace = do_eviction || (sbuffer_state === x_replace) 431 val evictionIdx = Mux(missqReplayHasTimeOut, 432 missqReplayTimeOutIdxReg, 433 Mux(need_drain, 434 drainIdx, 435 Mux(cohHasTimeOut, cohTimeOutIdx, replaceIdx) 436 ) 437 ) 438 439 /* 440 If there is a inflight dcache req which has same ptag with evictionIdx's ptag, 441 current eviction should be blocked. 442 */ 443 val prepareValid = missqReplayHasTimeOut || 444 stateVec(evictionIdx).isDcacheReqCandidate() && (need_drain || cohHasTimeOut || need_replace) 445 assert(!(stateVec(evictionIdx).isDcacheReqCandidate && !noSameBlockInflight(evictionIdx))) 446 val prepareValidReg = RegInit(false.B) 447 // when canSendDcacheReq, send dcache req stored in pipeline reg to dcache 448 val canSendDcacheReq = io.dcache.req.ready || !prepareValidReg 449 // when willSendDcacheReq, read dcache req data and store them in a pipeline reg 450 willSendDcacheReq := prepareValid && canSendDcacheReq 451 when(io.dcache.req.fire()){ 452 prepareValidReg := false.B 453 } 454 when(canSendDcacheReq){ 455 prepareValidReg := prepareValid 456 } 457 when(willSendDcacheReq){ 458 stateVec(evictionIdx).state_inflight := true.B 459 stateVec(evictionIdx).w_timeout := false.B 460 // stateVec(evictionIdx).s_pipe_req := true.B 461 XSDebug(p"$evictionIdx will be sent to Dcache\n") 462 } 463 XSDebug(p"need drain:$need_drain cohHasTimeOut: $cohHasTimeOut need replace:$need_replace\n") 464 XSDebug(p"drainIdx:$drainIdx tIdx:$cohTimeOutIdx replIdx:$replaceIdx " + 465 p"blocked:${!noSameBlockInflight(evictionIdx)} v:${activeMask(evictionIdx)}\n") 466 XSDebug(p"prepareValid:$prepareValid evictIdx:$evictionIdx dcache ready:${io.dcache.req.ready}\n") 467 // Note: if other dcache req in the same block are inflight, 468 // the lru update may not accurate 469 accessIdx(EnsbufferWidth).valid := invalidMask(replaceIdx) || ( 470 need_replace && !need_drain && !cohHasTimeOut && !missqReplayHasTimeOut && canSendDcacheReq && activeMask(replaceIdx)) 471 accessIdx(EnsbufferWidth).bits := replaceIdx 472 val evictionIdxReg = RegEnable(evictionIdx, willSendDcacheReq) 473 val evictionPTag = RegEnable(ptag(evictionIdx), willSendDcacheReq) 474 val evictionVTag = RegEnable(vtag(evictionIdx), willSendDcacheReq) 475 476 io.dcache.req.valid := prepareValidReg 477 io.dcache.req.bits := DontCare 478 io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR 479 io.dcache.req.bits.addr := getAddr(evictionPTag) 480 io.dcache.req.bits.vaddr := getAddr(evictionVTag) 481 io.dcache.req.bits.data := data(evictionIdxReg).asUInt 482 io.dcache.req.bits.mask := mask(evictionIdxReg).asUInt 483 io.dcache.req.bits.id := evictionIdxReg 484 485 when (io.dcache.req.fire()) { 486 assert(!(io.dcache.req.bits.vaddr === 0.U)) 487 assert(!(io.dcache.req.bits.addr === 0.U)) 488 } 489 490 XSDebug(io.dcache.req.fire(), 491 p"send buf [$evictionIdxReg] to Dcache, req fire\n" 492 ) 493 494 // update sbuffer status according to dcache resp source 495 496 def id_to_sbuffer_id(id: UInt): UInt = { 497 require(id.getWidth >= log2Up(StoreBufferSize)) 498 id(log2Up(StoreBufferSize)-1, 0) 499 } 500 501 // hit resp 502 io.dcache.hit_resps.map(resp => { 503 val dcache_resp_id = resp.bits.id 504 when (resp.fire()) { 505 stateVec(dcache_resp_id).state_inflight := false.B 506 stateVec(dcache_resp_id).state_valid := false.B 507 assert(!resp.bits.replay) 508 assert(!resp.bits.miss) // not need to resp if miss, to be opted 509 assert(stateVec(dcache_resp_id).state_inflight === true.B) 510 } 511 512 // Update w_sameblock_inflight flag is delayed for 1 cycle 513 // 514 // When a new req allocate a new line in sbuffer, sameblock_inflight check will ignore 515 // current dcache.hit_resps. Then, in the next cycle, we have plenty of time to check 516 // if the same block is still inflight 517 (0 until StoreBufferSize).map(i => { 518 when( 519 stateVec(i).w_sameblock_inflight && 520 stateVec(i).state_valid && 521 RegNext(resp.fire()) && 522 waitInflightMask(i) === UIntToOH(RegNext(id_to_sbuffer_id(dcache_resp_id))) 523 ){ 524 stateVec(i).w_sameblock_inflight := false.B 525 } 526 }) 527 }) 528 529 530 // replay resp 531 val replay_resp_id = io.dcache.replay_resp.bits.id 532 when (io.dcache.replay_resp.fire()) { 533 missqReplayCount(replay_resp_id) := 0.U 534 stateVec(replay_resp_id).w_timeout := true.B 535 // waiting for timeout 536 assert(io.dcache.replay_resp.bits.replay) 537 assert(stateVec(replay_resp_id).state_inflight === true.B) 538 } 539 540 // TODO: reuse cohCount 541 (0 until StoreBufferSize).map(i => { 542 when(stateVec(i).w_timeout && stateVec(i).state_inflight && !missqReplayCount(i)(MissqReplayCountBits-1)) { 543 missqReplayCount(i) := missqReplayCount(i) + 1.U 544 } 545 when(activeMask(i) && !cohTimeOutMask(i)){ 546 cohCount(i) := cohCount(i)+1.U 547 } 548 }) 549 550 if (env.EnableDifftest) { 551 // hit resp 552 io.dcache.hit_resps.zipWithIndex.map{case (resp, index) => { 553 val difftest = Module(new DifftestSbufferEvent) 554 val dcache_resp_id = resp.bits.id 555 difftest.io.clock := clock 556 difftest.io.coreid := io.hartId 557 difftest.io.index := index.U 558 difftest.io.sbufferResp := RegNext(resp.fire()) 559 difftest.io.sbufferAddr := RegNext(getAddr(ptag(dcache_resp_id))) 560 difftest.io.sbufferData := RegNext(data(dcache_resp_id).asTypeOf(Vec(CacheLineBytes, UInt(8.W)))) 561 difftest.io.sbufferMask := RegNext(mask(dcache_resp_id).asUInt) 562 }} 563 } 564 565 // ---------------------- Load Data Forward --------------------- 566 val mismatch = Wire(Vec(LoadPipelineWidth, Bool())) 567 XSPerfAccumulate("vaddr_match_failed", mismatch.reduce(_ || _)) 568 for ((forward, i) <- io.forward.zipWithIndex) { 569 val vtag_matches = VecInit(widthMap(w => vtag(w) === getVTag(forward.vaddr))) 570 val ptag_matches = VecInit(widthMap(w => ptag(w) === getPTag(forward.paddr))) 571 val tag_matches = vtag_matches 572 val tag_mismatch = RegNext(forward.valid) && VecInit(widthMap(w => 573 RegNext(vtag_matches(w)) =/= RegNext(ptag_matches(w)) && RegNext((activeMask(w) || inflightMask(w))) 574 )).asUInt.orR 575 mismatch(i) := tag_mismatch 576 when (tag_mismatch) { 577 XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n", 578 RegNext(ptag_matches.asUInt), 579 RegNext(vtag_matches.asUInt), 580 RegNext(forward.vaddr), 581 RegNext(forward.paddr) 582 ) 583 forward_need_uarch_drain := true.B 584 } 585 val valid_tag_matches = widthMap(w => tag_matches(w) && activeMask(w)) 586 val inflight_tag_matches = widthMap(w => tag_matches(w) && inflightMask(w)) 587 val line_offset_mask = UIntToOH(getWordOffset(forward.paddr)) 588 589 val valid_tag_match_reg = valid_tag_matches.map(RegNext(_)) 590 val inflight_tag_match_reg = inflight_tag_matches.map(RegNext(_)) 591 val line_offset_reg = RegNext(line_offset_mask) 592 val forward_mask_candidate_reg = RegEnable( 593 VecInit(mask.map(entry => entry(getWordOffset(forward.paddr)))), 594 forward.valid 595 ) 596 val forward_data_candidate_reg = RegEnable( 597 VecInit(data.map(entry => entry(getWordOffset(forward.paddr)))), 598 forward.valid 599 ) 600 601 val selectedValidMask = Mux1H(valid_tag_match_reg, forward_mask_candidate_reg) 602 val selectedValidData = Mux1H(valid_tag_match_reg, forward_data_candidate_reg) 603 selectedValidMask.suggestName("selectedValidMask_"+i) 604 selectedValidData.suggestName("selectedValidData_"+i) 605 606 val selectedInflightMask = Mux1H(inflight_tag_match_reg, forward_mask_candidate_reg) 607 val selectedInflightData = Mux1H(inflight_tag_match_reg, forward_data_candidate_reg) 608 selectedInflightMask.suggestName("selectedInflightMask_"+i) 609 selectedInflightData.suggestName("selectedInflightData_"+i) 610 611 // currently not being used 612 val selectedInflightMaskFast = Mux1H(line_offset_mask, Mux1H(inflight_tag_matches, mask).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool())))) 613 val selectedValidMaskFast = Mux1H(line_offset_mask, Mux1H(valid_tag_matches, mask).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool())))) 614 615 forward.dataInvalid := false.B // data in store line merge buffer is always ready 616 forward.matchInvalid := tag_mismatch // paddr / vaddr cam result does not match 617 for (j <- 0 until DataBytes) { 618 forward.forwardMask(j) := false.B 619 forward.forwardData(j) := DontCare 620 621 // valid entries have higher priority than inflight entries 622 when(selectedInflightMask(j)) { 623 forward.forwardMask(j) := true.B 624 forward.forwardData(j) := selectedInflightData(j) 625 } 626 when(selectedValidMask(j)) { 627 forward.forwardMask(j) := true.B 628 forward.forwardData(j) := selectedValidData(j) 629 } 630 631 forward.forwardMaskFast(j) := selectedInflightMaskFast(j) || selectedValidMaskFast(j) 632 } 633 } 634 635 for (i <- 0 until StoreBufferSize) { 636 XSDebug("sbf entry " + i + " : ptag %x vtag %x valid %x active %x inflight %x w_timeout %x\n", 637 ptag(i) << OffsetWidth, 638 vtag(i) << OffsetWidth, 639 stateVec(i).isValid(), 640 activeMask(i), 641 inflightMask(i), 642 stateVec(i).w_timeout 643 ) 644 } 645 646 val perf_valid_entry_count = RegNext(PopCount(VecInit(stateVec.map(s => !s.isInvalid())).asUInt)) 647 XSPerfHistogram("util", perf_valid_entry_count, true.B, 0, StoreBufferSize, 1) 648 XSPerfAccumulate("sbuffer_req_valid", PopCount(VecInit(io.in.map(_.valid)).asUInt)) 649 XSPerfAccumulate("sbuffer_req_fire", PopCount(VecInit(io.in.map(_.fire())).asUInt)) 650 XSPerfAccumulate("sbuffer_merge", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && canMerge(i)})).asUInt)) 651 XSPerfAccumulate("sbuffer_newline", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && !canMerge(i)})).asUInt)) 652 XSPerfAccumulate("dcache_req_valid", io.dcache.req.valid) 653 XSPerfAccumulate("dcache_req_fire", io.dcache.req.fire()) 654 XSPerfAccumulate("sbuffer_idle", sbuffer_state === x_idle) 655 XSPerfAccumulate("sbuffer_flush", sbuffer_state === x_drain_sbuffer) 656 XSPerfAccumulate("sbuffer_replace", sbuffer_state === x_replace) 657 (0 until EnsbufferWidth).foreach(i => XSPerfAccumulate(s"canInserts_${i}", canInserts(i))) 658 XSPerfAccumulate("mainpipe_resp_valid", io.dcache.main_pipe_hit_resp.fire()) 659 XSPerfAccumulate("refill_resp_valid", io.dcache.refill_hit_resp.fire()) 660 XSPerfAccumulate("replay_resp_valid", io.dcache.replay_resp.fire()) 661 XSPerfAccumulate("coh_timeout", cohHasTimeOut) 662 663 // val (store_latency_sample, store_latency) = TransactionLatencyCounter(io.lsu.req.fire(), io.lsu.resp.fire()) 664 // XSPerfHistogram("store_latency", store_latency, store_latency_sample, 0, 100, 10) 665 // XSPerfAccumulate("store_req", io.lsu.req.fire()) 666 667 val perfEvents = Seq( 668 ("sbuffer_req_valid ", PopCount(VecInit(io.in.map(_.valid)).asUInt) ), 669 ("sbuffer_req_fire ", PopCount(VecInit(io.in.map(_.fire())).asUInt) ), 670 ("sbuffer_merge ", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && canMerge(i)})).asUInt) ), 671 ("sbuffer_newline ", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && !canMerge(i)})).asUInt) ), 672 ("dcache_req_valid ", io.dcache.req.valid ), 673 ("dcache_req_fire ", io.dcache.req.fire() ), 674 ("sbuffer_idle ", sbuffer_state === x_idle ), 675 ("sbuffer_flush ", sbuffer_state === x_drain_sbuffer ), 676 ("sbuffer_replace ", sbuffer_state === x_replace ), 677 ("mpipe_resp_valid ", io.dcache.main_pipe_hit_resp.fire() ), 678 ("refill_resp_valid ", io.dcache.refill_hit_resp.fire() ), 679 ("replay_resp_valid ", io.dcache.replay_resp.fire() ), 680 ("coh_timeout ", cohHasTimeOut ), 681 ("sbuffer_1_4_valid ", (perf_valid_entry_count < (StoreBufferSize.U/4.U)) ), 682 ("sbuffer_2_4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/4.U)) & (perf_valid_entry_count <= (StoreBufferSize.U/2.U)) ), 683 ("sbuffer_3_4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/2.U)) & (perf_valid_entry_count <= (StoreBufferSize.U*3.U/4.U))), 684 ("sbuffer_full_valid", (perf_valid_entry_count > (StoreBufferSize.U*3.U/4.U))) 685 ) 686 generatePerfEvent() 687 688} 689