/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import xiangshan.cache._
import difftest._

class SbufferFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

trait HasSbufferConst extends HasXSParameter {
  val EvictCycles = 1 << 20
  val SbufferReplayDelayCycles = 16
  require(isPow2(EvictCycles))
  val EvictCountBits = log2Up(EvictCycles+1)
  val MissqReplayCountBits = log2Up(SbufferReplayDelayCycles) + 1

  val SbufferIndexWidth: Int = log2Up(StoreBufferSize)
  // paddr = ptag + offset
  val CacheLineBytes: Int = CacheLineSize / 8
  val CacheLineWords: Int = CacheLineBytes / DataBytes
  val OffsetWidth: Int = log2Up(CacheLineBytes)
  val WordsWidth: Int = log2Up(CacheLineWords)
  val PTagWidth: Int = PAddrBits - OffsetWidth
  val VTagWidth: Int = VAddrBits - OffsetWidth
  val WordOffsetWidth: Int = PAddrBits - WordsWidth
}

class SbufferEntryState (implicit p: Parameters) extends SbufferBundle {
  val state_valid    = Bool() // this entry is active
  val state_inflight = Bool() // sbuffer is trying to write this entry to dcache
  val w_timeout = Bool() // received a replay resp; wait for the resend delay to elapse before retrying
  val w_sameblock_inflight = Bool() // a dcache req for the same cache block is inflight
  val s_recheck_inflight = Bool() // recheck if a dcache req for the same cache block is inflight

  def isInvalid(): Bool = !state_valid
  def isValid(): Bool = state_valid
  def isActive(): Bool = state_valid && !state_inflight
  def isInflight(): Bool = state_inflight
  def isDcacheReqCandidate(): Bool = state_valid && !state_inflight && !w_sameblock_inflight
}

class SbufferBundle(implicit p: Parameters) extends XSBundle with HasSbufferConst

class DataWriteReq(implicit p: Parameters) extends SbufferBundle {
  // val idx = UInt(SbufferIndexWidth.W)
  val wvec = UInt(StoreBufferSize.W)
  val mask = UInt((DataBits/8).W)
  val data = UInt(DataBits.W)
  val wordOffset = UInt(WordOffsetWidth.W)
  val wline = Bool()
}

class SbufferData(implicit p: Parameters) extends XSModule with HasSbufferConst {
  val io = IO(new Bundle(){
    val writeReq = Vec(EnsbufferWidth, Flipped(ValidIO(new DataWriteReq)))
    val dataOut = Output(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))
  })

  val data = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, UInt(8.W)))))

  val req = io.writeReq
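  // Illustrative sketch (a comment-only software model, not part of the
  // generated hardware): the write loop below enables a byte write when the
  // one-hot wvec selects the line and either the byte is masked on in the
  // addressed word, or wline requests a full-line write:
  //
  //   def byteWriteEnable(wvecBit: Boolean, maskBit: Boolean,
  //                       wordMatch: Boolean, wline: Boolean): Boolean =
  //     wvecBit && (maskBit && wordMatch || wline)
  //
  //   byteWriteEnable(true, true,  true,  false) // masked word write hits
  //   byteWriteEnable(true, false, false, true)  // wline writes every byte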
  for(i <- 0 until EnsbufferWidth) {
    when(req(i).valid){
      for(line <- 0 until StoreBufferSize){
        for(word <- 0 until CacheLineWords){
          for(byte <- 0 until DataBytes){
            when(
              req(i).bits.wvec(line) && (
                req(i).bits.mask(byte) && (req(i).bits.wordOffset(WordsWidth-1, 0) === word.U) ||
                req(i).bits.wline
              )
            ){
              data(line)(word)(byte) := req(i).bits.data(byte*8+7, byte*8)
            }
          }
        }
      }
    }
  }

  io.dataOut := data
}

class Sbuffer(implicit p: Parameters) extends DCacheModule with HasSbufferConst with HasPerfEvents {
  val io = IO(new Bundle() {
    val hartId = Input(UInt(8.W))
    val in = Vec(EnsbufferWidth, Flipped(Decoupled(new DCacheWordReqWithVaddr)))
    val dcache = Flipped(new DCacheToSbufferIO)
    val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
    val sqempty = Input(Bool())
    val flush = Flipped(new SbufferFlushBundle)
    val csrCtrl = Flipped(new CustomCSRCtrlIO)
  })

  val dataModule = Module(new SbufferData)
  dataModule.io.writeReq <> DontCare
  val writeReq = dataModule.io.writeReq

  val ptag = Reg(Vec(StoreBufferSize, UInt(PTagWidth.W)))
  val vtag = Reg(Vec(StoreBufferSize, UInt(VTagWidth.W)))
  val mask = Reg(Vec(StoreBufferSize, Vec(CacheLineWords, Vec(DataBytes, Bool()))))
  val waitInflightMask = Reg(Vec(StoreBufferSize, UInt(StoreBufferSize.W)))
  val data = dataModule.io.dataOut
  val stateVec = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U.asTypeOf(new SbufferEntryState))))
  val cohCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(EvictCountBits.W))))
  val missqReplayCount = RegInit(VecInit(Seq.fill(StoreBufferSize)(0.U(MissqReplayCountBits.W))))

  val willSendDcacheReq = Wire(Bool())

  /*
       idle --[flush]    --> drain   --[buf empty]  --> idle
            --[buf full] --> replace --[dcache resp]--> idle
  */
  // x_drain_all: drain store queue and sbuffer
  // x_drain_sbuffer: drain sbuffer only, block store queue to sbuffer write
  val x_idle :: x_replace :: x_drain_all :: x_drain_sbuffer :: Nil = Enum(4)
  def needDrain(state: UInt): Bool =
    state(1)
  val sbuffer_state = RegInit(x_idle)

  // ---------------------- Store Enq Sbuffer ---------------------

  def getPTag(pa: UInt): UInt =
    pa(PAddrBits - 1, PAddrBits - PTagWidth)

  def getVTag(va: UInt): UInt =
    va(VAddrBits - 1, VAddrBits - VTagWidth)

  def getWord(pa: UInt): UInt =
    pa(PAddrBits-1, 3)

  def getWordOffset(pa: UInt): UInt =
    pa(OffsetWidth-1, 3)

  def getAddr(ptag: UInt): UInt =
    Cat(ptag, 0.U((PAddrBits - PTagWidth).W))

  def getByteOffset(offset: UInt): UInt =
    Cat(offset(OffsetWidth - 1, 3), 0.U(3.W))

  def isOneOf(key: UInt, seq: Seq[UInt]): Bool =
    if(seq.isEmpty) false.B else Cat(seq.map(_===key)).orR()

  def widthMap[T <: Data](f: Int => T) = (0 until StoreBufferSize) map f

  // sbuffer entry count

  val plru = new PseudoLRU(StoreBufferSize)
  val accessIdx = Wire(Vec(EnsbufferWidth + 1, Valid(UInt(SbufferIndexWidth.W))))

  val replaceIdx = plru.way
  plru.access(accessIdx)

  //-------------------------cohCount-----------------------------
  // insert and merge: cohCount = 0
  // every cycle: cohCount += 1
  // if cohCount(EvictCountBits-1) == 1, evict
  val cohTimeOutMask = VecInit(widthMap(i => cohCount(i)(EvictCountBits - 1) && stateVec(i).isActive()))
  val (cohTimeOutIdx, cohHasTimeOut) = PriorityEncoderWithFlag(cohTimeOutMask)
  val missqReplayTimeOutMask = VecInit(widthMap(i => missqReplayCount(i)(MissqReplayCountBits - 1) && stateVec(i).w_timeout))
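  // Illustrative note (software model, not part of the generated hardware):
  // both timeout masks read the counter MSB as the "timed out" flag, so no
  // comparator is needed. With EvictCycles = 1 << 20 the counter is
  // EvictCountBits = log2Up(2^20 + 1) = 21 bits wide, and bit 20 is first set
  // exactly 2^20 cycles after an insert/merge last reset the counter:
  //
  //   def cohTimedOut(count: BigInt): Boolean = count.testBit(20)
  //   cohTimedOut((BigInt(1) << 20) - 1) // false: line was written recently
  //   cohTimedOut(BigInt(1) << 20)       // true: line may be evicted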
  val (missqReplayTimeOutIdx, missqReplayMayHasTimeOut) = PriorityEncoderWithFlag(missqReplayTimeOutMask)
  val missqReplayHasTimeOut = RegNext(missqReplayMayHasTimeOut) && !RegNext(willSendDcacheReq)
  val missqReplayTimeOutIdxReg = RegEnable(missqReplayTimeOutIdx, missqReplayMayHasTimeOut)

  val activeMask = VecInit(stateVec.map(s => s.isActive()))
  val drainIdx = PriorityEncoder(activeMask)

  val inflightMask = VecInit(stateVec.map(s => s.isInflight()))

  val inptags = io.in.map(in => getPTag(in.bits.addr))
  val invtags = io.in.map(in => getVTag(in.bits.vaddr))
  val sameTag = Seq.tabulate(io.in.length)(x => Seq.tabulate(io.in.length)(y => inptags(x) === inptags(y)))
  val words = (0 until EnsbufferWidth).map(i => getWord(io.in(i).bits.addr))
  val sameWord = Seq.tabulate(EnsbufferWidth)(x => Seq.tabulate(EnsbufferWidth)(y => words(x) === words(y)))

  // merge condition
  val mergeMask = Wire(Vec(EnsbufferWidth, Vec(StoreBufferSize, Bool())))
  val mergeIdx = mergeMask.map(PriorityEncoder(_)) // avoid using mergeIdx for better timing
  val canMerge = mergeMask.map(ParallelOR(_))
  val mergeVec = mergeMask.map(_.asUInt)

  for(i <- 0 until EnsbufferWidth){
    mergeMask(i) := widthMap(j =>
      inptags(i) === ptag(j) && activeMask(j)
    )
    assert(!(PopCount(mergeMask(i).asUInt) > 1.U && io.in(i).fire()))
  }

  // insert condition
  // firstInsert: the first invalid entry
  // if the first entry can merge, or the second entry has the same ptag as the first,
  // secondInsert equals the first invalid entry; otherwise, the second invalid entry
  val invalidMask = VecInit(stateVec.map(s => s.isInvalid()))
  val remInvalidMask = GetRemBits(EnsbufferWidth)(invalidMask.asUInt)

  def getFirstOneOH(input: UInt): UInt = {
    assert(input.getWidth > 1)
    val output = WireInit(VecInit(input.asBools))
    (1 until input.getWidth).map(i => {
      output(i) := !input(i - 1, 0).orR && input(i)
    })
    output.asUInt
  }

  val remRawInsertVec = remInvalidMask.map(getFirstOneOH(_))
  val remRawInsert = remInvalidMask.map(PriorityEncoderWithFlag(_)).unzip
  val (remRawInsertIdx, remCanInsert) = (remRawInsert._1, VecInit(remRawInsert._2))
  val remInsertIdx = VecInit(remRawInsertIdx.zipWithIndex.map { case (raw, idx) =>
    if (EnsbufferWidth > 1) Cat(raw, idx.U(log2Ceil(EnsbufferWidth).W))
    else raw
  }) // slow to generate, for debug only
  val remInsertVec = VecInit(GetRemBits.reverse(EnsbufferWidth)(remRawInsertVec))

  val enbufferSelReg = RegInit(0.U(log2Up(EnsbufferWidth).W))
  if (EnsbufferWidth > 1) when(io.in(0).valid) {
    enbufferSelReg := enbufferSelReg + 1.U
  }

  val insertIdxs = (0 until EnsbufferWidth).map(i =>
    PriorityMuxDefault(if (i == 0) Seq(0.B -> 0.U) else (0 until i).map(j => sameTag(i)(j) -> remInsertIdx(enbufferSelReg + j.U)), remInsertIdx(enbufferSelReg + i.U))
  ) // slow to generate, for debug only
  val insertVecs = (0 until EnsbufferWidth).map(i =>
    PriorityMuxDefault(if (i == 0) Seq(0.B -> 0.U) else (0 until i).map(j => sameTag(i)(j) -> remInsertVec(enbufferSelReg + j.U)), remInsertVec(enbufferSelReg + i.U))
  ) // slow to generate, for debug only
  val canInserts = (0 until EnsbufferWidth).map(i =>
    PriorityMuxDefault(if (i == 0) Seq(0.B -> 0.B) else (0 until i).map(j => sameTag(i)(j) -> remCanInsert(enbufferSelReg + j.U)), remCanInsert(enbufferSelReg + i.U))
  ).map(_ && sbuffer_state =/= x_drain_sbuffer)
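  // Illustrative note: free entries are split into EnsbufferWidth remainder
  // groups (GetRemBits) and enbufferSelReg rotates which group each port
  // allocates from, so concurrent enqueues never race for the same entry.
  // getFirstOneOH isolates the lowest set bit of a group's free mask as a
  // one-hot; a software equivalent of that helper:
  //
  //   def firstOneOH(x: Int): Int = x & -x // keep only the lowest set bit
  //   firstOneOH(0xb4) == 0x04             // 0b1011_0100 -> 0b0000_0100
  //
  // The hardware version computes the same result with a prefix OR
  // (!input(i-1, 0).orR && input(i)) instead of two's-complement arithmetic.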
  val forward_need_uarch_drain = WireInit(false.B)
  val merge_need_uarch_drain = WireInit(false.B)
  val do_uarch_drain = RegNext(forward_need_uarch_drain) || RegNext(RegNext(merge_need_uarch_drain))
  XSPerfAccumulate("do_uarch_drain", do_uarch_drain)

  (0 until EnsbufferWidth).foreach(i =>
    io.in(i).ready := canInserts(i) && (if (i == 0) 1.B else !sameWord(0)(i) && io.in(i - 1).ready)
  )

  def wordReqToBufLine(req: DCacheWordReq, reqptag: UInt, reqvtag: UInt, insertIdx: UInt, insertVec: UInt, wordOffset: UInt, flushMask: Bool): Unit = {
    assert(UIntToOH(insertIdx) === insertVec)
    val sameBlockInflightMask = genSameBlockInflightMask(reqptag)
    (0 until StoreBufferSize).map(entryIdx => {
      when(insertVec(entryIdx)){
        stateVec(entryIdx).state_valid := true.B
        stateVec(entryIdx).w_sameblock_inflight := sameBlockInflightMask.orR // set w_sameblock_inflight when a line is first allocated
        when(sameBlockInflightMask.orR){
          waitInflightMask(entryIdx) := sameBlockInflightMask
        }
        cohCount(entryIdx) := 0.U
        // missqReplayCount(insertIdx) := 0.U
        ptag(entryIdx) := reqptag
        vtag(entryIdx) := reqvtag // update vtag iff a new sbuffer line is allocated
        when(flushMask){
          for(j <- 0 until CacheLineWords){
            for(i <- 0 until DataBytes){
              mask(entryIdx)(j)(i) := false.B
            }
          }
        }
        for(i <- 0 until DataBytes){
          when(req.mask(i)){
            mask(entryIdx)(wordOffset)(i) := true.B
          }
        }
      }
    })
  }

  def mergeWordReq(req: DCacheWordReq, reqptag: UInt, reqvtag: UInt, mergeIdx: UInt, mergeVec: UInt, wordOffset: UInt): Unit = {
    assert(UIntToOH(mergeIdx) === mergeVec)
    (0 until StoreBufferSize).map(entryIdx => {
      when(mergeVec(entryIdx)) {
        cohCount(entryIdx) := 0.U
        // missqReplayCount(entryIdx) := 0.U
        for(i <- 0 until DataBytes){
          when(req.mask(i)){
            mask(entryIdx)(wordOffset)(i) := true.B
            // data(entryIdx)(wordOffset)(i) := req.data(i*8+7, i*8)
          }
        }
        // check if vtag is the same; if not, trigger sbuffer flush
        when(reqvtag =/= vtag(entryIdx)) {
          XSDebug("reqvtag =/= sbufvtag req(vtag %x ptag %x) sbuffer(vtag %x ptag %x)\n",
            reqvtag << OffsetWidth,
            reqptag << OffsetWidth,
            vtag(entryIdx) << OffsetWidth,
            ptag(entryIdx) << OffsetWidth
          )
          merge_need_uarch_drain := true.B
        }
      }
    })
  }

  for(((in, wordOffset), i) <- io.in.zip(words).zipWithIndex){
    writeReq(i).valid := in.fire()
    writeReq(i).bits.wordOffset := wordOffset
    writeReq(i).bits.mask := in.bits.mask
    writeReq(i).bits.data := in.bits.data
    writeReq(i).bits.wline := in.bits.wline
    val debug_insertIdx = insertIdxs(i)
    val insertVec = insertVecs(i)
    assert(!((PopCount(insertVec) > 1.U) && in.fire()))
    val insertIdx = OHToUInt(insertVec)
    val flushMask = if(i == 0) true.B else (0 until i).map(j => !sameTag(i)(j)).reduce(_ && _)
    flushMask.suggestName(s"flushMask_${i}")
    accessIdx(i).valid := RegNext(in.fire())
    accessIdx(i).bits := RegNext(Mux(canMerge(i), mergeIdx(i), insertIdx))
    when(in.fire()){
      when(canMerge(i)){
        // writeReq(i).bits.idx := mergeIdx(i)
        writeReq(i).bits.wvec := mergeVec(i)
        mergeWordReq(in.bits, inptags(i), invtags(i), mergeIdx(i), mergeVec(i), wordOffset)
        XSDebug(p"merge req $i to line [${mergeIdx(i)}]\n")
      }.otherwise({
        // writeReq(i).bits.idx := insertIdx
        writeReq(i).bits.wvec := insertVec
        wordReqToBufLine(in.bits, inptags(i), invtags(i), insertIdx, insertVec, wordOffset, flushMask)
        XSDebug(p"insert req $i to line[$insertIdx]\n")
        assert(debug_insertIdx === insertIdx)
      })
    }
  }
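  // Illustrative recap of the enqueue loop above (pseudocode in comments, not
  // part of the generated hardware): each port merges when an active line
  // already holds its ptag, and allocates a new line otherwise.
  //
  //   if (canMerge(i)) {
  //     // write bytes into the matching line, reset its eviction timer
  //   } else {
  //     // allocate a fresh line: set ptag/vtag, clear the byte mask if
  //     // flushMask is set, then write the new bytes
  //   }
  //
  // flushMask is false only when an earlier port in the same cycle allocated
  // the same tag, so the mask clear cannot erase that port's freshly written
  // bytes.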

  for(i <- 0 until StoreBufferSize){
    XSDebug(stateVec(i).isValid(),
      p"[$i] timeout:${cohCount(i)(EvictCountBits-1)} state:${stateVec(i)}\n"
    )
  }

  for((req, i) <- io.in.zipWithIndex){
    XSDebug(req.fire(),
      p"accept req [$i]: " +
        p"addr:${Hexadecimal(req.bits.addr)} " +
        p"mask:${Binary(req.bits.mask)} " +
        p"data:${Hexadecimal(req.bits.data)}\n"
    )
    XSDebug(req.valid && !req.ready,
      p"req [$i] blocked by sbuffer\n"
    )
  }

  // ---------------------- Send Dcache Req ---------------------

  val sbuffer_empty = Cat(invalidMask).andR()
  val sq_empty = !Cat(io.in.map(_.valid)).orR()
  val empty = sbuffer_empty && sq_empty
  val threshold = RegNext(io.csrCtrl.sbuffer_threshold +& 1.U)
  val validCount = PopCount(activeMask)
  val do_eviction = RegNext(validCount >= threshold || validCount === (StoreBufferSize-1).U, init = false.B)
  require((StoreBufferThreshold + 1) <= StoreBufferSize)

  XSDebug(p"validCount[$validCount]\n")

  io.flush.empty := RegNext(empty && io.sqempty)
  // lru.io.flush := sbuffer_state === x_drain_all && empty
  switch(sbuffer_state){
    is(x_idle){
      when(io.flush.valid){
        sbuffer_state := x_drain_all
      }.elsewhen(do_uarch_drain){
        sbuffer_state := x_drain_sbuffer
      }.elsewhen(do_eviction){
        sbuffer_state := x_replace
      }
    }
    is(x_drain_all){
      when(empty){
        sbuffer_state := x_idle
      }
    }
    is(x_drain_sbuffer){
      when(io.flush.valid){
        sbuffer_state := x_drain_all
      }.elsewhen(sbuffer_empty){
        sbuffer_state := x_idle
      }
    }
    is(x_replace){
      when(io.flush.valid){
        sbuffer_state := x_drain_all
      }.elsewhen(do_uarch_drain){
        sbuffer_state := x_drain_sbuffer
      }.elsewhen(!do_eviction){
        sbuffer_state := x_idle
      }
    }
  }
  XSDebug(p"sbuffer state:${sbuffer_state} do eviction:${do_eviction} empty:${empty}\n")

  def noSameBlockInflight(idx: UInt): Bool = {
    // stateVec(idx) itself must not be s_inflight
    !Cat(widthMap(i => inflightMask(i) && ptag(idx) === ptag(i))).orR()
  }

  def genSameBlockInflightMask(ptag_in: UInt): UInt = {
    val mask = VecInit(widthMap(i => inflightMask(i) && ptag_in === ptag(i))).asUInt // quite slow, use it with care
    assert(!(PopCount(mask) > 1.U))
    mask
  }

  def haveSameBlockInflight(ptag_in: UInt): Bool = {
    genSameBlockInflightMask(ptag_in).orR
  }

  val need_drain = needDrain(sbuffer_state)
  val need_replace = do_eviction || (sbuffer_state === x_replace)
  val evictionIdx = Mux(missqReplayHasTimeOut,
    missqReplayTimeOutIdxReg,
    Mux(need_drain,
      drainIdx,
      Mux(cohHasTimeOut, cohTimeOutIdx, replaceIdx)
    )
  )

  /*
      If there is an in-flight dcache req with the same ptag as evictionIdx's ptag,
      the current eviction must be blocked.
  */
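  // Illustrative software model (comment only) of the eviction priority
  // encoded by the nested Mux above, highest priority first:
  //
  //   def pickEviction(replayTO: Boolean, drain: Boolean, cohTO: Boolean,
  //                    replayIdx: Int, drainIdx: Int,
  //                    cohIdx: Int, plruIdx: Int): Int =
  //     if (replayTO)   replayIdx // 1. missq replay resend timer expired
  //     else if (drain) drainIdx  // 2. draining: flush active lines in order
  //     else if (cohTO) cohIdx    // 3. eviction (cohCount) timer expired
  //     else            plruIdx   // 4. default: PLRU replacement victim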
  val prepareValid = missqReplayHasTimeOut ||
    stateVec(evictionIdx).isDcacheReqCandidate() && (need_drain || cohHasTimeOut || need_replace)
  assert(!(stateVec(evictionIdx).isDcacheReqCandidate && !noSameBlockInflight(evictionIdx)))
  val prepareValidReg = RegInit(false.B)
  // when canSendDcacheReq, send the dcache req stored in the pipeline reg to dcache
  val canSendDcacheReq = io.dcache.req.ready || !prepareValidReg
  // when willSendDcacheReq, read dcache req data and store it in a pipeline reg
  willSendDcacheReq := prepareValid && canSendDcacheReq
  when(io.dcache.req.fire()){
    prepareValidReg := false.B
  }
  when(canSendDcacheReq){
    prepareValidReg := prepareValid
  }
  when(willSendDcacheReq){
    stateVec(evictionIdx).state_inflight := true.B
    stateVec(evictionIdx).w_timeout := false.B
    // stateVec(evictionIdx).s_pipe_req := true.B
    XSDebug(p"$evictionIdx will be sent to Dcache\n")
  }
  XSDebug(p"need drain:$need_drain cohHasTimeOut: $cohHasTimeOut need replace:$need_replace\n")
  XSDebug(p"drainIdx:$drainIdx tIdx:$cohTimeOutIdx replIdx:$replaceIdx " +
    p"blocked:${!noSameBlockInflight(evictionIdx)} v:${activeMask(evictionIdx)}\n")
  XSDebug(p"prepareValid:$prepareValid evictIdx:$evictionIdx dcache ready:${io.dcache.req.ready}\n")
  // Note: if other dcache reqs in the same block are inflight,
  // the lru update may not be accurate
  accessIdx(EnsbufferWidth).valid := invalidMask(replaceIdx) || (
    need_replace && !need_drain && !cohHasTimeOut && !missqReplayHasTimeOut && canSendDcacheReq && activeMask(replaceIdx))
  accessIdx(EnsbufferWidth).bits := replaceIdx
  val evictionIdxReg = RegEnable(evictionIdx, enable = willSendDcacheReq)
  val evictionPTag = RegEnable(ptag(evictionIdx), enable = willSendDcacheReq)
  val evictionVTag = RegEnable(vtag(evictionIdx), enable = willSendDcacheReq)

  io.dcache.req.valid := prepareValidReg
  io.dcache.req.bits := DontCare
  io.dcache.req.bits.cmd := MemoryOpConstants.M_XWR
  io.dcache.req.bits.addr := getAddr(evictionPTag)
  io.dcache.req.bits.vaddr := getAddr(evictionVTag)
  io.dcache.req.bits.data := data(evictionIdxReg).asUInt
  io.dcache.req.bits.mask := mask(evictionIdxReg).asUInt
  io.dcache.req.bits.id := evictionIdxReg

  when (io.dcache.req.fire()) {
    assert(!(io.dcache.req.bits.vaddr === 0.U))
    assert(!(io.dcache.req.bits.addr === 0.U))
  }

  XSDebug(io.dcache.req.fire(),
    p"send buf [$evictionIdxReg] to Dcache, req fire\n"
  )

  // update sbuffer status according to dcache resp source

  def id_to_sbuffer_id(id: UInt): UInt = {
    require(id.getWidth >= log2Up(StoreBufferSize))
    id(log2Up(StoreBufferSize)-1, 0)
  }
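  // Illustrative note: io.dcache.req.bits.id carries the sbuffer entry index,
  // so a response is matched back to its entry by truncating the id to
  // log2Up(StoreBufferSize) bits. For example, assuming StoreBufferSize = 16
  // (a hypothetical configuration), a resp id of 0b10_0101 maps to entry
  // 0b0101 = 5.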
  // hit resp
  io.dcache.hit_resps.map(resp => {
    val dcache_resp_id = resp.bits.id
    when (resp.fire()) {
      stateVec(dcache_resp_id).state_inflight := false.B
      stateVec(dcache_resp_id).state_valid := false.B
      assert(!resp.bits.replay)
      assert(!resp.bits.miss) // no need to respond on a miss; to be optimized
      assert(stateVec(dcache_resp_id).state_inflight === true.B)
    }

    // Updating the w_sameblock_inflight flag is delayed by 1 cycle.
    //
    // When a new req allocates a new line in the sbuffer, the sameblock_inflight
    // check ignores the current dcache.hit_resps. Then, in the next cycle, there
    // is plenty of time to check whether the same block is still inflight.
    (0 until StoreBufferSize).map(i => {
      when(
        stateVec(i).w_sameblock_inflight &&
        stateVec(i).state_valid &&
        RegNext(resp.fire()) &&
        waitInflightMask(i) === UIntToOH(RegNext(id_to_sbuffer_id(dcache_resp_id)))
      ){
        stateVec(i).w_sameblock_inflight := false.B
      }
    })
  })


  // replay resp
  val replay_resp_id = io.dcache.replay_resp.bits.id
  when (io.dcache.replay_resp.fire()) {
    missqReplayCount(replay_resp_id) := 0.U
    stateVec(replay_resp_id).w_timeout := true.B
    // waiting for timeout
    assert(io.dcache.replay_resp.bits.replay)
    assert(stateVec(replay_resp_id).state_inflight === true.B)
  }

  // TODO: reuse cohCount
  (0 until StoreBufferSize).map(i => {
    when(stateVec(i).w_timeout && stateVec(i).state_inflight && !missqReplayCount(i)(MissqReplayCountBits-1)) {
      missqReplayCount(i) := missqReplayCount(i) + 1.U
    }
    when(activeMask(i) && !cohTimeOutMask(i)){
      cohCount(i) := cohCount(i)+1.U
    }
  })
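  // Illustrative note (software model, not part of the generated hardware):
  // a line that got a replay resp waits in w_timeout while missqReplayCount
  // increments once per cycle. With SbufferReplayDelayCycles = 16,
  // MissqReplayCountBits = log2Up(16) + 1 = 5, so bit 4 is set after 16
  // increments and the line becomes a resend candidate again, i.e. the
  // sbuffer backs off for 16 cycles between retries of a line that missed:
  //
  //   def replayReady(count: Int): Boolean = (count & 0x10) != 0 // bit 4
  //   replayReady(15) // false: still backing off
  //   replayReady(16) // true: eligible for resend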
  if (env.EnableDifftest) {
    // hit resp
    io.dcache.hit_resps.zipWithIndex.map{case (resp, index) => {
      val difftest = Module(new DifftestSbufferEvent)
      val dcache_resp_id = resp.bits.id
      difftest.io.clock := clock
      difftest.io.coreid := io.hartId
      difftest.io.index := index.U
      difftest.io.sbufferResp := RegNext(resp.fire())
      difftest.io.sbufferAddr := RegNext(getAddr(ptag(dcache_resp_id)))
      difftest.io.sbufferData := RegNext(data(dcache_resp_id).asTypeOf(Vec(CacheLineBytes, UInt(8.W))))
      difftest.io.sbufferMask := RegNext(mask(dcache_resp_id).asUInt)
    }}
  }

  // ---------------------- Load Data Forward ---------------------
  val mismatch = Wire(Vec(LoadPipelineWidth, Bool()))
  XSPerfAccumulate("vaddr_match_failed", mismatch.reduce(_ || _))
  for ((forward, i) <- io.forward.zipWithIndex) {
    val vtag_matches = VecInit(widthMap(w => vtag(w) === getVTag(forward.vaddr)))
    val ptag_matches = VecInit(widthMap(w => ptag(w) === getPTag(forward.paddr)))
    val tag_matches = vtag_matches
    val tag_mismatch = RegNext(forward.valid) && VecInit(widthMap(w =>
      RegNext(vtag_matches(w)) =/= RegNext(ptag_matches(w)) && RegNext((activeMask(w) || inflightMask(w)))
    )).asUInt.orR
    mismatch(i) := tag_mismatch
    when (tag_mismatch) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        RegNext(ptag_matches.asUInt),
        RegNext(vtag_matches.asUInt),
        RegNext(forward.vaddr),
        RegNext(forward.paddr)
      )
      forward_need_uarch_drain := true.B
    }
    val valid_tag_matches = widthMap(w => tag_matches(w) && activeMask(w))
    val inflight_tag_matches = widthMap(w => tag_matches(w) && inflightMask(w))
    val line_offset_mask = UIntToOH(getWordOffset(forward.paddr))

    val valid_tag_match_reg = valid_tag_matches.map(RegNext(_))
    val inflight_tag_match_reg = inflight_tag_matches.map(RegNext(_))
    val line_offset_reg = RegNext(line_offset_mask)
    val forward_mask_candidate_reg = RegEnable(
      VecInit(mask.map(entry => entry(getWordOffset(forward.paddr)))),
      forward.valid
    )
    val forward_data_candidate_reg = RegEnable(
      VecInit(data.map(entry => entry(getWordOffset(forward.paddr)))),
      forward.valid
    )

    val selectedValidMask = Mux1H(valid_tag_match_reg, forward_mask_candidate_reg)
    val selectedValidData = Mux1H(valid_tag_match_reg, forward_data_candidate_reg)
    selectedValidMask.suggestName("selectedValidMask_"+i)
    selectedValidData.suggestName("selectedValidData_"+i)

    val selectedInflightMask = Mux1H(inflight_tag_match_reg, forward_mask_candidate_reg)
    val selectedInflightData = Mux1H(inflight_tag_match_reg, forward_data_candidate_reg)
    selectedInflightMask.suggestName("selectedInflightMask_"+i)
    selectedInflightData.suggestName("selectedInflightData_"+i)

    // currently not being used
    val selectedInflightMaskFast = Mux1H(line_offset_mask, Mux1H(inflight_tag_matches, mask).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool()))))
    val selectedValidMaskFast = Mux1H(line_offset_mask, Mux1H(valid_tag_matches, mask).asTypeOf(Vec(CacheLineWords, Vec(DataBytes, Bool()))))

    forward.dataInvalid := false.B // data in store line merge buffer is always ready
    forward.matchInvalid := tag_mismatch // paddr / vaddr cam result does not match
    for (j <- 0 until DataBytes) {
      forward.forwardMask(j) := false.B
      forward.forwardData(j) := DontCare

      // valid entries have higher priority than inflight entries
      when(selectedInflightMask(j)) {
        forward.forwardMask(j) := true.B
        forward.forwardData(j) := selectedInflightData(j)
      }
      when(selectedValidMask(j)) {
        forward.forwardMask(j) := true.B
        forward.forwardData(j) := selectedValidData(j)
      }

      forward.forwardMaskFast(j) := selectedInflightMaskFast(j) || selectedValidMaskFast(j)
    }
  }
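  // Illustrative software model (comment only) of the per-byte forwarding
  // merge above: inflight entries are applied first and active (valid)
  // entries override them, because a valid line holds data written after the
  // inflight copy was snapshotted for the dcache:
  //
  //   def forwardByte(inflightHit: Boolean, inflightByte: Byte,
  //                   validHit: Boolean, validByte: Byte): Option[Byte] =
  //     if (validHit)         Some(validByte)
  //     else if (inflightHit) Some(inflightByte)
  //     else                  None // byte not forwardable from the sbuffer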
  for (i <- 0 until StoreBufferSize) {
    XSDebug("sbf entry " + i + " : ptag %x vtag %x valid %x active %x inflight %x w_timeout %x\n",
      ptag(i) << OffsetWidth,
      vtag(i) << OffsetWidth,
      stateVec(i).isValid(),
      activeMask(i),
      inflightMask(i),
      stateVec(i).w_timeout
    )
  }

  val perf_valid_entry_count = PopCount(VecInit(stateVec.map(s => !s.isInvalid())).asUInt)
  XSPerfHistogram("util", perf_valid_entry_count, true.B, 0, StoreBufferSize, 1)
  XSPerfAccumulate("sbuffer_req_valid", PopCount(VecInit(io.in.map(_.valid)).asUInt))
  XSPerfAccumulate("sbuffer_req_fire", PopCount(VecInit(io.in.map(_.fire())).asUInt))
  XSPerfAccumulate("sbuffer_merge", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && canMerge(i)})).asUInt))
  XSPerfAccumulate("sbuffer_newline", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && !canMerge(i)})).asUInt))
  XSPerfAccumulate("dcache_req_valid", io.dcache.req.valid)
  XSPerfAccumulate("dcache_req_fire", io.dcache.req.fire())
  XSPerfAccumulate("sbuffer_idle", sbuffer_state === x_idle)
  XSPerfAccumulate("sbuffer_flush", sbuffer_state === x_drain_sbuffer)
  XSPerfAccumulate("sbuffer_replace", sbuffer_state === x_replace)
  (0 until EnsbufferWidth).foreach(i => XSPerfAccumulate(s"canInserts_${i}", canInserts(i)))
  XSPerfAccumulate("mainpipe_resp_valid", io.dcache.main_pipe_hit_resp.fire())
  XSPerfAccumulate("refill_resp_valid", io.dcache.refill_hit_resp.fire())
  XSPerfAccumulate("replay_resp_valid", io.dcache.replay_resp.fire())
  XSPerfAccumulate("coh_timeout", cohHasTimeOut)

  // val (store_latency_sample, store_latency) = TransactionLatencyCounter(io.lsu.req.fire(), io.lsu.resp.fire())
  // XSPerfHistogram("store_latency", store_latency, store_latency_sample, 0, 100, 10)
  // XSPerfAccumulate("store_req", io.lsu.req.fire())

  val perfEvents = Seq(
    ("sbuffer_req_valid ", PopCount(VecInit(io.in.map(_.valid)).asUInt)),
    ("sbuffer_req_fire  ", PopCount(VecInit(io.in.map(_.fire())).asUInt)),
    ("sbuffer_merge     ", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && canMerge(i)})).asUInt)),
    ("sbuffer_newline   ", PopCount(VecInit(io.in.zipWithIndex.map({case (in, i) => in.fire() && !canMerge(i)})).asUInt)),
    ("dcache_req_valid  ", io.dcache.req.valid),
    ("dcache_req_fire   ", io.dcache.req.fire()),
    ("sbuffer_idle      ", sbuffer_state === x_idle),
    ("sbuffer_flush     ", sbuffer_state === x_drain_sbuffer),
    ("sbuffer_replace   ", sbuffer_state === x_replace),
    ("mpipe_resp_valid  ", io.dcache.main_pipe_hit_resp.fire()),
    ("refill_resp_valid ", io.dcache.refill_hit_resp.fire()),
    ("replay_resp_valid ", io.dcache.replay_resp.fire()),
    ("coh_timeout       ", cohHasTimeOut),
    ("sbuffer_1_4_valid ", (perf_valid_entry_count < (StoreBufferSize.U/4.U))),
    ("sbuffer_2_4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/4.U)) & (perf_valid_entry_count <= (StoreBufferSize.U/2.U))),
    ("sbuffer_3_4_valid ", (perf_valid_entry_count > (StoreBufferSize.U/2.U)) & (perf_valid_entry_count <= (StoreBufferSize.U*3.U/4.U))),
    ("sbuffer_full_valid", (perf_valid_entry_count > (StoreBufferSize.U*3.U/4.U)))
  )
  generatePerfEvent()

}