/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.experimental.ExtModule
import chisel3.util._
import xiangshan._
import utils._
import utility._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink._
import freechips.rocketchip.util.{BundleFieldBase, UIntToOH1}
import device.RAMHelper
import coupledL2.{AliasField, VaddrField, PrefetchField}
import utility.ReqSourceField
import utility.FastArbiter
import mem.AddPipelineReg
import xiangshan.cache.wpu._

import scala.math.max

// DCache specific parameters
/** Configuration knobs for the L1 data cache.
  *
  * @param nSets             number of cache sets
  * @param nWays             set associativity
  * @param rowBits           width of one SRAM data row in bits
  * @param tagECC            ECC scheme name for the tag array (None = no ECC)
  * @param dataECC           ECC scheme name for the data array (None = no ECC)
  * @param replacer          replacement policy name (default: per-set PLRU)
  * @param updateReplaceOn2ndmiss  whether a second miss to the same line updates replacement state
  * @param nMissEntries      number of MSHR (miss queue) entries
  * @param nProbeEntries     number of probe queue entries
  * @param nReleaseEntries   number of writeback/release queue entries
  * @param nMMIOEntries      number of MMIO (uncached) entries
  * @param nMMIOs            number of outstanding MMIO requests
  * @param blockBytes        cache line size in bytes
  * @param alwaysReleaseData whether releases always carry data even when clean
  */
case class DCacheParameters
(
  nSets: Int = 256,
  nWays: Int = 8,
  rowBits: Int = 64,
  tagECC: Option[String] = None,
  dataECC: Option[String] = None,
  replacer: Option[String] = Some("setplru"),
  updateReplaceOn2ndmiss: Boolean = true,
  nMissEntries: Int = 1,
  nProbeEntries: Int = 1,
  nReleaseEntries: Int = 1,
  nMMIOEntries: Int = 1,
  nMMIOs: Int = 1,
  blockBytes: Int = 64,
  alwaysReleaseData: Boolean = false
) extends L1CacheParameters {
  // if sets * blockBytes > 4KB(page size),
  // cache alias will happen,
  // we need to avoid this by recoding additional bits in L2 cache
  val setBytes = nSets * blockBytes
  // number of virtual-index bits above the page offset that L2 must record to
  // resolve aliases; None when the index fits entirely inside the page offset
  val aliasBitsOpt = if(setBytes > pageSize) Some(log2Ceil(setBytes / pageSize)) else None

  // ECC encoder/decoder for the tag array, selected by name
  def tagCode: Code = Code.fromString(tagECC)

  // ECC encoder/decoder for the data array, selected by name
  def dataCode: Code = Code.fromString(dataECC)
}

// Physical Address
// --------------------------------------
// | Physical Tag | PIndex | Offset |
// --------------------------------------
// |
// DCacheTagOffset
//
// Virtual Address
// --------------------------------------
// | Above index | Set | Bank | Offset |
// --------------------------------------
// | | | |
// | | | 0
// | | DCacheBankOffset
// | DCacheSetOffset
// DCacheAboveIndexOffset

// Default DCache size = 64 sets * 8 ways * 8 banks * 8 Byte = 32K Byte

/** Derived DCache constants and helper functions shared by all DCache modules. */
trait HasDCacheParameters extends HasL1CacheParameters {
  val cacheParams = dcacheParameters
  val cfg = cacheParams

  // width of one data word after ECC encoding
  def encWordBits = cacheParams.dataCode.width(wordBits)

  def encRowBits = encWordBits * rowWords // for DuplicatedDataArray only
  // number of ECC check bits per data word
  def eccBits = encWordBits - wordBits

  // width of one tag after ECC encoding
  def encTagBits = cacheParams.tagCode.width(tagBits)
  // number of ECC check bits per tag
  def eccTagBits = encTagBits - tagBits

  def blockProbeAfterGrantCycles = 8 // give the processor some time to issue a request after a grant

  // encoding of the requester type carried in DCacheWordReq.instrtype
  def nSourceType = 10
  def sourceTypeWidth = log2Up(nSourceType)
  // non-prefetch source < 3
  def LOAD_SOURCE = 0
  def STORE_SOURCE = 1
  def AMO_SOURCE = 2
  // prefetch source >= 3
  def DCACHE_PREFETCH_SOURCE = 3
  def SOFT_PREFETCH = 4
  def HW_PREFETCH_AGT = 5
  def HW_PREFETCH_PHT_CUR = 6
  def HW_PREFETCH_PHT_INC = 7
  def HW_PREFETCH_PHT_DEC = 8
  def HW_PREFETCH_BOP = 9
  def HW_PREFETCH_STRIDE = 10

  // each source use a id to distinguish its multiple reqs
  def reqIdWidth = log2Up(nEntries) max log2Up(StoreBufferSize)

  require(isPow2(cfg.nMissEntries)) // TODO
  // require(isPow2(cfg.nReleaseEntries))
  require(cfg.nMissEntries < cfg.nReleaseEntries)
  // miss-queue ids and release-queue ids share one TileLink source-id space:
  // [0, nMissEntries) for misses, [releaseIdBase, nEntries) for releases
  val nEntries = cfg.nMissEntries + cfg.nReleaseEntries
  val releaseIdBase = cfg.nMissEntries

  // banked dcache support
  val DCacheSetDiv = 1
  val DCacheSets = cacheParams.nSets
  val DCacheWays = cacheParams.nWays
  val DCacheBanks = 8 // hardcoded
  val DCacheDupNum = 16
  val DCacheSRAMRowBits = cacheParams.rowBits // hardcoded
  val DCacheWordBits = 64 // hardcoded
  val DCacheWordBytes = DCacheWordBits / 8
  val DCacheVWordBytes = VLEN / 8
  require(DCacheSRAMRowBits == 64)

  val DCacheSetDivBits = log2Ceil(DCacheSetDiv)
  val DCacheSetBits = log2Ceil(DCacheSets)
  // total data capacity in bits / bytes / 64-bit words
  val DCacheSizeBits = DCacheSRAMRowBits * DCacheBanks * DCacheWays * DCacheSets
  val DCacheSizeBytes = DCacheSizeBits / 8
  val DCacheSizeWords = DCacheSizeBits / 64 // TODO

  // number of untranslated address bits (page offset width)
  val DCacheSameVPAddrLength = 12

  val DCacheSRAMRowBytes = DCacheSRAMRowBits / 8
  val DCacheWordOffset = log2Up(DCacheWordBytes)
  val DCacheVWordOffset = log2Up(DCacheVWordBytes)

  // address layout offsets, see the diagram above this trait
  val DCacheBankOffset = log2Up(DCacheSRAMRowBytes)
  val DCacheSetOffset = DCacheBankOffset + log2Up(DCacheBanks)
  val DCacheAboveIndexOffset = DCacheSetOffset + log2Up(DCacheSets)
  // tag starts no higher than the page offset so it can be checked with the
  // physical address after translation
  val DCacheTagOffset = DCacheAboveIndexOffset min DCacheSameVPAddrLength
  val DCacheLineOffset = DCacheSetOffset

  // uncache
  val uncacheIdxBits = log2Up(StoreQueueSize + 1) max log2Up(VirtualLoadQueueSize + 1)
  // hardware prefetch parameters
  // high confidence hardware prefetch port
  val HighConfHWPFLoadPort = LoadPipelineWidth - 1 // use the last load port by default
  val IgnorePrefetchConfidence = false

  // parameters about duplicating regs to solve fanout
  // In Main Pipe:
  // tag_write.ready -> data_write.valid * 8 banks
  // tag_write.ready -> meta_write.valid
  // tag_write.ready -> tag_write.valid
  // tag_write.ready -> err_write.valid
  // tag_write.ready -> wb.valid
  val nDupTagWriteReady = DCacheBanks + 4
  // In Main Pipe:
  // data_write.ready -> data_write.valid * 8 banks
  // data_write.ready -> meta_write.valid
  // data_write.ready -> tag_write.valid
  // data_write.ready -> err_write.valid
  // data_write.ready -> wb.valid
  val nDupDataWriteReady = DCacheBanks + 4
  val nDupWbReady = DCacheBanks + 4
  val nDupStatus = nDupTagWriteReady + nDupDataWriteReady
  // indices into the duplicated-status vector for each consumer
  val dataWritePort = 0
  val metaWritePort = DCacheBanks
  val tagWritePort = metaWritePort + 1
  val errWritePort = tagWritePort + 1
  val wbPort = errWritePort + 1

  // which set-division a set index belongs to (0 when there is only one division)
  def set_to_dcache_div(set: UInt) = {
    require(set.getWidth >= DCacheSetBits)
    if (DCacheSetDivBits == 0) 0.U else set(DCacheSetDivBits-1, 0)
  }

  // set index within its division
  def set_to_dcache_div_set(set: UInt) = {
    require(set.getWidth >= DCacheSetBits)
    set(DCacheSetBits - 1, DCacheSetDivBits)
  }

  // extract the bank index from an address
  def addr_to_dcache_bank(addr: UInt) = {
    require(addr.getWidth >= DCacheSetOffset)
    addr(DCacheSetOffset-1, DCacheBankOffset)
  }

  // extract the set-division index from an address (0 when undivided)
  def addr_to_dcache_div(addr: UInt) = {
    require(addr.getWidth >= DCacheAboveIndexOffset)
    if(DCacheSetDivBits == 0) 0.U else addr(DCacheSetOffset + DCacheSetDivBits - 1, DCacheSetOffset)
  }

  // extract the set index within a division from an address
  def addr_to_dcache_div_set(addr: UInt) = {
    require(addr.getWidth >= DCacheAboveIndexOffset)
    addr(DCacheAboveIndexOffset - 1, DCacheSetOffset + DCacheSetDivBits)
  }

  // extract the full set index from an address
  def addr_to_dcache_set(addr: UInt) = {
    require(addr.getWidth >= DCacheAboveIndexOffset)
    addr(DCacheAboveIndexOffset-1, DCacheSetOffset)
  }

  // slice one bank's row out of a full cache-line data vector
  def get_data_of_bank(bank: Int, data: UInt) = {
    require(data.getWidth >= (bank+1)*DCacheSRAMRowBits)
    data(DCacheSRAMRowBits * (bank + 1) - 1, DCacheSRAMRowBits * bank)
  }

  // slice one bank's byte-mask out of a full cache-line mask
  def get_mask_of_bank(bank: Int, data: UInt) = {
    require(data.getWidth >= (bank+1)*DCacheSRAMRowBytes)
    data(DCacheSRAMRowBytes * (bank + 1) - 1, DCacheSRAMRowBytes * bank)
  }

  // direct-mapped way prediction: use the address bits just above the set index
  def get_direct_map_way(addr:UInt): UInt = {
    addr(DCacheAboveIndexOffset + log2Up(DCacheWays) - 1, DCacheAboveIndexOffset)
  }

  /** Wire `in` through a priority [[Arbiter]] into `out`. */
  def arbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  /** Like [[arbiter]], but with a pipeline register between arbiter and `out`. */
  def arbiter_with_pipereg[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    AddPipelineReg(arb.io.out, out, false.B)
  }

  /** Like [[arbiter_with_pipereg]], but also drives duplicated copies of the
    * output (each through its own pipeline register) to cut fanout.
    */
  def arbiter_with_pipereg_N_dup[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    dups: Seq[DecoupledIO[T]],
    name: Option[String] = None): Unit = {
    val arb = Module(new Arbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    for (dup <- dups) {
      AddPipelineReg(arb.io.out, dup, false.B)
    }
    AddPipelineReg(arb.io.out, out, false.B)
  }

  /** Wire `in` through a round-robin [[RRArbiter]] into `out`. */
  def rrArbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new RRArbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  /** Wire `in` through a [[FastArbiter]] (utility package) into `out`. */
  def fastArbiter[T <: Bundle](
    in: Seq[DecoupledIO[T]],
    out: DecoupledIO[T],
    name: Option[String] = None): Unit = {
    val arb = Module(new FastArbiter[T](chiselTypeOf(out.bits), in.size))
    if (name.nonEmpty) { arb.suggestName(s"${name.get}_arb") }
    for ((a, req) <- arb.io.in.zip(in)) {
      a <> req
    }
    out <> arb.io.out
  }

  val numReplaceRespPorts = 2

  require(isPow2(nSets), s"nSets($nSets) must be pow2")
  require(isPow2(nWays), s"nWays($nWays) must be pow2")
  require(full_divide(rowBits, wordBits), s"rowBits($rowBits) must be multiple of wordBits($wordBits)")
  require(full_divide(beatBits, rowBits), s"beatBits($beatBits) must be multiple of rowBits($rowBits)")
}

/** Base class for DCache-internal modules. */
abstract class DCacheModule(implicit p: Parameters) extends L1CacheModule
  with HasDCacheParameters

/** Base class for DCache-internal bundles. */
abstract class DCacheBundle(implicit p: Parameters) extends L1CacheBundle
  with HasDCacheParameters

/** Identifies a (set, way) touched by an access, for replacement-state update. */
class ReplacementAccessBundle(implicit p: Parameters) extends DCacheBundle {
  val set = UInt(log2Up(nSets).W)
  val way = UInt(log2Up(nWays).W)
}

/** Ask the replacer for a victim way in `set`; `way` is the replacer's answer,
  * `dmWay` is the direct-mapped prediction output.
  */
class ReplacementWayReqIO(implicit p: Parameters) extends DCacheBundle {
  val set = ValidIO(UInt(log2Up(nSets).W))
  val dmWay = Output(UInt(log2Up(nWays).W))
  val way = Input(UInt(log2Up(nWays).W))
}

/** Per-line flag metadata kept beside the coherence state. */
class DCacheExtraMeta(implicit p: Parameters) extends DCacheBundle
{
  val error = Bool() // cache line has been marked as corrupted by l2 / ecc error detected when store
  val prefetch = Bool() // cache line is first required by prefetch
  val access = Bool() // cache line has been accessed by load / store

  // val debug_access_timestamp = UInt(64.W) // last time a load / store / refill access that cacheline
}

// memory request in word granularity(load, mmio, lr/sc, atomics)
class DCacheWordReq(implicit p: Parameters) extends DCacheBundle
{
  val cmd = UInt(M_SZ.W)            // memory command
  val vaddr = UInt(VAddrBits.W)     // virtual address (index is virtual)
  val data = UInt(VLEN.W)
  val mask = UInt((VLEN/8).W)       // per-byte write mask
  val id = UInt(reqIdWidth.W)       // source-local request id
  val instrtype = UInt(sourceTypeWidth.W) // one of the *_SOURCE encodings above
  val isFirstIssue = Bool()
  val replayCarry = new ReplayCarry(nWays) // way-prediction info carried across replays

  val debug_robIdx = UInt(log2Ceil(RobSize).W)
  def dump() = {
    XSDebug("DCacheWordReq: cmd: %x vaddr: %x data: %x mask: %x id: %d\n",
      cmd, vaddr, data, mask, id)
  }
}

// memory request in word granularity(store)
class DCacheLineReq(implicit p: Parameters) extends DCacheBundle
{
  val cmd = UInt(M_SZ.W)
  val vaddr = UInt(VAddrBits.W)
  val addr = UInt(PAddrBits.W)
  val data = UInt((cfg.blockBytes * 8).W) // one full cache line of data
  val mask = UInt(cfg.blockBytes.W)       // per-byte mask over the line
  val id = UInt(reqIdWidth.W)
  def dump() = {
    XSDebug("DCacheLineReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
      cmd, addr, data, mask, id)
  }
  // set index derived from the virtual address
  def idx: UInt = get_idx(vaddr)
}

/** Word request that also carries the physical address and a whole-line flag. */
class DCacheWordReqWithVaddr(implicit p: Parameters) extends DCacheWordReq {
  val addr = UInt(PAddrBits.W)
  val wline = Bool()
}

/** Common fields of a word-granularity response. */
class BaseDCacheWordResp(implicit p: Parameters) extends DCacheBundle
{
  // read in s2
  val data = UInt(VLEN.W)
  // select in s3
  val data_delayed = UInt(VLEN.W)
  val id = UInt(reqIdWidth.W)
  // cache req missed, send it to miss queue
  val miss = Bool()
  // cache miss, and failed to enter the missqueue, replay from RS is needed
  val replay = Bool()
  val replayCarry = new ReplayCarry(nWays)
  // data has been corrupted
  val tag_error = Bool() // tag error
  val mshr_id = UInt(log2Up(cfg.nMissEntries).W) // which MSHR handles the miss

  val debug_robIdx = UInt(log2Ceil(RobSize).W)
  def dump() = {
    XSDebug("DCacheWordResp: data: %x id: %d miss: %b replay: %b\n",
      data, id, miss, replay)
  }
}

class DCacheWordResp(implicit p: Parameters) extends BaseDCacheWordResp
{
  val meta_prefetch = Bool() // line was brought in by a prefetch
  val meta_access = Bool()   // line was accessed before
  // s2
  val handled = Bool()
  // s3: 1 cycle after data resp
  val error_delayed = Bool() // all kinds of errors, include tag error
  val replacementUpdated = Bool()
}

class BankedDCacheWordResp(implicit p: Parameters)
extends DCacheWordResp
{
  // raw per-bank read data plus a one-hot vector of the banks actually read
  val bank_data = Vec(DCacheBanks, Bits(DCacheSRAMRowBits.W))
  val bank_oh = UInt(DCacheBanks.W)
}

class DCacheWordRespWithError(implicit p: Parameters) extends BaseDCacheWordResp
{
  val error = Bool() // all kinds of errors, include tag error
}

/** Line-granularity response (store path). */
class DCacheLineResp(implicit p: Parameters) extends DCacheBundle
{
  val data = UInt((cfg.blockBytes * 8).W)
  // cache req missed, send it to miss queue
  val miss = Bool()
  // cache req nacked, replay it later
  val replay = Bool()
  val id = UInt(reqIdWidth.W)
  def dump() = {
    XSDebug("DCacheLineResp: data: %x id: %d miss: %b replay: %b\n",
      data, id, miss, replay)
  }
}

/** Refill notification broadcast to the load queue when a miss returns. */
class Refill(implicit p: Parameters) extends DCacheBundle
{
  val addr = UInt(PAddrBits.W)
  val data = UInt(l1BusDataWidth.W) // one bus beat of refill data
  val error = Bool() // refilled data has been corrupted
  // for debug usage
  val data_raw = UInt((cfg.blockBytes * 8).W)
  val hasdata = Bool()
  val refill_done = Bool()
  def dump() = {
    XSDebug("Refill: addr: %x data: %x\n", addr, data)
  }
  val id = UInt(log2Up(cfg.nMissEntries).W) // MSHR id of the completing miss
}

/** Cacheline release hint, used for ld-ld violation checking. */
class Release(implicit p: Parameters) extends DCacheBundle
{
  val paddr = UInt(PAddrBits.W)
  def dump() = {
    XSDebug("Release: paddr: %x\n", paddr(PAddrBits-1, DCacheTagOffset))
  }
}

/** Decoupled word-granularity request/response pair. */
class DCacheWordIO(implicit p: Parameters) extends DCacheBundle
{
  val req = DecoupledIO(new DCacheWordReq)
  val resp = Flipped(DecoupledIO(new DCacheWordResp))
}


/** Uncached (MMIO) word request. */
class UncacheWordReq(implicit p: Parameters) extends DCacheBundle
{
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W) // physical address, no translation on this path
  val data = UInt(XLEN.W)
  val mask = UInt((XLEN/8).W)
  val id = UInt(uncacheIdxBits.W)
  val instrtype = UInt(sourceTypeWidth.W)
  val atomic = Bool()
  val isFirstIssue = Bool()
  val replayCarry = new ReplayCarry(nWays)

  def dump() = {
    XSDebug("UncacheWordReq: cmd: %x addr: %x data: %x mask: %x id: %d\n",
      cmd, addr, data, mask, id)
  }
}

class UncacheWordResp(implicit p: Parameters) extends DCacheBundle
{
  val data = UInt(XLEN.W)
  val data_delayed = UInt(XLEN.W)
  val id = UInt(uncacheIdxBits.W)
  val miss = Bool()
  val replay = Bool()
  val tag_error = Bool()
  val error = Bool()
  val replayCarry = new ReplayCarry(nWays)
  val mshr_id = UInt(log2Up(cfg.nMissEntries).W) // FIXME: why uncacheWordResp is not merged to baseDcacheResp

  val debug_robIdx = UInt(log2Ceil(RobSize).W)
  def dump() = {
    XSDebug("UncacheWordResp: data: %x id: %d miss: %b replay: %b, tag_error: %b, error: %b\n",
      data, id, miss, replay, tag_error, error)
  }
}

/** Decoupled uncached request/response pair. */
class UncacheWordIO(implicit p: Parameters) extends DCacheBundle
{
  val req = DecoupledIO(new UncacheWordReq)
  val resp = Flipped(DecoupledIO(new UncacheWordResp))
}

/** Response of an atomic operation executed in the main pipe. */
class AtomicsResp(implicit p: Parameters) extends DCacheBundle {
  val data = UInt(DataBits.W)
  val miss = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
  val replay = Bool()
  val error = Bool()

  val ack_miss_queue = Bool()

  val id = UInt(reqIdWidth.W)
}

/** Atomics port: requests go straight into the main pipe. */
class AtomicWordIO(implicit p: Parameters) extends DCacheBundle
{
  val req = DecoupledIO(new MainPipeReq)
  val resp = Flipped(ValidIO(new AtomicsResp))
  val block_lr = Input(Bool()) // main pipe asks LSU to hold LR requests
}

// used by load unit
class DCacheLoadIO(implicit p: Parameters) extends DCacheWordIO
{
  // kill previous cycle's req
  val s1_kill = Output(Bool())
  val s2_kill = Output(Bool())
  val s0_pc = Output(UInt(VAddrBits.W))
  val s1_pc = Output(UInt(VAddrBits.W))
  val s2_pc = Output(UInt(VAddrBits.W))
  // cycle 0: load has updated replacement before
  val replacementUpdated = Output(Bool())
  // cycle 0: virtual address: req.addr
  // cycle 1: physical address: s1_paddr
  val s1_paddr_dup_lsu = Output(UInt(PAddrBits.W)) // lsu side paddr
  val s1_paddr_dup_dcache = Output(UInt(PAddrBits.W)) // dcache side paddr
  val s1_disable_fast_wakeup = Input(Bool())
  // cycle 2: hit signal
  val s2_hit = Input(Bool()) // hit signal for lsu,
  val s2_first_hit = Input(Bool())
  val s2_bank_conflict = Input(Bool())
  val s2_wpu_pred_fail = Input(Bool())
  val s2_mq_nack = Input(Bool())

  // debug
  val debug_s1_hit_way = Input(UInt(nWays.W))
  val debug_s2_pred_way_num = Input(UInt(XLEN.W))
  val debug_s2_dm_way_num = Input(UInt(XLEN.W))
  val debug_s2_real_way_num = Input(UInt(XLEN.W))
}

/** Decoupled line-granularity request/response pair. */
class DCacheLineIO(implicit p: Parameters) extends DCacheBundle
{
  val req = DecoupledIO(new DCacheLineReq)
  val resp = Flipped(DecoupledIO(new DCacheLineResp))
}

class DCacheToSbufferIO(implicit p: Parameters) extends DCacheBundle {
  // sbuffer will directly send request to dcache main pipe
  val req = Flipped(Decoupled(new DCacheLineReq))

  val main_pipe_hit_resp = ValidIO(new DCacheLineResp)
  val refill_hit_resp = ValidIO(new DCacheLineResp)

  val replay_resp = ValidIO(new DCacheLineResp)

  // the two response paths that count as a successful store
  def hit_resps: Seq[ValidIO[DCacheLineResp]] = Seq(main_pipe_hit_resp, refill_hit_resp)
}

// forward tilelink channel D's data to ldu
class DcacheToLduForwardIO(implicit p: Parameters) extends DCacheBundle {
  val valid = Bool()
  val data = UInt(l1BusDataWidth.W) // one D-channel beat
  val mshrid = UInt(log2Up(cfg.nMissEntries).W)
  val last = Bool() // whether this beat is the last of the refill

  def apply(req_valid : Bool, req_data : UInt, req_mshrid : UInt, req_last : Bool) = {
    valid := req_valid
    data := req_data
    mshrid := req_mshrid
    last := req_last
  }

  def dontCare() = {
    valid := false.B
    data := DontCare
    mshrid := DontCare
    last := DontCare
  }

  /** Try to forward this beat to a load.
    * Matches on MSHR id and on which beat of the line the load needs
    * (bit log2Up(refillBytes) of the paddr selects first/last beat).
    * Returns (valid, data) registered by one cycle.
    */
  def forward(req_valid : Bool, req_mshr_id : UInt, req_paddr : UInt) = {
    val all_match = req_valid && valid &&
                    req_mshr_id === mshrid &&
                    req_paddr(log2Up(refillBytes)) === last

    val forward_D = RegInit(false.B)
    val forwardData = RegInit(VecInit(List.fill(VLEN/8)(0.U(8.W))))

    // split the beat into 64-bit words and pick the word pair the load wants
    val block_idx = req_paddr(log2Up(refillBytes) - 1, 3)
    val block_data = Wire(Vec(l1BusDataWidth / 64, UInt(64.W)))
    (0 until l1BusDataWidth / 64).map(i => {
      block_data(i) := data(64 * i + 63, 64 * i)
    })
    val selected_data = Wire(UInt(128.W))
    // if paddr(3) is set the access sits in the upper word: replicate it;
    // otherwise concatenate the adjacent pair
    selected_data := Mux(req_paddr(3), Fill(2, block_data(block_idx)), Cat(block_data(block_idx + 1.U), block_data(block_idx)))

    forward_D := all_match
    for (i <- 0 until VLEN/8) {
      forwardData(i) := selected_data(8 * i + 7, 8 * i)
    }

    (forward_D, forwardData)
  }
}

/** Snapshot of one MSHR's refill state, used for load forwarding. */
class MissEntryForwardIO(implicit p: Parameters) extends DCacheBundle {
  val inflight = Bool()
  val paddr = UInt(PAddrBits.W)
  val raw_data = Vec(blockRows, UInt(rowBits.W))
  val firstbeat_valid = Bool()
  val lastbeat_valid = Bool()

  def apply(mshr_valid : Bool, mshr_paddr : UInt, mshr_rawdata : Vec[UInt], mshr_first_valid : Bool, mshr_last_valid : Bool) = {
    inflight := mshr_valid
    paddr := mshr_paddr
    raw_data := mshr_rawdata
    firstbeat_valid := mshr_first_valid
    lastbeat_valid := mshr_last_valid
  }

  // check if we can forward from mshr or D channel
  def check(req_valid : Bool, req_paddr : UInt) = {
    RegNext(req_valid && inflight && req_paddr(PAddrBits - 1, blockOffBits) === paddr(PAddrBits - 1, blockOffBits))
  }

  /** Forward data for the requested beat, if that beat has arrived.
    * Returns (valid, data) registered by one cycle.
    */
  def forward(req_valid : Bool, req_paddr : UInt) = {
    // the requested beat (selected by bit log2Up(refillBytes)) must be valid
    val all_match = (req_paddr(log2Up(refillBytes)) === 0.U && firstbeat_valid) ||
                    (req_paddr(log2Up(refillBytes)) === 1.U && lastbeat_valid)

    val forward_mshr = RegInit(false.B)
    val forwardData = RegInit(VecInit(List.fill(VLEN/8)(0.U(8.W))))

    // index across the whole line here (one bit wider than the D-channel case)
    val block_idx = req_paddr(log2Up(refillBytes), 3)
    val block_data = raw_data

    val selected_data = Wire(UInt(128.W))
    selected_data := Mux(req_paddr(3), Fill(2, block_data(block_idx)), Cat(block_data(block_idx + 1.U), block_data(block_idx)))

    forward_mshr := all_match
    for (i <- 0 until VLEN/8) {
      forwardData(i) := selected_data(8 * i + 7, 8 * i)
    }

    (forward_mshr, forwardData)
  }
}

// forward mshr's data to ldu
class LduToMissqueueForwardIO(implicit p: Parameters) extends DCacheBundle {
  // req
  val valid = Input(Bool())
  val mshrid = Input(UInt(log2Up(cfg.nMissEntries).W))
  val paddr = Input(UInt(PAddrBits.W))
  // resp
  val forward_mshr = Output(Bool())
  val forwardData = Output(Vec(VLEN/8, UInt(8.W)))
  val forward_result_valid = Output(Bool())

  // drive a sink port of the same type and mirror its response back
  def connect(sink: LduToMissqueueForwardIO) = {
    sink.valid := valid
    sink.mshrid := mshrid
    sink.paddr := paddr
    forward_mshr := sink.forward_mshr
    forwardData := sink.forwardData
    forward_result_valid := sink.forward_result_valid
  }

  def forward() = {
    (forward_result_valid, forward_mshr, forwardData)
  }
}

/** Everything the DCache exposes to the load/store unit. */
class DCacheToLsuIO(implicit p: Parameters) extends DCacheBundle {
  val load = Vec(LoadPipelineWidth, Flipped(new DCacheLoadIO)) // for speculative load
  val lsq = ValidIO(new Refill) // refill to load queue, wake up load misses
  val tl_d_channel = Output(new DcacheToLduForwardIO)
  val store = new DCacheToSbufferIO // for sbuffer
  val atomics = Flipped(new AtomicWordIO) // atomics reqs
  val release = ValidIO(new Release) // cacheline release hint for ld-ld violation check
  val forward_D = Output(Vec(LoadPipelineWidth, new DcacheToLduForwardIO))
  val forward_mshr = Vec(LoadPipelineWidth, new LduToMissqueueForwardIO)
}

/** Top-level DCache IO. */
class DCacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt(8.W))
  val l2_pf_store_only = Input(Bool())
  val lsu = new DCacheToLsuIO
  val csr = new L1CacheToCsrIO
  val error = new L1CacheErrorInfo
  val mshrFull = Output(Bool())
  val force_write = Input(Bool())
}


class
DCache()(implicit p: Parameters) extends LazyModule with HasDCacheParameters {

  // extra TileLink request fields: prefetch hint, request source, virtual
  // address bits (for alias handling in L2), and alias bits when the virtual
  // index exceeds the page size
  val reqFields: Seq[BundleFieldBase] = Seq(
    PrefetchField(),
    ReqSourceField(),
    VaddrField(VAddrBits - blockOffBits),
  ) ++ cacheParams.aliasBitsOpt.map(AliasField)
  val echoFields: Seq[BundleFieldBase] = Nil

  // one source-id range shared by miss queue and release queue (see nEntries)
  val clientParameters = TLMasterPortParameters.v1(
    Seq(TLMasterParameters.v1(
      name = "dcache",
      sourceId = IdRange(0, nEntries + 1),
      supportsProbe = TransferSizes(cfg.blockBytes)
    )),
    requestFields = reqFields,
    echoFields = echoFields
  )

  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new DCacheImp(this)
}


/** DCache implementation: instantiates the data/meta/tag arrays, the load
  * pipes, main pipe, refill pipe, miss queue, probe queue and writeback
  * queue, and wires them together.
  */
class DCacheImp(outer: DCache) extends LazyModuleImp(outer) with HasDCacheParameters with HasPerfEvents {

  val io = IO(new DCacheIO)

  val (bus, edge) = outer.clientNode.out.head
  require(bus.d.bits.data.getWidth == l1BusDataWidth, "DCache: tilelink width does not match")

  // print the elaborated geometry for this configuration
  println("DCache:")
  println(" DCacheSets: " + DCacheSets)
  println(" DCacheSetDiv: " + DCacheSetDiv)
  println(" DCacheWays: " + DCacheWays)
  println(" DCacheBanks: " + DCacheBanks)
  println(" DCacheSRAMRowBits: " + DCacheSRAMRowBits)
  println(" DCacheWordOffset: " + DCacheWordOffset)
  println(" DCacheBankOffset: " + DCacheBankOffset)
  println(" DCacheSetOffset: " + DCacheSetOffset)
  println(" DCacheTagOffset: " + DCacheTagOffset)
  println(" DCacheAboveIndexOffset: " + DCacheAboveIndexOffset)
  println(" WPUEnable: " + dwpuParam.enWPU)
  println(" WPUEnableCfPred: " + dwpuParam.enCfPred)
  println(" WPUAlgorithm: " + dwpuParam.algoName)

  //----------------------------------------
  // core data structures
  // data array flavor depends on whether way prediction (WPU) is enabled
  val bankedDataArray = if(dwpuParam.enWPU) Module(new SramedDataArray) else Module(new BankedDataArray)
  val metaArray = Module(new L1CohMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 2))
  val errorArray =
Module(new L1FlagMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 2)) 753 val prefetchArray = Module(new L1FlagMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = 2)) // prefetch flag array 754 val accessArray = Module(new L1FlagMetaArray(readPorts = LoadPipelineWidth + 1, writePorts = LoadPipelineWidth + 2)) 755 val tagArray = Module(new DuplicatedTagArray(readPorts = LoadPipelineWidth + 1)) 756 bankedDataArray.dump() 757 758 //---------------------------------------- 759 // core modules 760 val ldu = Seq.tabulate(LoadPipelineWidth)({ i => Module(new LoadPipe(i))}) 761 // val atomicsReplayUnit = Module(new AtomicsReplayEntry) 762 val mainPipe = Module(new MainPipe) 763 val refillPipe = Module(new RefillPipe) 764 val missQueue = Module(new MissQueue(edge)) 765 val probeQueue = Module(new ProbeQueue(edge)) 766 val wb = Module(new WritebackQueue(edge)) 767 768 missQueue.io.hartId := io.hartId 769 missQueue.io.l2_pf_store_only := RegNext(io.l2_pf_store_only, false.B) 770 771 val errors = ldu.map(_.io.error) ++ // load error 772 Seq(mainPipe.io.error) // store / misc error 773 io.error <> RegNext(Mux1H(errors.map(e => RegNext(e.valid) -> RegNext(e)))) 774 775 //---------------------------------------- 776 // meta array 777 778 // read / write coh meta 779 val meta_read_ports = ldu.map(_.io.meta_read) ++ 780 Seq(mainPipe.io.meta_read) 781 val meta_resp_ports = ldu.map(_.io.meta_resp) ++ 782 Seq(mainPipe.io.meta_resp) 783 val meta_write_ports = Seq( 784 mainPipe.io.meta_write, 785 refillPipe.io.meta_write 786 ) 787 meta_read_ports.zip(metaArray.io.read).foreach { case (p, r) => r <> p } 788 meta_resp_ports.zip(metaArray.io.resp).foreach { case (p, r) => p := r } 789 meta_write_ports.zip(metaArray.io.write).foreach { case (p, w) => w <> p } 790 791 // read extra meta 792 meta_read_ports.zip(errorArray.io.read).foreach { case (p, r) => r <> p } 793 meta_read_ports.zip(prefetchArray.io.read).foreach { case (p, r) => r <> p } 794 
meta_read_ports.zip(accessArray.io.read).foreach { case (p, r) => r <> p } 795 val extra_meta_resp_ports = ldu.map(_.io.extra_meta_resp) ++ 796 Seq(mainPipe.io.extra_meta_resp) 797 extra_meta_resp_ports.zip(errorArray.io.resp).foreach { case (p, r) => { 798 (0 until nWays).map(i => { p(i).error := r(i) }) 799 }} 800 extra_meta_resp_ports.zip(prefetchArray.io.resp).foreach { case (p, r) => { 801 (0 until nWays).map(i => { p(i).prefetch := r(i) }) 802 }} 803 extra_meta_resp_ports.zip(accessArray.io.resp).foreach { case (p, r) => { 804 (0 until nWays).map(i => { p(i).access := r(i) }) 805 }} 806 807 // write extra meta 808 val error_flag_write_ports = Seq( 809 mainPipe.io.error_flag_write, // error flag generated by corrupted store 810 refillPipe.io.error_flag_write // corrupted signal from l2 811 ) 812 error_flag_write_ports.zip(errorArray.io.write).foreach { case (p, w) => w <> p } 813 814 val prefetch_flag_write_ports = Seq( 815 mainPipe.io.prefetch_flag_write, // set prefetch_flag to false if coh is set to Nothing 816 refillPipe.io.prefetch_flag_write // refill required by prefetch will set prefetch_flag 817 ) 818 prefetch_flag_write_ports.zip(prefetchArray.io.write).foreach { case (p, w) => w <> p } 819 820 val access_flag_write_ports = ldu.map(_.io.access_flag_write) ++ Seq( 821 mainPipe.io.access_flag_write, 822 refillPipe.io.access_flag_write 823 ) 824 access_flag_write_ports.zip(accessArray.io.write).foreach { case (p, w) => w <> p } 825 826 //---------------------------------------- 827 // tag array 828 require(tagArray.io.read.size == (ldu.size + 1)) 829 val tag_write_intend = missQueue.io.refill_pipe_req.valid || mainPipe.io.tag_write_intend 830 assert(!RegNext(!tag_write_intend && tagArray.io.write.valid)) 831 ldu.zipWithIndex.foreach { 832 case (ld, i) => 833 tagArray.io.read(i) <> ld.io.tag_read 834 ld.io.tag_resp := tagArray.io.resp(i) 835 ld.io.tag_read.ready := !tag_write_intend 836 } 837 tagArray.io.read.last <> mainPipe.io.tag_read 838 
mainPipe.io.tag_resp := tagArray.io.resp.last 839 840 val fake_tag_read_conflict_this_cycle = PopCount(ldu.map(ld=> ld.io.tag_read.valid)) 841 XSPerfAccumulate("fake_tag_read_conflict", fake_tag_read_conflict_this_cycle) 842 843 val tag_write_arb = Module(new Arbiter(new TagWriteReq, 2)) 844 tag_write_arb.io.in(0) <> refillPipe.io.tag_write 845 tag_write_arb.io.in(1) <> mainPipe.io.tag_write 846 tagArray.io.write <> tag_write_arb.io.out 847 848 ldu.map(m => { 849 m.io.vtag_update.valid := tagArray.io.write.valid 850 m.io.vtag_update.bits := tagArray.io.write.bits 851 }) 852 853 //---------------------------------------- 854 // data array 855 mainPipe.io.data_read.zip(ldu).map(x => x._1 := x._2.io.lsu.req.valid) 856 857 val dataWriteArb = Module(new Arbiter(new L1BankedDataWriteReq, 2)) 858 dataWriteArb.io.in(0) <> refillPipe.io.data_write 859 dataWriteArb.io.in(1) <> mainPipe.io.data_write 860 861 bankedDataArray.io.write <> dataWriteArb.io.out 862 863 for (bank <- 0 until DCacheBanks) { 864 val dataWriteArb_dup = Module(new Arbiter(new L1BankedDataWriteReqCtrl, 2)) 865 dataWriteArb_dup.io.in(0).valid := refillPipe.io.data_write_dup(bank).valid 866 dataWriteArb_dup.io.in(0).bits := refillPipe.io.data_write_dup(bank).bits 867 dataWriteArb_dup.io.in(1).valid := mainPipe.io.data_write_dup(bank).valid 868 dataWriteArb_dup.io.in(1).bits := mainPipe.io.data_write_dup(bank).bits 869 870 bankedDataArray.io.write_dup(bank) <> dataWriteArb_dup.io.out 871 } 872 873 bankedDataArray.io.readline <> mainPipe.io.data_readline 874 bankedDataArray.io.readline_intend := mainPipe.io.data_read_intend 875 mainPipe.io.readline_error_delayed := bankedDataArray.io.readline_error_delayed 876 mainPipe.io.data_resp := bankedDataArray.io.readline_resp 877 878 (0 until LoadPipelineWidth).map(i => { 879 bankedDataArray.io.read(i) <> ldu(i).io.banked_data_read 880 bankedDataArray.io.is128Req(i) <> ldu(i).io.is128Req 881 bankedDataArray.io.read_error_delayed(i) <> ldu(i).io.read_error_delayed 882 
883 ldu(i).io.banked_data_resp := bankedDataArray.io.read_resp_delayed(i) 884 885 ldu(i).io.bank_conflict_slow := bankedDataArray.io.bank_conflict_slow(i) 886 }) 887 888 (0 until LoadPipelineWidth).map(i => { 889 val (_, _, done, _) = edge.count(bus.d) 890 when(bus.d.bits.opcode === TLMessages.GrantData) { 891 io.lsu.forward_D(i).apply(bus.d.valid, bus.d.bits.data, bus.d.bits.source, done) 892 }.otherwise { 893 io.lsu.forward_D(i).dontCare() 894 } 895 }) 896 // tl D channel wakeup 897 val (_, _, done, _) = edge.count(bus.d) 898 when (bus.d.bits.opcode === TLMessages.GrantData || bus.d.bits.opcode === TLMessages.Grant) { 899 io.lsu.tl_d_channel.apply(bus.d.valid, bus.d.bits.data, bus.d.bits.source, done) 900 } .otherwise { 901 io.lsu.tl_d_channel.dontCare() 902 } 903 mainPipe.io.force_write <> io.force_write 904 905 /** dwpu */ 906 val dwpu = Module(new DCacheWpuWrapper(LoadPipelineWidth)) 907 for(i <- 0 until LoadPipelineWidth){ 908 dwpu.io.req(i) <> ldu(i).io.dwpu.req(0) 909 dwpu.io.resp(i) <> ldu(i).io.dwpu.resp(0) 910 dwpu.io.lookup_upd(i) <> ldu(i).io.dwpu.lookup_upd(0) 911 dwpu.io.cfpred(i) <> ldu(i).io.dwpu.cfpred(0) 912 } 913 dwpu.io.tagwrite_upd.valid := tagArray.io.write.valid 914 dwpu.io.tagwrite_upd.bits.vaddr := tagArray.io.write.bits.vaddr 915 dwpu.io.tagwrite_upd.bits.s1_real_way_en := tagArray.io.write.bits.way_en 916 917 //---------------------------------------- 918 // load pipe 919 // the s1 kill signal 920 // only lsu uses this, replay never kills 921 for (w <- 0 until LoadPipelineWidth) { 922 ldu(w).io.lsu <> io.lsu.load(w) 923 924 // TODO:when have load128Req 925 ldu(w).io.load128Req := false.B 926 927 // replay and nack not needed anymore 928 // TODO: remove replay and nack 929 ldu(w).io.nack := false.B 930 931 ldu(w).io.disable_ld_fast_wakeup := 932 bankedDataArray.io.disable_ld_fast_wakeup(w) // load pipe fast wake up should be disabled when bank conflict 933 } 934 935 /** LoadMissDB: record load miss state */ 936 val isWriteLoadMissTable = 
    WireInit(Constantin.createRecord("isWriteLoadMissTable" + p(XSCoreParamsKey).HartId.toString))
  // Separate runtime switch controlling whether first-hit events are also logged.
  val isFirstHitWrite = WireInit(Constantin.createRecord("isFirstHitWrite" + p(XSCoreParamsKey).HartId.toString))
  val tableName = "LoadMissDB" + p(XSCoreParamsKey).HartId.toString
  val siteName = "DcacheWrapper" + p(XSCoreParamsKey).HartId.toString
  val loadMissTable = ChiselDB.createTable(tableName, new LoadMissEntry)
  for( i <- 0 until LoadPipelineWidth){
    val loadMissEntry = Wire(new LoadMissEntry)
    // Log either a genuine miss-queue enqueue (non-replay) or, when enabled,
    // a first-time hit response.
    val loadMissWriteEn =
      (!ldu(i).io.lsu.resp.bits.replay && ldu(i).io.miss_req.fire) ||
      (ldu(i).io.lsu.s2_first_hit && ldu(i).io.lsu.resp.valid && isFirstHitWrite.orR)
    loadMissEntry.timeCnt := GTimer()
    loadMissEntry.robIdx := ldu(i).io.lsu.resp.bits.debug_robIdx
    loadMissEntry.paddr := ldu(i).io.miss_req.bits.addr
    loadMissEntry.vaddr := ldu(i).io.miss_req.bits.vaddr
    // missState one-hot encoding: miss merged into an existing MSHR /
    // miss allocating a new MSHR / first hit.  OHToUInt assumes at most one
    // of the three is set in a logged cycle.
    loadMissEntry.missState := OHToUInt(Cat(Seq(
      ldu(i).io.miss_req.fire & ldu(i).io.miss_resp.merged,
      ldu(i).io.miss_req.fire & !ldu(i).io.miss_resp.merged,
      ldu(i).io.lsu.s2_first_hit && ldu(i).io.lsu.resp.valid
    )))
    loadMissTable.log(
      data = loadMissEntry,
      en = isWriteLoadMissTable.orR && loadMissWriteEn,
      site = siteName,
      clock = clock,
      reset = reset
    )
  }

  /** LoadAccessDB: record every completed load access (per load pipe). */
  val isWriteLoadAccessTable = WireInit(Constantin.createRecord("isWriteLoadAccessTable" + p(XSCoreParamsKey).HartId.toString))
  val loadAccessTable = ChiselDB.createTable("LoadAccessDB" + p(XSCoreParamsKey).HartId.toString, new LoadAccessEntry)
  for (i <- 0 until LoadPipelineWidth) {
    val loadAccessEntry = Wire(new LoadAccessEntry)
    loadAccessEntry.timeCnt := GTimer()
    loadAccessEntry.robIdx := ldu(i).io.lsu.resp.bits.debug_robIdx
    loadAccessEntry.paddr := ldu(i).io.miss_req.bits.addr
    loadAccessEntry.vaddr := ldu(i).io.miss_req.bits.vaddr
    // Same one-hot missState encoding as LoadMissDB above.
    loadAccessEntry.missState := OHToUInt(Cat(Seq(
      ldu(i).io.miss_req.fire &
        ldu(i).io.miss_resp.merged,
      ldu(i).io.miss_req.fire & !ldu(i).io.miss_resp.merged,
      ldu(i).io.lsu.s2_first_hit && ldu(i).io.lsu.resp.valid
    )))
    // Way-prediction debug info: predicted vs. real vs. direct-mapped way.
    loadAccessEntry.pred_way_num := ldu(i).io.lsu.debug_s2_pred_way_num
    loadAccessEntry.real_way_num := ldu(i).io.lsu.debug_s2_real_way_num
    loadAccessEntry.dm_way_num := ldu(i).io.lsu.debug_s2_dm_way_num
    loadAccessTable.log(
      data = loadAccessEntry,
      en = isWriteLoadAccessTable.orR && ldu(i).io.lsu.resp.valid,
      site = siteName + "_loadpipe" + i.toString,
      clock = clock,
      reset = reset
    )
  }

  //----------------------------------------
  // atomics
  // atomics not finished yet
  // io.lsu.atomics <> atomicsReplayUnit.io.lsu
  // Atomics are serviced by the main pipe; the response is registered once
  // before being returned to the LSU.
  io.lsu.atomics.resp := RegNext(mainPipe.io.atomic_resp)
  io.lsu.atomics.block_lr := mainPipe.io.block_lr
  // atomicsReplayUnit.io.pipe_resp := RegNext(mainPipe.io.atomic_resp)
  // atomicsReplayUnit.io.block_lr <> mainPipe.io.block_lr

  //----------------------------------------
  // miss queue
  // Port 0 is reserved for the main pipe; load pipes use ports 1..LoadPipelineWidth.
  val MissReqPortCount = LoadPipelineWidth + 1
  val MainPipeMissReqPort = 0

  // Request
  // Arbiter that additionally filters same-cacheline requests (by line address).
  val missReqArb = Module(new ArbiterFilterByCacheLineAddr(new MissReq, MissReqPortCount, blockOffBits, PAddrBits))

  missReqArb.io.in(MainPipeMissReqPort) <> mainPipe.io.miss_req
  for (w <- 0 until LoadPipelineWidth) { missReqArb.io.in(w + 1) <> ldu(w).io.miss_req }

  // Miss-queue responses are broadcast to all requesters.
  for (w <- 0 until LoadPipelineWidth) { ldu(w).io.miss_resp := missQueue.io.resp }
  mainPipe.io.miss_resp := missQueue.io.resp

  // Let the writeback unit observe the winning miss request address.
  wb.io.miss_req.valid := missReqArb.io.out.valid
  wb.io.miss_req.bits := missReqArb.io.out.bits.addr

  // block_decoupled(missReqArb.io.out, missQueue.io.req, wb.io.block_miss_req)
  // Bulk-connect first, then conditionally override: Chisel last-connect
  // semantics make the when() block take priority when wb blocks the miss.
  missReqArb.io.out <> missQueue.io.req
  when(wb.io.block_miss_req) {
    missQueue.io.req.bits.cancel := true.B
    missReqArb.io.out.ready := false.B
  }

  for (w <- 0 until LoadPipelineWidth) {
    ldu(w).io.mq_enq_cancel := missQueue.io.mq_enq_cancel }

  // Miss-queue enqueue statistics (fire vs. merely valid, single vs. multiple).
  XSPerfAccumulate("miss_queue_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) >= 1.U)
  XSPerfAccumulate("miss_queue_muti_fire", PopCount(VecInit(missReqArb.io.in.map(_.fire))) > 1.U)

  XSPerfAccumulate("miss_queue_has_enq_req", PopCount(VecInit(missReqArb.io.in.map(_.valid))) >= 1.U)
  XSPerfAccumulate("miss_queue_has_muti_enq_req", PopCount(VecInit(missReqArb.io.in.map(_.valid))) > 1.U)
  XSPerfAccumulate("miss_queue_has_muti_enq_but_not_fire", PopCount(VecInit(missReqArb.io.in.map(_.valid))) > 1.U && PopCount(VecInit(missReqArb.io.in.map(_.fire))) === 0.U)

  // forward missqueue
  // Per-load-pipe MSHR data forwarding.
  (0 until LoadPipelineWidth).map(i => io.lsu.forward_mshr(i).connect(missQueue.io.forward(i)))

  // refill to load queue
  io.lsu.lsq <> missQueue.io.refill_to_ldq

  // tilelink stuff
  // Miss queue owns the A (acquire) and E (finish) channels.
  bus.a <> missQueue.io.mem_acquire
  bus.e <> missQueue.io.mem_finish
  missQueue.io.probe_addr := bus.b.bits.address

  missQueue.io.main_pipe_resp := RegNext(mainPipe.io.atomic_resp)

  //----------------------------------------
  // probe
  // probeQueue.io.mem_probe <> bus.b
  // Probes are stalled while the miss queue reports a conflicting in-flight miss.
  block_decoupled(bus.b, probeQueue.io.mem_probe, missQueue.io.probe_block)
  probeQueue.io.lrsc_locked_block <> mainPipe.io.lrsc_locked_block
  probeQueue.io.update_resv_set <> mainPipe.io.update_resv_set

  //----------------------------------------
  // mainPipe
  // when a req enters main pipe, if it is set-conflict with replace pipe or refill pipe,
  // block the req in main pipe
  block_decoupled(probeQueue.io.pipe_req, mainPipe.io.probe_req, missQueue.io.refill_pipe_req.valid)
  block_decoupled(io.lsu.store.req, mainPipe.io.store_req, refillPipe.io.req.valid)

  io.lsu.store.replay_resp := RegNext(mainPipe.io.store_replay_resp)
  io.lsu.store.main_pipe_hit_resp := mainPipe.io.store_hit_resp

  // Arbitrate miss-queue main-pipe requests against LSU atomics, with a
  // pipeline register on the output.
  arbiter_with_pipereg(
    in =
      Seq(missQueue.io.main_pipe_req, io.lsu.atomics.req),
    out = mainPipe.io.atomic_req,
    name = Some("main_pipe_atomic_req")
  )

  // Invalidate the LR/SC reservation set when the locked block is written back.
  mainPipe.io.invalid_resv_set := RegNext(wb.io.req.fire && wb.io.req.bits.addr === mainPipe.io.lrsc_locked_block.bits)

  //----------------------------------------
  // replace (main pipe)
  val mpStatus = mainPipe.io.status
  mainPipe.io.replace_req <> missQueue.io.replace_pipe_req
  missQueue.io.replace_pipe_resp := mainPipe.io.replace_resp

  //----------------------------------------
  // refill pipe
  // Block a refill that set-conflicts with main-pipe s1, or set+way-conflicts
  // with main-pipe s2/s3 (s1 has not resolved its way yet, so set match alone
  // is enough there).
  val refillShouldBeBlocked = (mpStatus.s1.valid && mpStatus.s1.bits.set === missQueue.io.refill_pipe_req.bits.idx) ||
    Cat(Seq(mpStatus.s2, mpStatus.s3).map(s =>
      s.valid &&
      s.bits.set === missQueue.io.refill_pipe_req.bits.idx &&
      s.bits.way_en === missQueue.io.refill_pipe_req.bits.way_en
    )).orR
  block_decoupled(missQueue.io.refill_pipe_req, refillPipe.io.req, refillShouldBeBlocked)

  // Duplicated copies of the same blocking condition, one per duplicated
  // status port — presumably for physical-design fanout/timing; each copy must
  // stay logically identical to refillShouldBeBlocked above.
  val mpStatus_dup = mainPipe.io.status_dup
  val mq_refill_dup = missQueue.io.refill_pipe_req_dup
  val refillShouldBeBlocked_dup = VecInit((0 until nDupStatus).map { case i =>
    mpStatus_dup(i).s1.valid && mpStatus_dup(i).s1.bits.set === mq_refill_dup(i).bits.idx ||
    Cat(Seq(mpStatus_dup(i).s2, mpStatus_dup(i).s3).map(s =>
      s.valid &&
      s.bits.set === mq_refill_dup(i).bits.idx &&
      s.bits.way_en === mq_refill_dup(i).bits.way_en
    )).orR
  })
  dontTouch(refillShouldBeBlocked_dup)

  // Fan the duplicated request copies out to the refill pipe's per-consumer
  // request ports (data banks, meta, tag, err), each gated by its own
  // duplicated block signal.
  refillPipe.io.req_dup_for_data_w.zipWithIndex.foreach { case (r, i) =>
    r.bits := (mq_refill_dup.drop(dataWritePort).take(DCacheBanks))(i).bits
  }
  refillPipe.io.req_dup_for_meta_w.bits := mq_refill_dup(metaWritePort).bits
  refillPipe.io.req_dup_for_tag_w.bits := mq_refill_dup(tagWritePort).bits
  refillPipe.io.req_dup_for_err_w.bits := mq_refill_dup(errWritePort).bits
  refillPipe.io.req_dup_for_data_w.zipWithIndex.foreach { case (r, i) =>
    r.valid := (mq_refill_dup.drop(dataWritePort).take(DCacheBanks))(i).valid &&
      !(refillShouldBeBlocked_dup.drop(dataWritePort).take(DCacheBanks))(i)
  }
  refillPipe.io.req_dup_for_meta_w.valid := mq_refill_dup(metaWritePort).valid && !refillShouldBeBlocked_dup(metaWritePort)
  refillPipe.io.req_dup_for_tag_w.valid := mq_refill_dup(tagWritePort).valid && !refillShouldBeBlocked_dup(tagWritePort)
  refillPipe.io.req_dup_for_err_w.valid := mq_refill_dup(errWritePort).valid && !refillShouldBeBlocked_dup(errWritePort)

  // Duplicated refill-request valids; a pending refill write makes the
  // corresponding main-pipe data/tag write port not-ready (refill has priority).
  val refillPipe_io_req_valid_dup = VecInit(mq_refill_dup.zip(refillShouldBeBlocked_dup).map(
    x => x._1.valid && !x._2
  ))
  val refillPipe_io_data_write_valid_dup = VecInit(refillPipe_io_req_valid_dup.slice(0, nDupDataWriteReady))
  val refillPipe_io_tag_write_valid_dup = VecInit(refillPipe_io_req_valid_dup.slice(nDupDataWriteReady, nDupStatus))
  dontTouch(refillPipe_io_req_valid_dup)
  dontTouch(refillPipe_io_data_write_valid_dup)
  dontTouch(refillPipe_io_tag_write_valid_dup)
  mainPipe.io.data_write_ready_dup := VecInit(refillPipe_io_data_write_valid_dup.map(v => !v))
  mainPipe.io.tag_write_ready_dup := VecInit(refillPipe_io_tag_write_valid_dup.map(v => !v))
  mainPipe.io.wb_ready_dup := wb.io.req_ready_dup

  // Ready for every duplicated request copy mirrors the single real port.
  mq_refill_dup.zip(refillShouldBeBlocked_dup).foreach { case (r, block) =>
    r.ready := refillPipe.io.req.ready && !block
  }

  missQueue.io.refill_pipe_resp := refillPipe.io.resp
  io.lsu.store.refill_hit_resp := RegNext(refillPipe.io.store_resp)

  //----------------------------------------
  // wb
  // add a queue between MainPipe and WritebackUnit to reduce MainPipe stalls due to WritebackUnit busy

  wb.io.req <> mainPipe.io.wb
  bus.c <> wb.io.mem_release
  wb.io.release_wakeup := refillPipe.io.release_wakeup
  wb.io.release_update := mainPipe.io.release_update
  wb.io.probe_ttob_check_req <>
    mainPipe.io.probe_ttob_check_req
  wb.io.probe_ttob_check_resp <> mainPipe.io.probe_ttob_check_resp

  // Release notification to the LSU, one cycle after the writeback request fires.
  io.lsu.release.valid := RegNext(wb.io.req.fire())
  io.lsu.release.bits.paddr := RegNext(wb.io.req.bits.addr)
  // Note: RegNext() is required by:
  // * load queue released flag update logic
  // * load / load violation check logic
  // * and timing requirements
  // CHANGE IT WITH CARE

  // connect bus d
  // Default both consumers to idle; the when() below re-drives exactly one of
  // them via last-connect, based on the D-channel opcode.
  missQueue.io.mem_grant.valid := false.B
  missQueue.io.mem_grant.bits := DontCare

  wb.io.mem_grant.valid := false.B
  wb.io.mem_grant.bits := DontCare

  // in L1DCache, we ony expect Grant[Data] and ReleaseAck
  bus.d.ready := false.B
  when (bus.d.bits.opcode === TLMessages.Grant || bus.d.bits.opcode === TLMessages.GrantData) {
    missQueue.io.mem_grant <> bus.d
  } .elsewhen (bus.d.bits.opcode === TLMessages.ReleaseAck) {
    wb.io.mem_grant <> bus.d
  } .otherwise {
    assert (!bus.d.fire())
  }

  //----------------------------------------
  // replacement algorithm
  val replacer = ReplacementPolicy.fromString(cacheParams.replacer, nWays, nSets)
  val replWayReqs = ldu.map(_.io.replace_way) ++ Seq(mainPipe.io.replace_way)

  // With confidence prediction enabled, sets recently victimized fall back to
  // the replacer; others use the direct-mapped way hint from the predictor.
  val victimList = VictimList(nSets)
  if (dwpuParam.enCfPred) {
    when(missQueue.io.replace_pipe_req.valid) {
      victimList.replace(get_idx(missQueue.io.replace_pipe_req.bits.vaddr))
    }
    replWayReqs.foreach {
      case req =>
        req.way := DontCare
        when(req.set.valid) {
          when(victimList.whether_sa(req.set.bits)) {
            req.way := replacer.way(req.set.bits)
          }.otherwise {
            req.way := req.dmWay
          }
        }
    }
  } else {
    replWayReqs.foreach {
      case req =>
        req.way := DontCare
        when(req.set.valid) {
          req.way := replacer.way(req.set.bits)
        }
    }
  }

  // Touch the replacer state for every access (loads + main pipe).
  val replAccessReqs = ldu.map(_.io.replace_access) ++ Seq(
    mainPipe.io.replace_access
  )
  val
    touchWays = Seq.fill(replAccessReqs.size)(Wire(ValidIO(UInt(log2Up(nWays).W))))
  touchWays.zip(replAccessReqs).foreach {
    case (w, req) =>
      w.valid := req.valid
      w.bits := req.bits.way
  }
  val touchSets = replAccessReqs.map(_.bits.set)
  replacer.access(touchSets, touchWays)

  //----------------------------------------
  // assertions
  // dcache should only deal with DRAM addresses
  when (bus.a.fire()) {
    assert(bus.a.bits.address >= 0x80000000L.U)
  }
  when (bus.b.fire()) {
    assert(bus.b.bits.address >= 0x80000000L.U)
  }
  when (bus.c.fire()) {
    assert(bus.c.bits.address >= 0x80000000L.U)
  }

  //----------------------------------------
  // utility functions
  /** Connect source -> sink but suppress the handshake while block_signal is
    * high (neither side sees the other as ready/valid).
    */
  def block_decoupled[T <: Data](source: DecoupledIO[T], sink: DecoupledIO[T], block_signal: Bool) = {
    sink.valid := source.valid && !block_signal
    source.ready := sink.ready && !block_signal
    sink.bits := source.bits
  }

  //----------------------------------------
  // Customized csr cache op support
  val cacheOpDecoder = Module(new CSRCacheOpDecoder("dcache", CacheInstrucion.COP_ID_DCACHE))
  cacheOpDecoder.io.csr <> io.csr
  bankedDataArray.io.cacheOp.req := cacheOpDecoder.io.cache.req
  // dup cacheOp_req_valid
  bankedDataArray.io.cacheOp_req_dup.zipWithIndex.map{ case(dup, i) => dup := cacheOpDecoder.io.cache_req_dup(i) }
  // dup cacheOp_req_bits_opCode
  bankedDataArray.io.cacheOp_req_bits_opCode_dup.zipWithIndex.map{ case (dup, i) => dup := cacheOpDecoder.io.cacheOp_req_bits_opCode_dup(i) }

  tagArray.io.cacheOp.req := cacheOpDecoder.io.cache.req
  // dup cacheOp_req_valid
  tagArray.io.cacheOp_req_dup.zipWithIndex.map{ case(dup, i) => dup := cacheOpDecoder.io.cache_req_dup(i) }
  // dup cacheOp_req_bits_opCode
  tagArray.io.cacheOp_req_bits_opCode_dup.zipWithIndex.map{ case (dup, i) => dup := cacheOpDecoder.io.cacheOp_req_bits_opCode_dup(i) }

  // Cache-op response: at most one of the two arrays responds in a cycle
  // (enforced by the assert below), so a Mux1H is safe here.
  cacheOpDecoder.io.cache.resp.valid := bankedDataArray.io.cacheOp.resp.valid ||
    tagArray.io.cacheOp.resp.valid
  cacheOpDecoder.io.cache.resp.bits := Mux1H(List(
    bankedDataArray.io.cacheOp.resp.valid -> bankedDataArray.io.cacheOp.resp.bits,
    tagArray.io.cacheOp.resp.valid -> tagArray.io.cacheOp.resp.bits,
  ))
  cacheOpDecoder.io.error := io.error
  assert(!((bankedDataArray.io.cacheOp.resp.valid +& tagArray.io.cacheOp.resp.valid) > 1.U))

  //----------------------------------------
  // performance counters
  val num_loads = PopCount(ldu.map(e => e.io.lsu.req.fire()))
  XSPerfAccumulate("num_loads", num_loads)

  io.mshrFull := missQueue.io.full

  // performance counter
  // Track recent accesses (loads + stores) and count how often they touch a
  // line that was replaced "early" (debug signal from the miss queue).
  val ld_access = Wire(Vec(LoadPipelineWidth, missQueue.io.debug_early_replace.last.cloneType))
  val st_access = Wire(ld_access.last.cloneType)
  ld_access.zip(ldu).foreach {
    case (a, u) =>
      a.valid := RegNext(u.io.lsu.req.fire()) && !u.io.lsu.s1_kill
      a.bits.idx := RegNext(get_idx(u.io.lsu.req.bits.vaddr))
      a.bits.tag := get_tag(u.io.lsu.s1_paddr_dup_dcache)
  }
  st_access.valid := RegNext(mainPipe.io.store_req.fire())
  st_access.bits.idx := RegNext(get_idx(mainPipe.io.store_req.bits.vaddr))
  st_access.bits.tag := RegNext(get_tag(mainPipe.io.store_req.bits.addr))
  val access_info = ld_access.toSeq ++ Seq(st_access)
  val early_replace = RegNext(missQueue.io.debug_early_replace)
  val access_early_replace = access_info.map {
    case acc =>
      Cat(early_replace.map {
        case r =>
          acc.valid && r.valid &&
          acc.bits.tag === r.bits.tag &&
          acc.bits.idx === r.bits.idx
      })
  }
  XSPerfAccumulate("access_early_replace", PopCount(Cat(access_early_replace)))

  val perfEvents = (Seq(wb, mainPipe, missQueue, probeQueue) ++ ldu).flatMap(_.getPerfEvents)
  generatePerfEvent()
}

/** DPI-C helper used to model atomic memory operations in simulation
  * (ExtModule: implemented outside Chisel, e.g. in Verilog/C).
  */
class AMOHelper() extends ExtModule {
  val clock =
    IO(Input(Clock()))
  val enable = IO(Input(Bool()))
  val cmd = IO(Input(UInt(5.W)))
  val addr = IO(Input(UInt(64.W)))
  val wdata = IO(Input(UInt(64.W)))
  val mask = IO(Input(UInt(8.W)))
  val rdata = IO(Output(UInt(64.W)))
}

/** Diplomacy wrapper that instantiates either the real DCache (exposing its
  * TileLink client node) or, when no dcache parameters are configured, a
  * FakeDCache that models memory via DPI-C for debug-only simulation.
  */
class DCacheWrapper()(implicit p: Parameters) extends LazyModule with HasXSParameter {

  val useDcache = coreParams.dcacheParametersOpt.nonEmpty
  // NOTE(review): nulls here follow the existing diplomacy-era style; callers
  // must guard on useDcache before touching clientNode/dcache.
  val clientNode = if (useDcache) TLIdentityNode() else null
  val dcache = if (useDcache) LazyModule(new DCache()) else null
  if (useDcache) {
    clientNode := dcache.clientNode
  }

  lazy val module = new LazyModuleImp(this) with HasPerfEvents {
    val io = IO(new DCacheIO)
    val perfEvents = if (!useDcache) {
      // a fake dcache which uses dpi-c to access memory, only for debug usage!
      val fake_dcache = Module(new FakeDCache())
      io <> fake_dcache.io
      Seq()
    }
    else {
      io <> dcache.module.io
      dcache.module.getPerfEvents
    }
    generatePerfEvent()
  }
}