/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import huancun.DirtyKey
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val addr_dup_0 = UInt(PAddrBits.W)
  val addr_dup_1 = UInt(PAddrBits.W)
  val param = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}
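
// Illustrative only (a hedged sketch, not code from the design): a producer
// such as the main pipe might fill in a voluntary Release request roughly as
// below; every right-hand-side name here is hypothetical.
//
//   val wb = Wire(new WritebackReq)
//   wb.addr := release_paddr            // hypothetical physical address
//   wb.addr_dup_0 := release_paddr      // duplicates of addr (for timing)
//   wb.addr_dup_1 := release_paddr
//   wb.param := TtoN                    // shrink permissions for the Release
//   wb.voluntary := true.B              // Release, not ProbeAck
//   wb.hasData := block_dirty           // only dirty blocks carry data
//   wb.dirty := block_dirty
//   wb.data := block_data
//   wb.delay_release := true.B          // sleep until release_wakeup matches
//   wb.miss_id := refill_miss_id        // miss entry whose refill we wait for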

// While a Release sleeps and waits for a refill to wake it up,
// the main pipe may update the block's meta & data in the meantime,
// so the meta & data to be released need to be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for a new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    // this entry is busy, but it can merge the new req
    val secondary_valid = Input(Bool())
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReq))

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  // ProbeAck:               s_invalid -> s_release_req
  // ProbeAck merge Release: s_invalid -> s_release_req
  // Release:                s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck: s_invalid -> s_sleep -> s_release_req
  //                         (change the Release into a ProbeAck when the Release has not fired)
  //                      or s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                         (send a ProbeAck after the Release transaction is over)
  val state = RegInit(s_invalid)
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr

  val busy = remain.orR

  val req = Reg(new WritebackReq)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.valid && io.primary_valid && io.primary_ready) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
      state_dup_0 := s_sleep
      state_dup_1 := s_sleep
    }.otherwise {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }
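
  // The s_sleep logic below folds release_update data into the sleeping req
  // via mergeData. Worked example (illustrative, shown with only two 64-bit
  // banks for brevity; the real mask is DCacheBanks bits wide):
  //   wmask      = "b10"
  //   full_wmask = FillInterleaved(64, wmask) = Cat(~0.U(64.W), 0.U(64.W))
  //   result     = Cat(new_data(127, 64), old_data(63, 0))
  // i.e. each set mask bit replaces the corresponding 64-bit bank of old_data.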

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to inform me that I can keep releasing
  val merge = io.secondary_valid && io.secondary_ready
  when (state === s_sleep) {
    assert(remain === 0.U)
    // There shouldn't be a new Release with the same addr in sleep state
    assert(!(merge && io.req.bits.voluntary))

    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }.elsewhen (merge) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      req.voluntary := false.B
      req.param := req.param // param unchanged
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }

    when (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      req.delay_release := false.B
      remain_set := Mux(
        req.hasData || update && io.release_update.bits.mask.orR || merge && io.req.bits.hasData,
        ~0.U(refillCycles.W),
        1.U(refillCycles.W)
      )
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending them
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain_dup_0)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr_dup_0,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr_dup_0,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when (busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain_dup_1) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

//  when (state === s_release_req && release_done) {
//    state := Mux(req.voluntary, s_release_resp, s_invalid)
//  }
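
  // Beat bookkeeping, worked example (illustrative, assuming refillCycles = 2):
  //   allocation with data sets remain_set = "b11"
  //   1st fire: beat = 0, remain_clr = "b01", remain -> "b10"
  //   2nd fire: beat = 1, remain_clr = "b10", remain -> "b00", release_done
  // A dataless ProbeAck/Release sets remain to "b01" and finishes in one beat.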

  // Because the wbq now merges same-addr reqs unconditionally, a req to be merged
  // may arrive too late for the in-flight req to absorb it. In that case we have
  // to handle the new req after the previous one finishes.
  // TODO: initialize these
  val release_later = RegInit(false.B)
  val c_already_sent = RegInit(false.B)
  def tmp_req() = new Bundle {
    val param = UInt(cWidth.W)
    val voluntary = Bool()
    val hasData = Bool()
    val dirty = Bool()
    val delay_release = Bool()
    val miss_id = UInt(log2Up(cfg.nMissEntries).W)

    def toWritebackReq = {
      val r = Wire(new WritebackReq())
      r.data := req.data
      r.addr := req.addr
      r.addr_dup_0 := req.addr_dup_0
      r.addr_dup_1 := req.addr_dup_1
      r.param := param
      r.voluntary := voluntary
      r.hasData := hasData
      r.dirty := dirty
      r.delay_release := delay_release
      r.miss_id := miss_id
      r
    }
  }
  val req_later = Reg(tmp_req())

  when (state_dup_0 === s_release_req) {
    when (io.mem_release.fire()) {
      c_already_sent := !release_done
    }

    when (req.voluntary) {
      // The previous req is a Release
      when (release_done) {
        state := s_release_resp
        state_dup_0 := s_release_resp
        state_dup_1 := s_release_resp
      }
      // merge a ProbeAck
      when (merge) {
        when (io.mem_release.fire() || c_already_sent) {
          // too late to merge, handle the ProbeAck later
          release_later := true.B
          req_later.param := io.req.bits.param
          req_later.voluntary := io.req.bits.voluntary
          req_later.hasData := io.req.bits.hasData
          req_later.dirty := io.req.bits.dirty
          req_later.delay_release := io.req.bits.delay_release
          req_later.miss_id := io.req.bits.miss_id
        }.otherwise {
          // Release hasn't been sent out yet, change the Release into a ProbeAck
          req.voluntary := false.B
          req.hasData := req.hasData || io.req.bits.hasData
          req.dirty := req.dirty || io.req.bits.dirty
          req.data := Mux(
            io.req.bits.hasData,
            io.req.bits.data,
            req.data
          )
          req.delay_release := false.B
          remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        }
      }
    }.otherwise {
      // The previous req is a ProbeAck
      when (merge) {
        release_later := true.B
        req_later.param := io.req.bits.param
        req_later.voluntary := io.req.bits.voluntary
        req_later.hasData := io.req.bits.hasData
        req_later.dirty := io.req.bits.dirty
        req_later.delay_release := io.req.bits.delay_release
        req_later.miss_id := io.req.bits.miss_id
      }

      when (release_done) {
        when (merge) {
          // Send the Release after the ProbeAck
//          state := s_release_req
//          req := Mux(merge, io.req.bits, req_later.toWritebackReq)
//          release_later := false.B
          state := s_sleep
          state_dup_0 := s_sleep
          state_dup_1 := s_sleep
          req := io.req.bits
          release_later := false.B
        }.elsewhen (release_later) {
          state := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_0 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_1 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          req := req_later.toWritebackReq
          when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
            req.delay_release := false.B
          }
          release_later := false.B
        }.otherwise {
          state := s_invalid
          state_dup_0 := s_invalid
          state_dup_1 := s_invalid
          release_later := false.B
        }
      }
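
      // A wakeup for the deferred Release must not be lost while the ProbeAck
      // above is still in flight: clearing req_later.delay_release here makes
      // the elsewhen(release_later) arm above go straight to s_release_req
      // instead of back to s_sleep once the ProbeAck finishes.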
      when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
        req_later.delay_release := false.B
      }
    }
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state_dup_0 === s_release_resp) {
    io.mem_grant.ready := true.B

    when (merge) {
      release_later := true.B
      req_later.param := io.req.bits.param
      req_later.voluntary := io.req.bits.voluntary
      req_later.hasData := io.req.bits.hasData
      req_later.dirty := io.req.bits.dirty
      req_later.delay_release := io.req.bits.delay_release
      req_later.miss_id := io.req.bits.miss_id
    }
    when (io.mem_grant.fire()) {
      when (merge) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        req := io.req.bits
        remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.elsewhen (release_later) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        req := req_later.toWritebackReq
        remain_set := Mux(req_later.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.otherwise {
        state := s_invalid
        state_dup_0 := s_invalid
        state_dup_1 := s_invalid
        release_later := false.B
      }
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free
  // 2. When this entry is waiting for the release_wakeup signal before it can
  //    release, and a probe req with the same addr arrives. In this case the
  //    probe is merged with the release: handling the probe means a separate
  //    release is no longer needed.
  io.primary_ready := state_dup_1 === s_invalid
  io.secondary_ready := state_dup_1 =/= s_invalid && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state_dup_0 === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state_dup_1 === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val accept = Cat(primary_ready_vec).orR
  val merge = Cat(secondary_ready_vec).orR
  val alloc = accept && !merge
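
  // Decision summary (illustrative):
  //   accept | merge | outcome
  //   -------+-------+--------------------------------------------------
  //     0    |   x   | req rejected (io.req.ready deasserted)
  //     1    |   0   | a free entry is allocated (primary path)
  //     1    |   1   | req merged into the matching entry (secondary path)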
  // When there is a free entry, merge the req or allocate a new entry for it.
  // When there is no free entry, reject the req even if it could be merged.
  io.req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if (i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := io.req.valid
      primary_ready_vec(i) := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits := io.req.bits

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready
      entry.io.secondary_valid := io.req.valid && accept

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits := io.mem_grant.bits
//      when (entry_id === grant_source) {
//        io.mem_grant.ready := entry.io.mem_grant.ready
//      }

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update := io.release_update
  }
  assert(RegNext(!(io.mem_grant.valid && !io.mem_grant.ready)))
  io.mem_grant.ready := true.B

  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release): _*)

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (io.req.fire()) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())

  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire()),
    ("dcache_wbq_1_4_valid", (perfValidCount < (cfg.nReleaseEntries.U / 4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U / 4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U / 2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U / 2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U * 3.U / 4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U * 3.U / 4.U)))
  )
  generatePerfEvent()
}
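
// Illustrative wiring sketch (hypothetical parent module, not part of this
// file): releases from all entries are arbitrated onto the C channel by
// TLArbiter.robin above, and ReleaseAcks come back on D routed by source id
// (entry_id = index + releaseIdBase). Producer-side names are hypothetical.
//
//   val wbq = Module(new WritebackQueue(edge))
//   wbq.io.req <> mainPipe.io.wb_req                    // hypothetical producer
//   tl_out.c <> wbq.io.mem_release
//   wbq.io.mem_grant <> releaseAckFromD                 // D-channel ReleaseAcks only
//   wbq.io.release_wakeup := missQueue.io.refill_done   // hypothetical
//   wbq.io.release_update := mainPipe.io.release_update // hypothetical
//   wbq.io.miss_req := missQueue.io.req_addr            // hypothetical
//   missQueue.io.block_by_wbq := wbq.io.block_miss_req  // hypothetical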