/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import huancun.DirtyKey
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}
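
// Illustrative only: a hedged sketch of how a producer (e.g. the main pipe) might fill
// in a voluntary Release request. The helper names (get_block_addr, block_data,
// coh_is_dirty, alloc_miss_id) are assumptions for this example, not part of this file.
//
//   val wb = Wire(new WritebackReq)
//   wb.addr          := get_block_addr(paddr)  // block-aligned physical address
//   wb.param         := TtoN                   // e.g. shrink permission: Trunk -> None
//   wb.voluntary     := true.B                 // a Release, not a ProbeAck
//   wb.hasData       := coh_is_dirty           // only carry data when the block is dirty
//   wb.dirty         := coh_is_dirty
//   wb.data          := block_data
//   wb.delay_release := true.B                 // sleep until release_wakeup fires
//   wb.miss_id       := alloc_miss_id          // miss entry that will wake this Release up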
// While a Release sleeps and waits for a refill to wake it up,
// the main pipe might update the meta & data of the same block during this time.
// So the meta & data to be released need to be updated too.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for a new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    // this entry is busy, but it can merge the new req
    val secondary_valid = Input(Bool())
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReq))

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  // ProbeAck:               s_invalid -> s_release_req
  // ProbeAck merge Release: s_invalid -> s_release_req
  // Release:                s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck: s_invalid -> s_sleep -> s_release_req
  //                         (change Release into ProbeAck when Release is not fired)
  //                     or: s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                         (send a ProbeAck after the Release transaction is over)
  val state = RegInit(s_invalid)

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr

  val busy = remain.orR

  val req = Reg(new WritebackReq)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  // merge new_data into old_data under a per-bank (64-bit) write mask
  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }
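
  // Worked example for mergeData (illustrative; assumes a 64-byte block, i.e.
  // DCacheBanks = 8 banks of 64 bits, which matches FillInterleaved(64, wmask)):
  //
  //   val merged = mergeData(old_data, new_data, "b00000011".U)
  //   // => banks 0 and 1 (bits 127..0) come from new_data, banks 7..2 from old_data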

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.valid && io.primary_valid && io.primary_ready) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to inform me that I can keep releasing
  val merge = io.secondary_valid && io.secondary_ready
  when (state === s_sleep) {
    assert(remain === 0.U)
    // There shouldn't be a new Release with the same addr in sleep state
    assert(!(merge && io.req.bits.voluntary))

    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }.elsewhen (merge) {
      state := s_release_req
      req.voluntary := false.B
      req.param := req.param
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }

    when (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
      remain_set := Mux(
        req.hasData || update && io.release_update.bits.mask.orR || merge && io.req.bits.hasData,
        ~0.U(refillCycles.W),
        1.U(refillCycles.W)
      )
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

//  when (state === s_release_req && release_done) {
//    state := Mux(req.voluntary, s_release_resp, s_invalid)
//  }
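
  // Illustrative note (an assumption for concreteness, not fixed by this file): with a
  // 64-byte block and a 32-byte TileLink beat, refillCycles = 2, so a ReleaseData /
  // ProbeAckData occupies two beats. remain starts as "b11".U, each mem_release.fire()
  // clears one bit via PriorityEncoderOH, and edge.count raises release_done on the last
  // beat. Messages without data (Release / ProbeAck) set remain to 1.U and finish in a
  // single beat.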

  // Because the wbq now merges a same-addr req unconditionally, the req to be merged
  // may come too late for the previous req to absorb it. In that case we have to hold
  // the new req and handle it after the previous one finishes.
  // TODO: initialize these
  val release_later = RegInit(false.B)
  val c_already_sent = RegInit(false.B)
  def tmp_req() = new Bundle {
    val param = UInt(cWidth.W)
    val voluntary = Bool()
    val hasData = Bool()
    val dirty = Bool()
    val delay_release = Bool()
    val miss_id = UInt(log2Up(cfg.nMissEntries).W)

    def toWritebackReq = {
      val r = Wire(new WritebackReq())
      r.data := req.data
      r.addr := req.addr
      r.param := param
      r.voluntary := voluntary
      r.hasData := hasData
      r.dirty := dirty
      r.delay_release := delay_release
      r.miss_id := miss_id
      r
    }
  }
  val req_later = Reg(tmp_req())

  when (state === s_release_req) {
    when (io.mem_release.fire()) {
      c_already_sent := !release_done
    }

    when (req.voluntary) {
      // The previous req is a Release
      when (release_done) {
        state := s_release_resp
      }
      // merge a ProbeAck
      when (merge) {
        when (io.mem_release.fire() || c_already_sent) {
          // too late to merge, handle the ProbeAck later
          release_later := true.B
          req_later.param := io.req.bits.param
          req_later.voluntary := io.req.bits.voluntary
          req_later.hasData := io.req.bits.hasData
          req_later.dirty := io.req.bits.dirty
          req_later.delay_release := io.req.bits.delay_release
          req_later.miss_id := io.req.bits.miss_id
        }.otherwise {
          // Release hasn't been sent out yet, change the Release into a ProbeAck
          req.voluntary := false.B
          req.hasData := req.hasData || io.req.bits.hasData
          req.dirty := req.dirty || io.req.bits.dirty
          req.data := Mux(
            io.req.bits.hasData,
            io.req.bits.data,
            req.data
          )
          req.delay_release := false.B
          remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        }
      }
    }.otherwise {
      // The previous req is a ProbeAck
      when (merge) {
        release_later := true.B
        req_later.param := io.req.bits.param
        req_later.voluntary := io.req.bits.voluntary
        req_later.hasData := io.req.bits.hasData
        req_later.dirty := io.req.bits.dirty
        req_later.delay_release := io.req.bits.delay_release
        req_later.miss_id := io.req.bits.miss_id
      }

      when (release_done) {
        when (merge) {
          // Send the Release after the ProbeAck
//          state := s_release_req
//          req := Mux(merge, io.req.bits, req_later.toWritebackReq)
//          release_later := false.B
          state := s_sleep
          req := io.req.bits
          release_later := false.B
        }.elsewhen (release_later) {
          state := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          req := req_later.toWritebackReq
          when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
            req.delay_release := false.B
          }
          release_later := false.B
        }.otherwise {
          state := s_invalid
          release_later := false.B
        }
      }

      when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
        req_later.delay_release := false.B
      }
    }
  }
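
  // Example of the "too late to merge" path above (illustrative timeline, assuming a
  // two-beat ReleaseData): the first beat has already been sent, or is firing this cycle,
  // when a ProbeAck req to the same block arrives. The ProbeAck cannot be folded into the
  // in-flight Release, so it is parked in req_later and replayed as a fresh s_release_req
  // once the ReleaseAck for the current Release has been received (see the s_release_resp
  // handling below).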

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B

    when (merge) {
      release_later := true.B
      req_later.param := io.req.bits.param
      req_later.voluntary := io.req.bits.voluntary
      req_later.hasData := io.req.bits.hasData
      req_later.dirty := io.req.bits.dirty
      req_later.delay_release := io.req.bits.delay_release
      req_later.miss_id := io.req.bits.miss_id
    }
    when (io.mem_grant.fire()) {
      when (merge) {
        state := s_release_req
        req := io.req.bits
        remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.elsewhen (release_later) {
        state := s_release_req
        req := req_later.toWritebackReq
        remain_set := Mux(req_later.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.otherwise {
        state := s_invalid
        release_later := false.B
      }
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free, it can be allocated to the new req (primary).
  // 2. When this entry holds a req with the same addr, the new req is merged into it
  //    (secondary). e.g. when this entry wants to release but is still waiting for the
  //    release_wakeup signal, and a probe req with the same addr comes, we merge the
  //    probe with the release and handle the probe, so we don't need another release.
  io.primary_ready := state === s_invalid
  io.secondary_ready := state =/= s_invalid && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val accept = Cat(primary_ready_vec).orR
  val merge = Cat(secondary_ready_vec).orR
  val alloc = accept && !merge
  // When there are empty entries, merge or allocate a new entry.
  // When there is no empty entry, reject the req even if it could be merged.
  io.req.ready := accept
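
  // Illustrative decision table for the dispatch below (derived from accept/merge/alloc):
  //   some entry free, no same-addr entry -> alloc: allocate one free entry (primary)
  //   some entry free, same-addr entry    -> merge: fold the req into that entry (secondary)
  //   no entry free                       -> io.req.ready stays low and the req is rejected,
  //                                          even if it could have been merged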

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if (i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := io.req.valid
      primary_ready_vec(i) := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits := io.req.bits

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready
      entry.io.secondary_valid := io.req.valid && accept

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits := io.mem_grant.bits
//      when (entry_id === grant_source) {
//        io.mem_grant.ready := entry.io.mem_grant.ready
//      }

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update := io.release_update
  }
  assert(RegNext(!(io.mem_grant.valid && !io.mem_grant.ready)))
  io.mem_grant.ready := true.B

  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release): _*)

  // sanity check
  // print all input/output requests for debug purpose
  // print req
  when (io.req.fire()) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())

  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire()),
    ("dcache_wbq_1_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) < (cfg.nReleaseEntries.U / 4.U))),
    ("dcache_wbq_2_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U / 4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U / 2.U))),
    ("dcache_wbq_3_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U / 2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U * 3.U / 4.U))),
    ("dcache_wbq_4_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U * 3.U / 4.U))),
  )
  generatePerfEvent()
}
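
// A minimal hook-up sketch (illustrative only; the surrounding names below, such as
// mainPipe, missQueue and bus, are assumptions and not defined in this file):
//
//   val wbq = Module(new WritebackQueue(bus.edge))
//   wbq.io.req            <> mainPipe.io.wb_req          // Release / ProbeAck requests
//   bus.c                 <> wbq.io.mem_release           // TileLink C channel
//   wbq.io.mem_grant      <> bus.d_release_ack            // ReleaseAck beats from channel D
//   wbq.io.release_wakeup := missQueue.io.refill_done     // wakes sleeping Releases
//   wbq.io.release_update := mainPipe.io.release_update   // late store updates for a sleeping Release
//   wbq.io.miss_req       := missQueue.io.req_addr        // check new misses against in-flight writebacks
//   missQueue.io.wbq_block := wbq.io.block_miss_req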