/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import org.chipsalliance.cde.config.Parameters
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}

class WritebackReqCtrl(implicit p: Parameters) extends DCacheBundle {
  val param = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
}

class WritebackReqWodata(implicit p: Parameters) extends WritebackReqCtrl {
  val addr = UInt(PAddrBits.W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b\n",
      addr, param, voluntary, hasData)
  }
}

class WritebackReqData(implicit p: Parameters) extends DCacheBundle {
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackReq(implicit p: Parameters) extends WritebackReqWodata {
  val data = UInt((cfg.blockBytes * 8).W)

  override def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }

  def toWritebackReqWodata(): WritebackReqWodata = {
    val out = Wire(new WritebackReqWodata)
    out.addr := addr
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqCtrl(): WritebackReqCtrl = {
    val out = Wire(new WritebackReqCtrl)
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqData(): WritebackReqData = {
    val out = Wire(new WritebackReqData)
    out.data := data
    out
  }
}
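// A note on the to* views above: they exist so that the narrow control/address
// path and the wide data path can be routed separately to reduce fanout. A
// minimal sketch of how WritebackQueue (below) performs the split; `req_data`
// and `entry` are the names used there:
//
//   val req_data = RegEnable(io.req.bits.toWritebackReqData(), io.req.valid)
//   entry.io.req.bits := req.bits   // WritebackReqWodata view, same cycle
//   entry.io.req_data := req_data   // data view, registered one cycle later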
// While a Release sleeps and waits for a refill to wake it up,
// the main pipe might update the meta & data of that line in the meantime.
// So the meta & data to be released need to be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

// To reduce fanout, writeback queue entry data is updated 1 cycle
// after ReleaseUpdate.fire
class WBQEntryReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask_delayed = UInt(DCacheBanks.W)
  val data_delayed = UInt((cfg.blockBytes * 8).W)
  val mask_orr = Bool()
}

// When a probe TtoB req enters the dcache main pipe, check whether that cacheline
// is waiting for release. If so, change TtoB to TtoN and set the dcache
// coherence state to N.
class ProbeToBCheckReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W) // paddr from mainpipe s1
}

class ProbeToBCheckResp(implicit p: Parameters) extends DCacheBundle {
  val toN = Bool() // need to set dcache coh to N
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())

    val req = Flipped(DecoupledIO(new WritebackReqWodata))
    val req_data = Input(new WritebackReqData)

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
    val primary_valid = Input(Bool())
    val primary_ready = Output(Bool())
    val primary_ready_dup = Vec(nDupWbReady, Output(Bool()))

    val block_addr = Output(Valid(UInt()))
  })

  val s_invalid :: s_release_req :: s_release_resp :: Nil = Enum(3)
  // ProbeAck:               s_invalid -> s_release_req
  // ProbeAck merge Release: s_invalid -> s_release_req
  // Release:                s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck: s_invalid -> s_sleep -> s_release_req
  //                         (change Release into ProbeAck when Release is not fired)
  //                     or: s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                         (send a ProbeAck after the Release transaction is over)
  // Note: s_sleep no longer exists in this version; a voluntary Release is
  // allocated directly into s_release_req (see the alloc logic below).

  val state = RegInit(s_invalid)
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)
  val state_dup_for_mp = RegInit(VecInit(Seq.fill(nDupWbReady)(s_invalid)))

  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr

  // writeback queue data
  val data = Reg(UInt((cfg.blockBytes * 8).W))

  // writeback queue paddr
  val paddr_dup_0 = Reg(UInt(PAddrBits.W))
  val paddr_dup_1 = Reg(UInt(PAddrBits.W))
  val paddr_dup_2 = Reg(UInt(PAddrBits.W))

  // pending data write
  // !s_data_override means there is an in-progress data write
  val s_data_override = RegInit(true.B)
  // !s_data_merge means there is an in-progress data merge
  //val s_data_merge = RegInit(true.B)

  // there is a valid request that can be sent to the release bus
  //val busy = remain.orR && s_data_override && s_data_merge // have remaining beats and data write finished
  val busy = remain.orR && s_data_override // have remaining beats and data write finished
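  // Beat bookkeeping sketch: `remain` is a per-beat bitmap of beats still to be
  // sent. Worked example, assuming refillCycles = 2 (2 beats per block):
  //   alloc with hasData     -> remain_set = b11 (both beats pending)
  //   alloc without data     -> remain_set = b01 (a single dataless beat)
  //   each mem_release.fire  -> remain_clr = PriorityEncoderOH(remain), clearing
  //                             the lowest pending beat
  // The entry stops driving mem_release once remain drains back to 0.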
  val req = Reg(new WritebackReqWodata)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits := req.addr

  s_data_override := true.B // data_override takes only 1 cycle
  //s_data_merge := true.B // data_merge takes only 1 cycle

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  io.req.ready := state === s_invalid
  val alloc = io.req.valid && io.primary_valid && io.primary_ready
  when (alloc) {
    assert (remain === 0.U)
    req := io.req.bits
    s_data_override := false.B
    // only update paddr when allocating a new writeback queue entry
    paddr_dup_0 := io.req.bits.addr
    paddr_dup_1 := io.req.bits.addr
    paddr_dup_2 := io.req.bits.addr

    remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    state := s_release_req
    state_dup_0 := s_release_req
    state_dup_1 := s_release_req
    state_dup_for_mp.foreach(_ := s_release_req)
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain_dup_0)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

  // voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when (busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire) { remain_clr := PriorityEncoderOH(remain_dup_1) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

  when (state === s_release_req && release_done) {
    state := Mux(req.voluntary, s_release_resp, s_invalid)
    when (req.voluntary) {
      state_dup_for_mp.foreach(_ := s_release_resp)
    } .otherwise {
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }

  io.primary_ready := state === s_invalid
  io.primary_ready_dup.zip(state_dup_for_mp).foreach { case (rdy, st) => rdy := st === s_invalid }
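  // Message selection recap: the four C-channel messages built above map onto
  // (voluntary, hasData) as follows, matching the nested Mux on mem_release.bits:
  //   voluntary=1, hasData=1 -> ReleaseData  (victim writeback with data)
  //   voluntary=1, hasData=0 -> Release      (clean victim, permissions only)
  //   voluntary=0, hasData=1 -> ProbeAckData (probe response with data)
  //   voluntary=0, hasData=0 -> ProbeAck     (dataless probe response)
  // Beat slicing example (assuming cfg.blockBytes = 64 and beatBits = 256, so
  // refillCycles = 2): beat_data(0) = data(255, 0), beat_data(1) = data(511, 256).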
  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire) {
      state := s_invalid
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }

  // data update logic
  when (!s_data_override && (req.hasData || RegNext(alloc))) {
    data := io.req_data.data
  }

  // assert(!RegNext(!s_data_merge && !s_data_override))

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val req_ready_dup = Vec(nDupWbReady, Output(Bool()))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    //val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    //val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val alloc = Cat(primary_ready_vec).orR

  val req = io.req
  val block_conflict = Wire(Bool())

  req.ready := alloc && !block_conflict

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits := DontCare
  io.mem_grant.ready := false.B

  // delay the data write of a writeback req by 1 cycle
  val req_data = RegEnable(io.req.bits.toWritebackReqData(), io.req.valid)

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      // allocate into the lowest-indexed ready entry: this entry wins only if
      // no entry before it is also ready
      val former_primary_ready = if (i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := req.valid && !block_conflict
      primary_ready_vec(i) := entry.io.primary_ready
      entry.io.req.bits := req.bits
      entry.io.req_data := req_data

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits := io.mem_grant.bits
      //when (i.U === io.mem_grant.bits.source) {
      //  io.mem_grant.ready := entry.io.mem_grant.ready
      //}
  }

  io.req_ready_dup.zipWithIndex.foreach { case (rdy, i) =>
    rdy := Cat(entries.map(_.io.primary_ready_dup(i))).orR && !block_conflict
  }

  io.mem_grant.ready := true.B
  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.req.bits.addr)).asUInt.orR
  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
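  // Conflict-blocking sketch: both CAM lookups above compare an incoming address
  // against every live entry's block_addr. Hypothetical example: entry 2 is
  // releasing line 0x80001000; a new writeback request to 0x80001000 is stalled
  // via block_conflict (req.ready is deasserted above), and a miss refill to the
  // same line is stalled via block_miss_req below, so the refill cannot race
  // ahead of the victim actually leaving the cache.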
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (io.req.fire) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire) {
    io.mem_release.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)

  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire),
    ("dcache_wbq_1_4_valid", (perfValidCount < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U*3.U/4.U))),
  )
  generatePerfEvent()

}
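// Usage sketch (hypothetical parent-side wiring, not part of this file): the
// queue is typically owned by the DCache top level, which would connect it
// roughly like this; `mainPipe`, `missQueue` and `bus` are assumed names:
//
//   val wbq = Module(new WritebackQueue(edge))
//   wbq.io.req <> mainPipe.io.wb_req          // Release / ProbeAck requests
//   bus.c <> wbq.io.mem_release               // TileLink C channel out
//   wbq.io.mem_grant <> bus.d                 // ReleaseAck in on D channel
//   missQueue.io.wb_block := wbq.io.block_miss_req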