// xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision 57bb43b5f11c3f1e89ac52f232fe73056b35d9bd)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import huancun.DirtyKey
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val param  = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

// While a Release sleeps and waits for a refill to wake it up,
// main pipe might update meta & data during this time.
// So the meta & data to be released need to be updated too.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    // this entry is busy, but it can merge the new req
    val secondary_valid = Input(Bool())
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReq))

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr  = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  // ProbeAck:               s_invalid ->            s_release_req
  // ProbeAck merge Release: s_invalid ->            s_release_req
  // Release:                s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck: s_invalid -> s_sleep -> s_release_req
  //                        (change Release into ProbeAck when Release is not fired)
  //                     or: s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                        (send a ProbeAck after Release transaction is over)
  val state = RegInit(s_invalid)

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
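  // Illustration (assuming refillCycles = 2): a data-bearing release sets
  // remain to "b11"; each fired beat clears the lowest set bit (via
  // PriorityEncoderOH below), so remain goes "b11" -> "b10" -> "b00",
  // and the entry goes idle once remain is all zeros.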

  val busy = remain.orR

  val req  = Reg(new WritebackReq)

  // assign default signals to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := req.addr

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }
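  // Worked example (assuming a 64-byte block split into eight 64-bit banks,
  // i.e. DCacheBanks = 8): wmask = "b00000001" expands to 64 ones in the low
  // bits of full_wmask, so only data(63, 0) is taken from new_data while the
  // other seven banks keep their old_data values.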

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.valid && io.primary_valid && io.primary_ready) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
    }.otherwise {
      state := s_release_req
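      // a data-less Release/ProbeAck is a single header-only message on
      // channel C, so reserve exactly one beat in remain; a data-bearing
      // one needs all refillCycles beats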
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for refill pipe to inform me that I can keep releasing
  val merge = io.secondary_valid && io.secondary_ready
  when (state === s_sleep) {
    assert(remain === 0.U)
    // There shouldn't be a new Release with the same addr in sleep state
    assert(!(merge && io.req.bits.voluntary))

    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }.elsewhen (merge) {
      state := s_release_req
      req.voluntary := false.B
      // req.param keeps its current value
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }

    when (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      req.delay_release := false.B
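      // the wakeup may arrive in the same cycle as an update or a merge;
      // req is only updated at the end of the cycle, so also account for
      // data that is being folded into req right now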
      remain_set := Mux(
        req.hasData || update && io.release_update.bits.mask.orR || merge && io.req.bits.hasData,
        ~0.U(refillCycles.W),
        1.U(refillCycles.W)
      )
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending them
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }
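  // e.g. assuming a 512-bit block with beatBits = 256 (refillCycles = 2):
  // beat_data(0) = req.data(255, 0), beat_data(1) = req.data(511, 256);
  // beat = PriorityEncoder(remain) always picks the lowest unsent beat first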

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

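  // pass the dirty bit downstream through the DirtyKey echo field (if the
  // edge carries it), so the next-level cache can tell whether the released
  // data actually needs to be written back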
  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

//  when (state === s_release_req && release_done) {
//    state := Mux(req.voluntary, s_release_resp, s_invalid)
//  }

  // Because wbq now merges same-addr reqs unconditionally, a req may arrive
  // too late to be folded into the one already in flight. In that case we
  // buffer the new req and handle it after the previous one finishes.
  // TODO: initiate these
  val release_later = RegInit(false.B)
  val c_already_sent = RegInit(false.B)
  def tmp_req() = new Bundle {
    val param = UInt(cWidth.W)
    val voluntary = Bool()
    val hasData = Bool()
    val dirty = Bool()
    val delay_release = Bool()
    val miss_id = UInt(log2Up(cfg.nMissEntries).W)

    def toWritebackReq = {
      val r = Wire(new WritebackReq())
      r.data := req.data
      r.addr := req.addr
      r.param := param
      r.voluntary := voluntary
      r.hasData := hasData
      r.dirty := dirty
      r.delay_release := delay_release
      r.miss_id := miss_id
      r
    }
  }
  val req_later = Reg(tmp_req())
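  // req_later buffers only the metadata of the deferred req: addr and data
  // are taken from the in-flight req when toWritebackReq reconstructs it,
  // since a merged req always targets the same block (see secondary_ready)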

  when (state === s_release_req) {
    when (io.mem_release.fire()) {
      c_already_sent := !release_done
    }

    when (req.voluntary) {
      // The previous req is Release
      when (release_done) {
        state := s_release_resp
      }
      // merge a ProbeAck
      when (merge) {
        when (io.mem_release.fire() || c_already_sent) {
          // too late to merge, handle the ProbeAck later
          release_later := true.B
          req_later.param := io.req.bits.param
          req_later.voluntary := io.req.bits.voluntary
          req_later.hasData := io.req.bits.hasData
          req_later.dirty := io.req.bits.dirty
          req_later.delay_release := io.req.bits.delay_release
          req_later.miss_id := io.req.bits.miss_id
        }.otherwise {
          // Release hasn't been sent out yet, change Release to ProbeAck
          req.voluntary := false.B
          req.hasData := req.hasData || io.req.bits.hasData
          req.dirty := req.dirty || io.req.bits.dirty
          req.data := Mux(
            io.req.bits.hasData,
            io.req.bits.data,
            req.data
          )
          req.delay_release := false.B
          remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        }
      }
    }.otherwise {
      // The previous req is ProbeAck
      when (merge) {
        release_later := true.B
        req_later.param := io.req.bits.param
        req_later.voluntary := io.req.bits.voluntary
        req_later.hasData := io.req.bits.hasData
        req_later.dirty := io.req.bits.dirty
        req_later.delay_release := io.req.bits.delay_release
        req_later.miss_id := io.req.bits.miss_id
      }

      when (release_done) {
        when (merge) {
          // Send the Release after ProbeAck
//          state := s_release_req
//          req := Mux(merge, io.req.bits, req_later.toWritebackReq)
//          release_later := false.B
          state := s_sleep
          req := io.req.bits
          release_later := false.B
        }.elsewhen (release_later) {
          state := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          req := req_later.toWritebackReq
          when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
            req.delay_release := false.B
          }
          release_later := false.B
        }.otherwise {
          state := s_invalid
          release_later := false.B
        }
      }

      when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
        req_later.delay_release := false.B
      }
    }
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B

    when (merge) {
      release_later := true.B
      req_later.param := io.req.bits.param
      req_later.voluntary := io.req.bits.voluntary
      req_later.hasData := io.req.bits.hasData
      req_later.dirty := io.req.bits.dirty
      req_later.delay_release := io.req.bits.delay_release
      req_later.miss_id := io.req.bits.miss_id
    }
    when (io.mem_grant.fire()) {
      when (merge) {
        state := s_release_req
        req := io.req.bits
        remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.elsewhen(release_later) {
        state := s_release_req
        req := req_later.toWritebackReq
        remain_set := Mux(req_later.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.otherwise {
        state := s_invalid
        release_later := false.B
      }
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free (primary_ready)
  // 2. When this entry wants to release while still waiting for the
  //    release_wakeup signal, and a probe req with the same addr comes;
  //    we then merge the probe into the release and handle the probe,
  //    so we don't need another release (secondary_ready).
  io.primary_ready := state === s_invalid
  io.secondary_ready := state =/= s_invalid && io.req.bits.addr === req.addr

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val accept = Cat(primary_ready_vec).orR
  val merge = Cat(secondary_ready_vec).orR
  val alloc = accept && !merge
  // When there is an empty entry, merge the req into an existing entry or
  // allocate a new one. When there is no empty entry, reject the req even
  // if it could be merged.
  io.req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := io.req.valid
      primary_ready_vec(i)   := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits  := io.req.bits

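      // lowest-index-first allocation: entry i takes the new req only if no
      // lower-indexed entry is also free, so exactly one entry sees
      // primary_valid in a given cycle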
      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready
      entry.io.secondary_valid := io.req.valid && accept

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits  := io.mem_grant.bits
//      when (entry_id === grant_source) {
//        io.mem_grant.ready := entry.io.mem_grant.ready
//      }

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update := io.release_update
  }
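  // an entry in s_release_resp always holds mem_grant.ready high, so a
  // ReleaseAck should never be stalled; assert that and simply tie the
  // queue-level ready high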
  assert(RegNext(!(io.mem_grant.valid && !io.mem_grant.ready)))
  io.mem_grant.ready := true.B

  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

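  // TLArbiter.robin arbitrates channel C among the entries in round-robin
  // order, so no writeback entry can be starved by its neighbors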
  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (io.req.fire()) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())

  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire()),
    ("dcache_wbq_1_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) >= (cfg.nReleaseEntries.U/4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U/2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4_4_valid", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nReleaseEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}