xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision f94d088c9ad83d5f297a63aba8cf1f28e4202d1b)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import huancun.DirtyKey
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}

class WritebackReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W)
  val addr_dup_0 = UInt(PAddrBits.W)
  val addr_dup_1 = UInt(PAddrBits.W)
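  // addr_dup_0/addr_dup_1 appear to be physical copies of addr, presumably kept to cut
  // fanout for timing; they must always be written with the same value as addr.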
  val param  = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()
  val data = UInt((cfg.blockBytes * 8).W)

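  // delay_release marks a voluntary Release that must sleep until the corresponding
  // refill lands; miss_id identifies the miss queue entry whose release_wakeup signal
  // ends the sleep (see WritebackEntry below).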
  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }
}

// While a Release sleeps and waits for a refill to wake it up,
// the main pipe may update the block's meta & data in the meantime,
// so the meta & data to be released must be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
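  // mask is at bank granularity: one bit per DCache bank, each bank presumably 64 bits
  // wide (matching the FillInterleaved(64, _) expansion in WritebackEntry.mergeData).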
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for a new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    // this entry is busy, but it can merge the new req
    val secondary_valid = Input(Bool())
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReq))

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr  = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  // ProbeAck:               s_invalid ->            s_release_req
  // ProbeAck merge Release: s_invalid ->            s_release_req
  // Release:                s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck: s_invalid -> s_sleep -> s_release_req
  //                        (the Release is turned into a ProbeAck if it has not been fired yet)
  //                     or: s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                        (send a ProbeAck after the Release transaction is over)
  val state = RegInit(s_invalid)
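  // state_dup_0/state_dup_1 appear to be copies of state, presumably kept to cut
  // register fanout for timing; all three are always written together.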
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr
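  // remain is a set-clear register: remain_set marks beats that still have to be sent
  // and remain_clr retires one beat per fired C-channel transfer. A req with data sets
  // all refillCycles bits; a data-less message sets only bit 0.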

  val busy = remain.orR

  val req  = Reg(new WritebackReq)

  // assign default signals to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := req.addr


  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

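  // Overlay new_data onto old_data under a per-bank write mask: each wmask bit selects
  // a 64-bit bank. E.g. wmask = "b0011".U takes banks 0 and 1 from new_data and keeps
  // the remaining banks of old_data.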
  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  when (io.req.valid && io.primary_valid && io.primary_ready) {
    assert (remain === 0.U)
    req := io.req.bits
    when (io.req.bits.delay_release) {
      state := s_sleep
      state_dup_0 := s_sleep
      state_dup_1 := s_sleep
    }.otherwise {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for the refill pipe to inform me that I can keep releasing
  val merge = io.secondary_valid && io.secondary_ready
  when (state === s_sleep) {
    assert(remain === 0.U)
    // There shouldn't be a new Release with the same addr in sleep state
    assert(!(merge && io.req.bits.voluntary))

    val update = io.release_update.valid && io.release_update.bits.addr === req.addr
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask.orR
      req.dirty := req.dirty || io.release_update.bits.mask.orR
      req.data := mergeData(req.data, io.release_update.bits.data, io.release_update.bits.mask)
    }.elsewhen (merge) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      req.voluntary := false.B
      req.param := req.param
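      // param is deliberately left unchanged: the merged ProbeAck reports the
      // permission already recorded in this entry (the self-assignment above is a
      // no-op, presumably kept to make that explicit).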
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      req.data := Mux(
        io.req.bits.hasData,
        io.req.bits.data,
        req.data
      )
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }

    when (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      req.delay_release := false.B
      remain_set := Mux(
        req.hasData || update && io.release_update.bits.mask.orR || merge && io.req.bits.hasData,
        ~0.U(refillCycles.W),
        1.U(refillCycles.W)
      )
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain_dup_0)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := req.data((i + 1) * beatBits - 1, i * beatBits)
  }
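  // The cache line is sliced into refillCycles beats of beatBits each; the lowest
  // still-pending beat (PriorityEncoder of remain) is driven onto channel C, so beats
  // go out in ascending order.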

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr_dup_0,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = req.addr_dup_0,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = req.addr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = req.addr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

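  // DirtyKey is a HuanCun-specific echo field telling L2 whether the released data is
  // actually dirty, presumably so that a clean writeback can skip the data update.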
  voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  when (io.mem_release.fire()) { remain_clr := PriorityEncoderOH(remain_dup_1) }

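  // edge.count tracks the beat counter of the message on channel C; release_done is
  // asserted when the last beat of the current message fires.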
  val (_, _, release_done, _) = edge.count(io.mem_release)

//  when (state === s_release_req && release_done) {
//    state := Mux(req.voluntary, s_release_resp, s_invalid)
//  }

  // Because the wbq now merges same-addr reqs unconditionally, a req to be merged may
  // arrive too late to be folded into the in-flight req. In that case the new req is
  // recorded and handled after the previous one finishes.
  // TODO: initialize these
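  // release_later records that a merged req must be replayed once the current
  // transaction completes; c_already_sent remembers that the first beat of the current
  // C-channel message has already left, in which case it is too late to morph the
  // in-flight message (e.g. turn a Release into a ProbeAck).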
  val release_later = RegInit(false.B)
  val c_already_sent = RegInit(false.B)
  def tmp_req() = new Bundle {
    val param = UInt(cWidth.W)
    val voluntary = Bool()
    val hasData = Bool()
    val dirty = Bool()
    val delay_release = Bool()
    val miss_id = UInt(log2Up(cfg.nMissEntries).W)

    def toWritebackReq = {
      val r = Wire(new WritebackReq())
      r.data := req.data
      r.addr := req.addr
      r.addr_dup_0 := req.addr_dup_0
      r.addr_dup_1 := req.addr_dup_1
      r.param := param
      r.voluntary := voluntary
      r.hasData := hasData
      r.dirty := dirty
      r.delay_release := delay_release
      r.miss_id := miss_id
      r
    }
  }
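  // tmp_req stores only the metadata of the postponed req; addr and data are reused
  // from the live req, which is safe because reqs only merge when they target the same
  // cache line.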
  val req_later = Reg(tmp_req())

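  // s_release_req: drive the C channel. A voluntary Release whose first beat has not
  // been sent yet can still be morphed into a ProbeAck when a probe merges in;
  // otherwise the merged req is parked in req_later and replayed afterwards.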
  when (state_dup_0 === s_release_req) {
    when (io.mem_release.fire()) {
      c_already_sent := !release_done
    }

    when (req.voluntary) {
      // The previous req is Release
      when (release_done) {
        state := s_release_resp
        state_dup_0 := s_release_resp
        state_dup_1 := s_release_resp
      }
      // merge a ProbeAck
      when (merge) {
        when (io.mem_release.fire() || c_already_sent) {
          // too late to merge, handle the ProbeAck later
          release_later := true.B
          req_later.param := io.req.bits.param
          req_later.voluntary := io.req.bits.voluntary
          req_later.hasData := io.req.bits.hasData
          req_later.dirty := io.req.bits.dirty
          req_later.delay_release := io.req.bits.delay_release
          req_later.miss_id := io.req.bits.miss_id
        }.otherwise {
          // Release hasn't been sent out yet, change Release to ProbeAck
          req.voluntary := false.B
          req.hasData := req.hasData || io.req.bits.hasData
          req.dirty := req.dirty || io.req.bits.dirty
          req.data := Mux(
            io.req.bits.hasData,
            io.req.bits.data,
            req.data
          )
          req.delay_release := false.B
          remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        }
      }
    }.otherwise {
      // The previous req is ProbeAck
      when (merge) {
        release_later := true.B
        req_later.param := io.req.bits.param
        req_later.voluntary := io.req.bits.voluntary
        req_later.hasData := io.req.bits.hasData
        req_later.dirty := io.req.bits.dirty
        req_later.delay_release := io.req.bits.delay_release
        req_later.miss_id := io.req.bits.miss_id
      }

      when (release_done) {
        when (merge) {
          // Send the Release after ProbeAck
//          state := s_release_req
//          req := Mux(merge, io.req.bits, req_later.toWritebackReq)
//          release_later := false.B
          state := s_sleep
          state_dup_0 := s_sleep
          state_dup_1 := s_sleep
          req := io.req.bits
          release_later := false.B
        }.elsewhen (release_later) {
          state := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_0 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_1 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          req := req_later.toWritebackReq
          when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
            req.delay_release := false.B
          }
          release_later := false.B
        }.otherwise {
          state := s_invalid
          state_dup_0 := s_invalid
          state_dup_1 := s_invalid
          release_later := false.B
        }
      }

      when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
        req_later.delay_release := false.B
      }
    }
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state_dup_0 === s_release_resp) {
    io.mem_grant.ready := true.B

    when (merge) {
      release_later := true.B
      req_later.param := io.req.bits.param
      req_later.voluntary := io.req.bits.voluntary
      req_later.hasData := io.req.bits.hasData
      req_later.dirty := io.req.bits.dirty
      req_later.delay_release := io.req.bits.delay_release
      req_later.miss_id := io.req.bits.miss_id
    }
    when (io.mem_grant.fire()) {
      when (merge) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        req := io.req.bits
        remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.elsewhen(release_later) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        req := req_later.toWritebackReq
        remain_set := Mux(req_later.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.otherwise {
        state := s_invalid
        state_dup_0 := s_invalid
        state_dup_1 := s_invalid
        release_later := false.B
      }
    }
  }

  // When does this entry accept a new req?
  // 1. Allocation: this entry is free (primary_ready).
  // 2. Merge: this entry wants to release but is still waiting for the release_wakeup
  //    signal, and a probe req with the same addr arrives. The probe is merged with
  //    the release and handled here, so no separate release is needed afterwards.
  io.primary_ready := state_dup_1 === s_invalid
  io.secondary_ready := state_dup_1 =/= s_invalid && io.req.bits.addr === req.addr
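  // Note that secondary_ready matches on addr alone in every non-invalid state; the
  // s_release_req/s_release_resp logic above decides whether a merged req can be
  // folded into the in-flight one or must be parked in req_later.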

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state_dup_0 === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state_dup_1 === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val accept = Cat(primary_ready_vec).orR
  val merge = Cat(secondary_ready_vec).orR
  val alloc = accept && !merge
  // When there are empty entries, merge or allocate a new entry.
  // When there is no empty entry, reject it even if it can be merged.
  io.req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
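      // former_primary_ready is true when some lower-indexed entry is also free, so
      // primary_valid below is one-hot: a new req is allocated to the free entry with
      // the lowest index.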
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := io.req.valid
      primary_ready_vec(i)   := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits  := io.req.bits

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready
      entry.io.secondary_valid := io.req.valid && accept

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits  := io.mem_grant.bits
//      when (entry_id === grant_source) {
//        io.mem_grant.ready := entry.io.mem_grant.ready
//      }

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update := io.release_update
  }
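  // ReleaseAck on channel D is never back-pressured: io.mem_grant.ready is tied high,
  // and the addressed entry is expected to be in s_release_resp, ready to take the
  // single-beat ReleaseAck (see the commented-out per-entry ready routing above).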
  assert(RegNext(!(io.mem_grant.valid && !io.mem_grant.ready)))
  io.mem_grant.ready := true.B

  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

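  // Round-robin arbitration of all entries' C-channel messages onto mem_release;
  // TLArbiter keeps the beats of a multi-beat message together.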
  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (io.req.fire()) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire()) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire()) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire())

  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire()),
    ("dcache_wbq_1_4_valid", (perfValidCount <= (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}