xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision 8891a219bbc84f568e1d134854d8d5ed86d6d560)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import coupledL2.DirtyKey
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}

class WritebackReqCtrl(implicit p: Parameters) extends DCacheBundle {
  val param  = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
}

class WritebackReqWodata(implicit p: Parameters) extends WritebackReqCtrl {
  val addr = UInt(PAddrBits.W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b\n",
      addr, param, voluntary, hasData)
  }
}

class WritebackReqData(implicit p: Parameters) extends DCacheBundle {
  val data = UInt((cfg.blockBytes * 8).W)
}

class WritebackReq(implicit p: Parameters) extends WritebackReqWodata {
  val data = UInt((cfg.blockBytes * 8).W)

  override def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }

  def toWritebackReqWodata(): WritebackReqWodata = {
    val out = Wire(new WritebackReqWodata)
    out.addr := addr
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqCtrl(): WritebackReqCtrl = {
    val out = Wire(new WritebackReqCtrl)
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqData(): WritebackReqData = {
    val out = Wire(new WritebackReqData)
    out.data := data
    out
  }
}

// While a Release sleeps and waits for a refill to wake it up,
// the main pipe may update the line's meta & data in the meantime,
// so the meta & data to be released need to be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

// To reduce fanout, writeback queue entry data is updated 1 cycle
// after ReleaseUpdate.fire
class WBQEntryReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask_delayed = UInt(DCacheBanks.W)
  val data_delayed = UInt((cfg.blockBytes * 8).W)
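  // mask_orr is not delayed: it is the OR-reduction of the original mask,
  // i.e. whether this update writes any bank at all (driven in WritebackQueue)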
  val mask_orr = Bool()
}

// When a probe TtoB req enters the dcache main pipe, check whether that
// cacheline is waiting to be released. If so, change TtoB to TtoN and set
// the dcache coherence state to N.
class ProbeToBCheckReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W) // paddr from mainpipe s1
}

class ProbeToBCheckResp(implicit p: Parameters) extends DCacheBundle {
  val toN = Bool() // need to set dcache coh to N
}

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())
    // allocate this entry for new req
    val primary_valid = Input(Bool())
    // this entry is free and can be allocated to new reqs
    val primary_ready = Output(Bool())
    val primary_ready_dup = Vec(nDupWbReady, Output(Bool()))
    // this entry is busy, but it can merge the new req
    val secondary_valid = Input(Bool())
    val secondary_ready = Output(Bool())
    val req = Flipped(DecoupledIO(new WritebackReqWodata))
    val req_data = Input(new WritebackReqData)

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val block_addr  = Output(Valid(UInt()))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new WBQEntryReleaseUpdate))

    val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)
  })

  val s_invalid :: s_sleep :: s_release_req :: s_release_resp :: Nil = Enum(4)
  // ProbeAck:               s_invalid ->            s_release_req
  // ProbeAck merge Release: s_invalid ->            s_release_req
  // Release:                s_invalid -> s_sleep -> s_release_req -> s_release_resp
  // Release merge ProbeAck: s_invalid -> s_sleep -> s_release_req
  //                        (change Release into ProbeAck when Release is not fired)
  //                     or: s_invalid -> s_sleep -> s_release_req -> s_release_resp -> s_release_req
  //                        (send a ProbeAck after Release transaction is over)
  val state = RegInit(s_invalid)
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)
  val state_dup_for_mp = RegInit(VecInit(Seq.fill(nDupWbReady)(s_invalid)))

  // internal regs
  // remaining beats
  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr
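  // remain is a per-beat bitmask of beats still to be sent on channel C:
  // remain_set marks all beats of a newly accepted message, remain_clr drops
  // the beat that just fired. E.g. with refillCycles = 2, a release with data
  // starts at remain = "b11", becomes "b01" after the first beat fires, then "b00"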

  // writeback queue data
  val data = Reg(UInt((cfg.blockBytes * 8).W))

  // writeback queue paddr
  val paddr_dup_0 = Reg(UInt(PAddrBits.W))
  val paddr_dup_1 = Reg(UInt(PAddrBits.W))
  val paddr_dup_2 = Reg(UInt(PAddrBits.W))

  // pending data write
  // !s_data_override means there is an in-progress data write
  val s_data_override = RegInit(true.B)
  // !s_data_merge means there is an in-progress data merge
  val s_data_merge = RegInit(true.B)

  // there is a valid request that can be sent to the release bus
  val busy = remain.orR && s_data_override && s_data_merge // beats remain and the data write has finished

  val req  = Reg(new WritebackReqCtrl)

  // assign default signals to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := paddr_dup_0

  s_data_override := true.B // data_override takes only 1 cycle
  s_data_merge := true.B // data_merge takes only 1 cycle


  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }

  def mergeData(old_data: UInt, new_data: UInt, wmask: UInt): UInt = {
    val full_wmask = FillInterleaved(64, wmask)
    (~full_wmask & old_data | full_wmask & new_data)
  }
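  // mergeData works at 64-bit bank granularity: each set bit of wmask takes one
  // 64-bit bank from new_data, cleared bits keep old_data. E.g. with
  // wmask = "b0010", FillInterleaved(64, wmask) puts 64 ones in bits 127:64,
  // so only that bank is taken from new_data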

  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  val alloc = io.req.valid && io.primary_valid && io.primary_ready
  when (alloc) {
    assert (remain === 0.U)
    req := io.req.bits
    s_data_override := false.B
    // only update paddr when allocating a new entry
    paddr_dup_0 := io.req.bits.addr
    paddr_dup_1 := io.req.bits.addr
    paddr_dup_2 := io.req.bits.addr
    when (io.req.bits.delay_release) {
      state := s_sleep
      state_dup_0 := s_sleep
      state_dup_1 := s_sleep
      state_dup_for_mp.foreach(_ := s_sleep)
    }.otherwise {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      state_dup_for_mp.foreach(_ := s_release_req)
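      // a message with data occupies all refillCycles beats on channel C;
      // a dataless message (e.g. a permission-report ProbeAck) still takes one beat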
      remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }
  }

  // --------------------------------------------------------------------------------
  // s_sleep: wait for refill pipe to inform me that I can keep releasing
  val merge = io.secondary_valid && io.secondary_ready
  when (state === s_sleep) {
    assert(remain === 0.U)
    // There shouldn't be a new Release with the same addr in sleep state
    assert(!(merge && io.req.bits.voluntary))

    val update = io.release_update.valid && io.release_update.bits.addr === paddr_dup_0
    when (update) {
      req.hasData := req.hasData || io.release_update.bits.mask_orr
      req.dirty := req.dirty || io.release_update.bits.mask_orr
      s_data_merge := false.B
    }.elsewhen (merge) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      state_dup_for_mp.foreach(_ := s_release_req)
      req.voluntary := false.B
      req.param := req.param // param is kept unchanged when merging a ProbeAck
      req.hasData := req.hasData || io.req.bits.hasData
      req.dirty := req.dirty || io.req.bits.dirty
      s_data_override := !io.req.bits.hasData // update data when io.req.bits.hasData
      req.delay_release := false.B
      remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    }

    when (io.release_wakeup.valid && io.release_wakeup.bits === req.miss_id) {
      state := s_release_req
      state_dup_0 := s_release_req
      state_dup_1 := s_release_req
      state_dup_for_mp.foreach(_ := s_release_req)
      req.delay_release := false.B
      remain_set := Mux(
        req.hasData || update && io.release_update.bits.mask_orr || merge && io.req.bits.hasData,
        ~0.U(refillCycles.W),
        1.U(refillCycles.W)
      )
    }
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, keep sending
  // which beat should be sent in this cycle?
  val beat = PriorityEncoder(remain_dup_0)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := data((i + 1) * beatBits - 1, i * beatBits)
  }

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2
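  // edge.Release returns a (legal, bits) tuple, so ._2 selects the TLBundleC
  // payload; edge.ProbeAck returns the bundle directly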

  // voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

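  // a beat fired on channel C: clear the lowest remaining beat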
  when (io.mem_release.fire) { remain_clr := PriorityEncoderOH(remain_dup_1) }

  val (_, _, release_done, _) = edge.count(io.mem_release)

//  when (state === s_release_req && release_done) {
//    state := Mux(req.voluntary, s_release_resp, s_invalid)
//  }

  // Because the wbq now merges a same-addr req unconditionally, a req that arrives
  // too late cannot be merged into the previous one. In that case the new req is
  // recorded and handled after the previous one finishes.
  // TODO: initialize these
  val release_later = RegInit(false.B)
  val c_already_sent = RegInit(false.B)
  def tmp_req() = new Bundle {
    val param = UInt(cWidth.W)
    val voluntary = Bool()
    val hasData = Bool()
    val dirty = Bool()
    val delay_release = Bool()
    val miss_id = UInt(log2Up(cfg.nMissEntries).W)

    def toWritebackReqCtrl = {
      val r = Wire(new WritebackReqCtrl())
      r.param := param
      r.voluntary := voluntary
      r.hasData := hasData
      r.dirty := dirty
      r.delay_release := delay_release
      r.miss_id := miss_id
      r
    }
  }
  val req_later = Reg(tmp_req())
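  // req_later is a shadow copy of the deferred req's control fields; no addr is
  // stored because a merged req always matches this entry's addr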

  when (state_dup_0 === s_release_req) {
    when (io.mem_release.fire) {
      c_already_sent := !release_done
    }

    when (req.voluntary) {
      // The previous req is Release
      when (release_done) {
        state := s_release_resp
        state_dup_0 := s_release_resp
        state_dup_1 := s_release_resp
        state_dup_for_mp.foreach(_ := s_release_resp)
      }
      // merge a ProbeAck
      when (merge) {
        when (io.mem_release.fire || c_already_sent) {
          // too late to merge, handle the ProbeAck later
          release_later := true.B
          req_later.param := io.req.bits.param
          req_later.voluntary := io.req.bits.voluntary
          req_later.hasData := io.req.bits.hasData
          req_later.dirty := io.req.bits.dirty
          req_later.delay_release := io.req.bits.delay_release
          req_later.miss_id := io.req.bits.miss_id
        }.otherwise {
          // Release hasn't been sent out yet, change Release to ProbeAck
          req.voluntary := false.B
          req.hasData := req.hasData || io.req.bits.hasData
          req.dirty := req.dirty || io.req.bits.dirty
          // s_data_override := false.B
          req.delay_release := false.B
          remain_set := Mux(req.hasData || io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        }
      }
    }.otherwise {
      // The previous req is ProbeAck
      when (merge) {
        release_later := true.B
        req_later.param := io.req.bits.param
        req_later.voluntary := io.req.bits.voluntary
        req_later.hasData := io.req.bits.hasData
        req_later.dirty := io.req.bits.dirty
        req_later.delay_release := io.req.bits.delay_release
        req_later.miss_id := io.req.bits.miss_id
      }

      when (release_done) {
        when (merge) {
          // Send the Release after ProbeAck
//          state := s_release_req
//          req := Mux(merge, io.req.bits, req_later.toWritebackReqCtrl)
//          release_later := false.B
          state := s_sleep
          state_dup_0 := s_sleep
          state_dup_1 := s_sleep
          state_dup_for_mp.foreach(_ := s_sleep)
          req := io.req.bits
          release_later := false.B
        }.elsewhen (release_later) {
          state := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_0 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_1 := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          )
          state_dup_for_mp.foreach(_ := Mux(
            io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release,
            s_release_req,
            s_sleep
          ))
          req := req_later.toWritebackReqCtrl
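          // remain_clr must be forced to 0 below: release_done implies that
          // mem_release fired, which scheduled a clear above; overriding it
          // (last connect wins) keeps the freshly set beats intact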
          when(io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id || !req_later.delay_release) {
            remain_set := Mux(req_later.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
            remain_clr := 0.U
          }.otherwise {
            remain_set := 0.U
          }
          when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
            req.delay_release := false.B
          }
          release_later := false.B
        }.otherwise {
          state := s_invalid
          state_dup_0 := s_invalid
          state_dup_1 := s_invalid
          state_dup_for_mp.foreach(_ := s_invalid)
          release_later := false.B
        }
      }

      when (io.release_wakeup.valid && io.release_wakeup.bits === req_later.miss_id) {
        req_later.delay_release := false.B
      }
    }
  }

  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state_dup_0 === s_release_resp) {
    io.mem_grant.ready := true.B

    when (merge) {
      release_later := true.B
      req_later.param := io.req.bits.param
      req_later.voluntary := io.req.bits.voluntary
      req_later.hasData := io.req.bits.hasData
      req_later.dirty := io.req.bits.dirty
      req_later.delay_release := io.req.bits.delay_release
      req_later.miss_id := io.req.bits.miss_id
    }
    when (io.mem_grant.fire) {
      when (merge) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        state_dup_for_mp.foreach(_ := s_release_req)
        req := io.req.bits
        remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.elsewhen(release_later) {
        state := s_release_req
        state_dup_0 := s_release_req
        state_dup_1 := s_release_req
        state_dup_for_mp.foreach(_ := s_release_req)
        req := req_later.toWritebackReqCtrl
        remain_set := Mux(req_later.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
        release_later := false.B
      }.otherwise {
        state := s_invalid
        state_dup_0 := s_invalid
        state_dup_1 := s_invalid
        state_dup_for_mp.foreach(_ := s_invalid)
        release_later := false.B
      }
    }
  }

  // When does this entry accept a new req?
  // 1. When this entry is free (it is allocated as a primary entry)
  // 2. When this entry wants to release but is still waiting for the release_wakeup
  //    signal, and a probe req with the same addr arrives. In this case the probe is
  //    merged with the release and handled here, so no extra release is needed.
  io.primary_ready := state_dup_1 === s_invalid
  io.primary_ready_dup.zip(state_dup_for_mp).foreach { case (rdy, st) => rdy := st === s_invalid }
  io.secondary_ready := state_dup_1 =/= s_invalid && io.req.bits.addr === paddr_dup_0

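  // toN is reported when the probed line is still sleeping in this entry, i.e.
  // it is waiting to be released; the main pipe then turns the TtoB probe into
  // TtoN (see the comment above ProbeToBCheckReq)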
  io.probe_ttob_check_resp.valid := RegNext(io.probe_ttob_check_req.valid) // for debug only
  io.probe_ttob_check_resp.bits.toN := RegNext(
    state_dup_1 === s_sleep &&
    io.probe_ttob_check_req.bits.addr === paddr_dup_0 &&
    io.probe_ttob_check_req.valid
  )

  // data update logic
  when (!s_data_merge) {
    data := mergeData(data, io.release_update.bits.data_delayed, io.release_update.bits.mask_delayed)
  }

  when (!s_data_override && (req.hasData || RegNext(alloc))) {
    data := io.req_data.data
  }

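  // a data merge and a data override both write `data`,
  // so they must never be pending in the same cycle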
  assert(!RegNext(!s_data_merge && !s_data_override))

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state_dup_0 === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("wb_probe_ttob_fix", io.probe_ttob_check_resp.valid && io.probe_ttob_check_resp.bits.toN)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state_dup_1 === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents {
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val req_ready_dup = Vec(nDupWbReady, Output(Bool()))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    val release_wakeup = Flipped(ValidIO(UInt(log2Up(cfg.nMissEntries).W)))
    val release_update = Flipped(ValidIO(new ReleaseUpdate))

    val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val secondary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val accept = Cat(primary_ready_vec).orR
  val merge = Cat(secondary_ready_vec).orR
  val alloc = accept && !merge
  // When there is an empty entry, merge the req or allocate a new entry for it.
  // When there is no empty entry, reject the req even if it could be merged.
  io.req.ready := accept

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  // delay the data write of a miss queue release update by 1 cycle
  val release_update_bits_for_entry = Wire(new WBQEntryReleaseUpdate)
  release_update_bits_for_entry.addr := io.release_update.bits.addr
  release_update_bits_for_entry.mask_delayed := RegEnable(io.release_update.bits.mask, io.release_update.valid)
  release_update_bits_for_entry.data_delayed := RegEnable(io.release_update.bits.data, io.release_update.valid)
  release_update_bits_for_entry.mask_orr := io.release_update.bits.mask.orR
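  // addr and mask_orr stay combinational so an entry can match and update its
  // control bits immediately; the wide mask / data are registered and consumed
  // one cycle later, matching the entry's s_data_merge timing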

  // delay the data write of a writeback req by 1 cycle
  val req_data = RegEnable(io.req.bits.toWritebackReqData(), io.req.valid)

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
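      // former_primary_ready forms a fixed-priority allocation chain:
      // a new req is allocated into the lowest-index free entry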
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := io.req.valid
      primary_ready_vec(i)   := entry.io.primary_ready
      secondary_ready_vec(i) := entry.io.secondary_ready
      entry.io.req.bits  := io.req.bits
      entry.io.req_data  := req_data

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready
      entry.io.secondary_valid := io.req.valid && accept

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits  := io.mem_grant.bits
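      // channel-D responses are demultiplexed by TileLink source ID; entry IDs
      // start at releaseIdBase so they do not collide with source IDs used
      // elsewhere in the dcache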

      entry.io.release_wakeup := io.release_wakeup
      entry.io.release_update.valid := io.release_update.valid
      entry.io.release_update.bits := release_update_bits_for_entry // data write delayed

      entry.io.probe_ttob_check_req := io.probe_ttob_check_req
  }

  io.req_ready_dup.zipWithIndex.foreach { case (rdy, i) =>
    rdy := Cat(entries.map(_.io.primary_ready_dup(i))).orR
  }

  io.probe_ttob_check_resp.valid := RegNext(io.probe_ttob_check_req.valid) // for debug only
  io.probe_ttob_check_resp.bits.toN := VecInit(entries.map(e => e.io.probe_ttob_check_resp.bits.toN)).asUInt.orR

  assert(RegNext(!(io.mem_grant.valid && !io.mem_grant.ready)))
  io.mem_grant.ready := true.B

  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

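  // round-robin arbitration over the entries' channel-C requests,
  // so no single entry can starve the others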
  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (io.req.fire) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire) {
    io.mem_release.bits.dump
  }

  when (io.mem_grant.fire) {
    io.mem_grant.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)

  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire),
    ("dcache_wbq_1_4_valid", (perfValidCount < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}