xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision bb2f3f51dd67f6e16e0cc1ffe43368c9fc7e4aef)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
16
17package xiangshan.cache
18
19import chisel3._
20import chisel3.util._
21import freechips.rocketchip.tilelink.TLPermissions._
22import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
23import org.chipsalliance.cde.config.Parameters
24import utils.{HasPerfEvents, HasTLDump}
25import utility.{XSDebug, XSPerfAccumulate}
26
27
/** Control-only part of a writeback request (no address, no data). */
class WritebackReqCtrl(implicit p: Parameters) extends DCacheBundle {
  // TileLink C-channel permission-transition code (report/shrink permissions)
  val param  = UInt(cWidth.W)
  // true: voluntary Release; false: ProbeAck (selects the message built in WritebackEntry)
  val voluntary = Bool()
  // whether a full data block accompanies this writeback (selects the *Data message form)
  val hasData = Bool()
  // dirty implies hasData (asserted in WritebackEntry while busy)
  val dirty = Bool()

  // NOTE(review): delay_release/miss_id are carried through but not consumed by
  // the logic visible in this file — presumably used by the miss queue; confirm.
  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
}
37
/** Writeback request without the data payload: control fields plus block address. */
class WritebackReqWodata(implicit p: Parameters) extends WritebackReqCtrl {
  // physical address of the cache block being written back
  val addr = UInt(PAddrBits.W)

  /** Debug print of the request (data field not present at this level). */
  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b\n",
      addr, param, voluntary, hasData)
  }
}
46
/** Data-only slice of a writeback request. Kept separate so WritebackQueue can
  * register it one cycle after the control part (see `req_data` there). */
class WritebackReqData(implicit p: Parameters) extends DCacheBundle {
  // full cache block payload
  val data = UInt((cfg.blockBytes * 8).W)
}
50
/** Full writeback request: control fields, block address and data payload.
  * Provides projections onto the narrower request bundles used downstream. */
class WritebackReq(implicit p: Parameters) extends WritebackReqWodata {
  // full cache block payload
  val data = UInt((cfg.blockBytes * 8).W)

  /** Debug print including the data payload. */
  override def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }

  /** Project onto the no-data request bundle (control + address). */
  def toWritebackReqWodata(): WritebackReqWodata = {
    val wodata = Wire(new WritebackReqWodata)
    wodata.miss_id := miss_id
    wodata.delay_release := delay_release
    wodata.dirty := dirty
    wodata.hasData := hasData
    wodata.voluntary := voluntary
    wodata.param := param
    wodata.addr := addr
    wodata
  }

  /** Project onto the control-only request bundle. */
  def toWritebackReqCtrl(): WritebackReqCtrl = {
    val ctrl = Wire(new WritebackReqCtrl)
    ctrl.miss_id := miss_id
    ctrl.delay_release := delay_release
    ctrl.dirty := dirty
    ctrl.hasData := hasData
    ctrl.voluntary := voluntary
    ctrl.param := param
    ctrl
  }

  /** Project onto the data-only request bundle. */
  def toWritebackReqData(): WritebackReqData = {
    val payload = Wire(new WritebackReqData)
    payload.data := data
    payload
  }
}
88
89// While a Release sleeps and waits for a refill to wake it up,
90// main pipe might update meta & data during this time.
91// So the meta & data to be released need to be updated too.
// While a Release sleeps and waits for a refill to wake it up,
// main pipe might update meta & data during this time.
// So the meta & data to be released need to be updated too.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  // physical address of the block whose pending release must be updated
  val addr = UInt(PAddrBits.W)
  // per-bank update mask — presumably one bit per DCache bank; confirm against producer
  val mask = UInt(DCacheBanks.W)
  // updated full-block data
  val data = UInt((cfg.blockBytes * 8).W)
}
98
99// To reduce fanout, writeback queue entry data is updated 1 cycle
100// after ReleaseUpdate.fire
// To reduce fanout, writeback queue entry data is updated 1 cycle
// after ReleaseUpdate.fire
// NOTE(review): not referenced by the modules visible in this file.
class WBQEntryReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  // mask/data arrive one cycle later than addr (hence the *_delayed names)
  val mask_delayed = UInt(DCacheBanks.W)
  val data_delayed = UInt((cfg.blockBytes * 8).W)
  // OR-reduction of the mask — true when any bank is updated
  val mask_orr = Bool()
}
108
109// When a probe TtoB req enter dcache main pipe, check if that cacheline
110// is waiting for release. If it is so, change TtoB to TtoN, set dcache
111// coh to N.
// When a probe TtoB req enter dcache main pipe, check if that cacheline
// is waiting for release. If it is so, change TtoB to TtoN, set dcache
// coh to N.
// NOTE(review): the corresponding IO in WritebackQueue is commented out.
class ProbeToBCheckReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W) // paddr from mainpipe s1
}
115
/** Response to ProbeToBCheckReq (currently unused; see commented-out IO below). */
class ProbeToBCheckResp(implicit p: Parameters) extends DCacheBundle {
  val toN = Bool() // need to set dcache coh to N
}
119
/** One writeback-queue entry. Holds a single in-flight Release / ProbeAck
  * transaction: accepts a request, streams the block (or a single beat) out on
  * the TileLink C channel, and for voluntary Releases waits for the ReleaseAck
  * on the D channel before freeing itself. */
class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    // this entry's TileLink source id (WritebackQueue drives i + releaseIdBase)
    val id = Input(UInt())

    // allocation request (control + addr); the data arrives via req_data,
    // registered one cycle later by WritebackQueue
    val req = Flipped(DecoupledIO(new WritebackReqWodata))
    val req_data = Input(new WritebackReqData)

    // TileLink C channel: Release/ProbeAck (+Data) beats out
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    // TileLink D channel: ReleaseAck in (voluntary releases only)
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
    // primary_valid && primary_ready => this entry is the one allocated this cycle
    val primary_valid = Input(Bool())
    val primary_ready = Output(Bool())
    // duplicated ready bits to reduce fanout at consumers
    val primary_ready_dup = Vec(nDupWbReady, Output(Bool()))

    // address this entry currently occupies; valid while state =/= s_invalid,
    // used by WritebackQueue for conflict/miss blocking
    val block_addr  = Output(Valid(UInt()))
  })

  val s_invalid :: s_release_req :: s_release_resp ::Nil = Enum(3)
  // State flow in the current code:
  //   ProbeAck:  s_invalid -> s_release_req -> s_invalid
  //   Release:   s_invalid -> s_release_req -> s_release_resp -> s_invalid
  // NOTE(review): the original comment here described an `s_sleep` state and
  // Release/ProbeAck merging; no such state exists in this FSM anymore.

  val state = RegInit(s_invalid)
  // NOTE(review): state_dup_0/state_dup_1 are only written at allocation
  // (to s_release_req) and are never read or reset anywhere in this module —
  // they look like dead leftovers of a fanout-reduction refactor; confirm
  // before removing.
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)
  val state_dup_for_mp = RegInit(VecInit(Seq.fill(nDupWbReady)(s_invalid))) //TODO: clock gate

  // one bit per refill beat still to be sent on channel C (set/clear wires
  // folded into the registers below)
  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr

  // writeback queue data
  val data = Reg(UInt((cfg.blockBytes * 8).W))

  // writeback queue paddr (three copies to reduce fanout; all written together)
  val paddr_dup_0 = Reg(UInt(PAddrBits.W))
  val paddr_dup_1 = Reg(UInt(PAddrBits.W))
  val paddr_dup_2 = Reg(UInt(PAddrBits.W))

  // pending data write
  // !s_data_override means there is an in-progress data write
  val s_data_override = RegInit(true.B)
  // !s_data_merge means there is an in-progress data merge
  //val s_data_merge = RegInit(true.B)

  // there are valid request that can be sent to release bus
  //val busy = remain.orR && s_data_override && s_data_merge // have remain beats and data write finished
  val busy = remain.orR && s_data_override  // have remain beats and data write finished
  val req = Reg(new WritebackReqWodata)

  // assign default signals to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := req.addr

  s_data_override := true.B // data_override takes only 1 cycle

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }


  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  io.req.ready := state === s_invalid
  val alloc = io.req.valid && io.primary_valid && io.primary_ready
  when (alloc) {
    assert (remain === 0.U)
    req := io.req.bits
    // clearing s_data_override arms the data-update logic below for next cycle
    s_data_override := false.B
    // only update paddr when allocate a new missqueue entry
    paddr_dup_0 := io.req.bits.addr
    paddr_dup_1 := io.req.bits.addr
    paddr_dup_2 := io.req.bits.addr

    // all beats for a data-carrying writeback, a single beat otherwise
    remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    state      := s_release_req
    state_dup_0 := s_release_req
    state_dup_1 := s_release_req
    state_dup_for_mp.foreach(_ := s_release_req)
  }

  // --------------------------------------------------------------------------------
  // while there beats remaining to be sent, we keep sending
  // which beat to send in this cycle? (lowest set bit of remain)
  val beat = PriorityEncoder(remain_dup_0)

  // block data sliced into channel-C beats
  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := data((i + 1) * beatBits - 1, i * beatBits)
  }

  // the four possible C-channel messages; one is selected by
  // req.voluntary / req.hasData below
  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2

  // voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  // a dirty writeback must carry data
  when(busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))

  // retire the beat just sent
  when (io.mem_release.fire) {remain_clr := PriorityEncoderOH(remain_dup_1)}

  // release_done pulses when the last beat of the C-channel message fires
  val (_, _, release_done, _) = edge.count(io.mem_release)

  when(state === s_release_req && release_done){
    // voluntary Release must wait for ReleaseAck; ProbeAck is done immediately
    state := Mux(req.voluntary, s_release_resp, s_invalid)
    when(req.voluntary){
      state_dup_for_mp.foreach(_ := s_release_resp)
    } .otherwise{
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }

  io.primary_ready := state === s_invalid
  io.primary_ready_dup.zip(state_dup_for_mp).foreach { case (rdy, st) => rdy := st === s_invalid }
  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire) {
      state := s_invalid
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }

  // data update logic: latch the delayed request data. Since s_data_override
  // is cleared at alloc and re-set by default the next cycle, this fires only
  // in the cycle right after allocation.
  // NOTE(review): RegNext(alloc) is true exactly when !s_data_override is, so
  // the (req.hasData || RegNext(alloc)) term looks redundant — confirm.
  when(!s_data_override && (req.hasData || RegNext(alloc))) {
    data := io.req_data.data
  }

  // assert(!RegNext(!s_data_merge && !s_data_override))

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}
306
/** DCache writeback queue. Owns nReleaseEntries WritebackEntry modules,
  * allocates an incoming WritebackReq to the lowest-indexed free entry,
  * round-robin-arbitrates all entries onto the TileLink C channel, routes
  * ReleaseAcks back to the matching entry by source id, and blocks both new
  * writebacks and miss requests that target a block still being released. */
class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    // incoming writeback request (control + data; data split off below)
    val req = Flipped(DecoupledIO(new WritebackReq))
    // duplicated ready bits to reduce fanout at the requesters
    val req_ready_dup = Vec(nDupWbReady, Output(Bool()))
    // TileLink C channel, arbitrated over all entries
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    // TileLink D channel (ReleaseAck), demultiplexed by source id
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    //val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    //val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)

    // block a miss request whose address is still being written back
    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  // per-entry free flags; alloc is true when at least one entry is free
  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val alloc = Cat(primary_ready_vec).orR

  val req = io.req
  val block_conflict = Wire(Bool())

  // accept only when an entry is free and no entry already holds this block
  req.ready := alloc && !block_conflict

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  // delay data write in writeback req for 1 cycle (fanout reduction);
  // entries latch it the cycle after allocation via s_data_override
  val req_data = RegEnable(io.req.bits.toWritebackReqData(), io.req.valid)

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      // true when a lower-indexed free entry already claims this request,
      // so exactly one entry (the lowest-indexed free one) gets primary_valid
      val former_primary_ready = if(i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      // TileLink source id assigned to this entry
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req: broadcast the request; allocation is resolved by primary_valid
      entry.io.req.valid := req.valid && !block_conflict
      primary_ready_vec(i)   := entry.io.primary_ready
      entry.io.req.bits  := req.bits
      entry.io.req_data  := req_data

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready

      // route the grant (ReleaseAck) to the entry whose source id matches
      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits  := io.mem_grant.bits
      //when (i.U === io.mem_grant.bits.source) {
      //  io.mem_grant.ready := entry.io.mem_grant.ready
      //}
  }

  io.req_ready_dup.zipWithIndex.foreach { case (rdy, i) =>
    rdy := Cat(entries.map(_.io.primary_ready_dup(i))).orR && !block_conflict
  }

  // ReleaseAcks are always accepted (overrides the default above)
  io.mem_grant.ready := true.B

  // same-address conflict checks against every occupied entry
  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.req.bits.addr)).asUInt.orR
  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  // round-robin arbitration of all entries onto the C channel
  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)

  // sanity check
  // print all input/output requests for debug purpose
  // print req
  when(io.req.fire) {
    io.req.bits.dump()
  }

  // BUGFIX: dump the C-channel beat that actually fired; the original code
  // dumped io.mem_grant.bits (the unrelated D channel) on a release fire.
  when(io.mem_release.fire){
    io.mem_release.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)

  // occupancy histogram (registered to cut timing paths)
  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire),
    ("dcache_wbq_1_4_valid", (perfValidCount < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U*3.U/4.U))),
  )
  generatePerfEvent()

}
413}