xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/WritebackQueue.scala (revision 4daa5bf3c3f27e7fd090866d52405b21e107eb8d)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleC, TLBundleD, TLEdgeOut}
import org.chipsalliance.cde.config.Parameters
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}

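// Control-only view of a writeback request. A sketch of the field semantics,
// inferred from how the fields are used below (not an authoritative spec):
// - param:     TileLink C-channel permission code; fed to reportPermissions
//              for ProbeAck and to shrinkPermissions for Release
// - voluntary: true for a voluntary Release, false for a ProbeAck
// - hasData:   whether the request carries a full cacheline of data beats
// - dirty:     whether the released cacheline is dirty
// - delay_release / miss_id: coordinate a pending release with the miss
//              queue entry (miss_id) refilling the same line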
class WritebackReqCtrl(implicit p: Parameters) extends DCacheBundle {
  val param  = UInt(cWidth.W)
  val voluntary = Bool()
  val hasData = Bool()
  val dirty = Bool()

  val delay_release = Bool()
  val miss_id = UInt(log2Up(cfg.nMissEntries).W)
}

class WritebackReqWodata(implicit p: Parameters) extends WritebackReqCtrl {
  val addr = UInt(PAddrBits.W)

  def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b\n",
      addr, param, voluntary, hasData)
  }
}

class WritebackReqData(implicit p: Parameters) extends DCacheBundle {
  val data = UInt((cfg.blockBytes * 8).W)
}
class WritebackReq(implicit p: Parameters) extends WritebackReqWodata {
  val data = UInt((cfg.blockBytes * 8).W)

  override def dump() = {
    XSDebug("WritebackReq addr: %x param: %d voluntary: %b hasData: %b data: %x\n",
      addr, param, voluntary, hasData, data)
  }

  def toWritebackReqWodata(): WritebackReqWodata = {
    val out = Wire(new WritebackReqWodata)
    out.addr := addr
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqCtrl(): WritebackReqCtrl = {
    val out = Wire(new WritebackReqCtrl)
    out.param := param
    out.voluntary := voluntary
    out.hasData := hasData
    out.dirty := dirty
    out.delay_release := delay_release
    out.miss_id := miss_id
    out
  }

  def toWritebackReqData(): WritebackReqData = {
    val out = Wire(new WritebackReqData)
    out.data := data
    out
  }
}
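// Note: the wodata/ctrl/data views above let WritebackQueue register the wide
// data field one cycle later than the control fields (see req_data below),
// trimming fanout on the request path. This reading is inferred from the
// "delay data write" comment in WritebackQueue, not from a separate spec.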

// While a Release sleeps and waits for a refill to wake it up,
// the main pipe might update the line's meta & data in the meantime,
// so the meta & data to be released need to be updated as well.
class ReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask = UInt(DCacheBanks.W)
  val data = UInt((cfg.blockBytes * 8).W)
}

// To reduce fanout, writeback queue entry data is updated 1 cycle
// after ReleaseUpdate.fire
class WBQEntryReleaseUpdate(implicit p: Parameters) extends DCacheBundle {
  // only consider store here
  val addr = UInt(PAddrBits.W)
  val mask_delayed = UInt(DCacheBanks.W)
  val data_delayed = UInt((cfg.blockBytes * 8).W)
  val mask_orr = Bool()
}

// When a probe TtoB req enters the dcache main pipe, check whether that
// cacheline is waiting for release. If so, change TtoB to TtoN and set the
// dcache coherence state to N.
class ProbeToBCheckReq(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(PAddrBits.W) // paddr from mainpipe s1
}

class ProbeToBCheckResp(implicit p: Parameters) extends DCacheBundle {
  val toN = Bool() // need to set dcache coh to N
}
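// In this revision the ProbeToBCheck bundles are declared but the matching
// probe_ttob_check_req/resp ports in WritebackQueue are commented out, so the
// check is effectively disabled here.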

class WritebackEntry(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val id = Input(UInt())

    val req = Flipped(DecoupledIO(new WritebackReqWodata))
    val req_data = Input(new WritebackReqData)

    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
    val primary_valid = Input(Bool())
    val primary_ready = Output(Bool())
    val primary_ready_dup = Vec(nDupWbReady, Output(Bool()))

    val block_addr  = Output(Valid(UInt()))
  })
  val s_invalid :: s_release_req :: s_release_resp :: Nil = Enum(3)
  // State transitions (note: the s_sleep state and the Release/ProbeAck
  // merging paths described in earlier revisions of this comment no longer
  // exist; the FSM has only the three states above):
  // ProbeAck: s_invalid -> s_release_req -> s_invalid
  // Release:  s_invalid -> s_release_req -> s_release_resp -> s_invalid

  val state = RegInit(s_invalid)
  val state_dup_0 = RegInit(s_invalid)
  val state_dup_1 = RegInit(s_invalid)
  val state_dup_for_mp = RegInit(VecInit(Seq.fill(nDupWbReady)(s_invalid)))

  val remain = RegInit(0.U(refillCycles.W))
  val remain_dup_0 = RegInit(0.U(refillCycles.W))
  val remain_dup_1 = RegInit(0.U(refillCycles.W))
  val remain_set = WireInit(0.U(refillCycles.W))
  val remain_clr = WireInit(0.U(refillCycles.W))
  remain := (remain | remain_set) & ~remain_clr
  remain_dup_0 := (remain_dup_0 | remain_set) & ~remain_clr
  remain_dup_1 := (remain_dup_1 | remain_set) & ~remain_clr
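  // remain is a per-beat bitmask of beats still to be sent on channel C.
  // A sketch, assuming refillCycles = 2 (e.g. a 64B block on a 256-bit beat):
  //   alloc with hasData -> remain_set = 0b11
  //   first beat fires   -> remain_clr = 0b01, remain becomes 0b10
  //   second beat fires  -> remain_clr = 0b10, remain becomes 0b00 (idle)
  // A dataless request sets only bit 0, giving a single-beat message.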

  // writeback queue data
  val data = Reg(UInt((cfg.blockBytes * 8).W))

  // writeback queue paddr
  val paddr_dup_0 = Reg(UInt(PAddrBits.W))
  val paddr_dup_1 = Reg(UInt(PAddrBits.W))
  val paddr_dup_2 = Reg(UInt(PAddrBits.W))

  // pending data write
  // !s_data_override means there is an in-progress data write
  val s_data_override = RegInit(true.B)
  // !s_data_merge means there is an in-progress data merge
  //val s_data_merge = RegInit(true.B)

  // there is a valid request that can be sent to the release bus
  //val busy = remain.orR && s_data_override && s_data_merge // there are remaining beats and the data write has finished
  val busy = remain.orR && s_data_override  // there are remaining beats and the data write has finished
  val req = Reg(new WritebackReqWodata)

  // assign default values to output signals
  io.req.ready := false.B
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B
  io.block_addr.valid  := state =/= s_invalid
  io.block_addr.bits   := req.addr

  s_data_override := true.B // data_override takes only 1 cycle
  //s_data_merge := true.B // data_merge takes only 1 cycle

  when (state =/= s_invalid) {
    XSDebug("WritebackEntry: %d state: %d block_addr: %x\n", io.id, state, io.block_addr.bits)
  }


  // --------------------------------------------------------------------------------
  // s_invalid: receive requests
  // new req entering
  io.req.ready := state === s_invalid
  val alloc = io.req.valid && io.primary_valid && io.primary_ready
  when (alloc) {
    assert (remain === 0.U)
    req := io.req.bits
    s_data_override := false.B
    // only update paddr when allocating a new writeback queue entry
    paddr_dup_0 := io.req.bits.addr
    paddr_dup_1 := io.req.bits.addr
    paddr_dup_2 := io.req.bits.addr

    remain_set := Mux(io.req.bits.hasData, ~0.U(refillCycles.W), 1.U(refillCycles.W))
    state      := s_release_req
    state_dup_0 := s_release_req
    state_dup_1 := s_release_req
    state_dup_for_mp.foreach(_ := s_release_req)
  }

  // --------------------------------------------------------------------------------
  // while there are beats remaining to be sent, we keep sending
  // which beat to send in this cycle?
  val beat = PriorityEncoder(remain_dup_0)

  val beat_data = Wire(Vec(refillCycles, UInt(beatBits.W)))
  for (i <- 0 until refillCycles) {
    beat_data(i) := data((i + 1) * beatBits - 1, i * beatBits)
  }
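  // A sketch of the slicing above, assuming cfg.blockBytes = 64 and
  // beatBits = 256 (so refillCycles = 2):
  //   beat_data(0) := data(255, 0)    // low half of the block
  //   beat_data(1) := data(511, 256)  // high half of the block
  // beat (the lowest set bit of remain) then selects which slice goes out.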

  val probeResponse = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param
  )

  val probeResponseData = edge.ProbeAck(
    fromSource = io.id,
    toAddress = paddr_dup_1,
    lgSize = log2Ceil(cfg.blockBytes).U,
    reportPermissions = req.param,
    data = beat_data(beat)
  )

  val voluntaryRelease = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param
  )._2

  val voluntaryReleaseData = edge.Release(
    fromSource = io.id,
    toAddress = paddr_dup_2,
    lgSize = log2Ceil(cfg.blockBytes).U,
    shrinkPermissions = req.param,
    data = beat_data(beat)
  )._2
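  // Note: edge.Release(...) in rocket-chip returns a (legal, TLBundleC) pair,
  // hence the ._2 above; edge.ProbeAck(...) returns the bundle directly.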

  // voluntaryReleaseData.echo.lift(DirtyKey).foreach(_ := req.dirty)
  when (busy) {
    assert(!req.dirty || req.hasData)
  }

  io.mem_release.valid := busy
  io.mem_release.bits  := Mux(req.voluntary,
    Mux(req.hasData, voluntaryReleaseData, voluntaryRelease),
    Mux(req.hasData, probeResponseData, probeResponse))


  when (io.mem_release.fire) { remain_clr := PriorityEncoderOH(remain_dup_1) }

  val (_, _, release_done, _) = edge.count(io.mem_release)
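  // edge.count tracks multi-beat progress on a channel, returning
  // (first, last, done, count); release_done pulses when the final beat of
  // the Release/ProbeAck message fires.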

  when (state === s_release_req && release_done) {
    state := Mux(req.voluntary, s_release_resp, s_invalid)
    when (req.voluntary) {
      state_dup_for_mp.foreach(_ := s_release_resp)
    } .otherwise {
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }

  io.primary_ready := state === s_invalid
  io.primary_ready_dup.zip(state_dup_for_mp).foreach { case (rdy, st) => rdy := st === s_invalid }
  // --------------------------------------------------------------------------------
  // receive ReleaseAck for Releases
  when (state === s_release_resp) {
    io.mem_grant.ready := true.B
    when (io.mem_grant.fire) {
      state := s_invalid
      state_dup_for_mp.foreach(_ := s_invalid)
    }
  }

  // data update logic
  when (!s_data_override && (req.hasData || RegNext(alloc))) {
    data := io.req_data.data
  }
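  // s_data_override is cleared for exactly one cycle after alloc, so this
  // write captures the one-cycle-delayed req_data registered by
  // WritebackQueue (see `val req_data = RegEnable(...)` there).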

  // assert(!RegNext(!s_data_merge && !s_data_override))

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)
  XSPerfAccumulate("wb_release", state === s_release_req && release_done && req.voluntary)
  XSPerfAccumulate("wb_probe_resp", state === s_release_req && release_done && !req.voluntary)
  XSPerfAccumulate("penalty_blocked_by_channel_C", io.mem_release.valid && !io.mem_release.ready)
  XSPerfAccumulate("penalty_waiting_for_channel_D", io.mem_grant.ready && !io.mem_grant.valid && state === s_release_resp)
}

class WritebackQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val req = Flipped(DecoupledIO(new WritebackReq))
    val req_ready_dup = Vec(nDupWbReady, Output(Bool()))
    val mem_release = DecoupledIO(new TLBundleC(edge.bundle))
    val mem_grant = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))

    //val probe_ttob_check_req = Flipped(ValidIO(new ProbeToBCheckReq))
    //val probe_ttob_check_resp = ValidIO(new ProbeToBCheckResp)

    val miss_req = Flipped(Valid(UInt()))
    val block_miss_req = Output(Bool())
  })

  require(cfg.nReleaseEntries > cfg.nMissEntries)

  val primary_ready_vec = Wire(Vec(cfg.nReleaseEntries, Bool()))
  val alloc = Cat(primary_ready_vec).orR

  val req = io.req
  val block_conflict = Wire(Bool())

  req.ready := alloc && !block_conflict

  // assign default values to output signals
  io.mem_release.valid := false.B
  io.mem_release.bits  := DontCare
  io.mem_grant.ready   := false.B

  // delay data write in writeback req for 1 cycle
  val req_data = RegEnable(io.req.bits.toWritebackReqData(), io.req.valid)
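  // Control fields reach the entry in cycle 0 while the wide data is
  // registered here and written in cycle 1; since busy in WritebackEntry
  // waits on s_data_override, the release issues one cycle later in exchange
  // for lower fanout on the request data path.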

  require(isPow2(cfg.nMissEntries))
  val grant_source = io.mem_grant.bits.source
  val entries = Seq.fill(cfg.nReleaseEntries)(Module(new WritebackEntry(edge)))
  entries.zipWithIndex.foreach {
    case (entry, i) =>
      val former_primary_ready = if (i == 0)
        false.B
      else
        Cat((0 until i).map(j => entries(j).io.primary_ready)).orR
      val entry_id = (i + releaseIdBase).U

      entry.io.id := entry_id

      // entry req
      entry.io.req.valid := req.valid && !block_conflict
      primary_ready_vec(i)   := entry.io.primary_ready
      entry.io.req.bits  := req.bits
      entry.io.req_data  := req_data

      entry.io.primary_valid := alloc &&
        !former_primary_ready &&
        entry.io.primary_ready

      entry.io.mem_grant.valid := (entry_id === grant_source) && io.mem_grant.valid
      entry.io.mem_grant.bits  := io.mem_grant.bits
      //when (i.U === io.mem_grant.bits.source) {
      //  io.mem_grant.ready := entry.io.mem_grant.ready
      //}
  }
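  // Allocation picks the lowest-indexed free entry: former_primary_ready
  // tells entry i whether some entry j < i is also free, so at most one entry
  // sees primary_valid && primary_ready in a given cycle. For example, with
  // entries 0 and 2 busy and entries 1 and 3 free, entry 1 takes the request.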

  io.req_ready_dup.zipWithIndex.foreach { case (rdy, i) =>
    rdy := Cat(entries.map(_.io.primary_ready_dup(i))).orR && !block_conflict
  }

  io.mem_grant.ready := true.B
  block_conflict := VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.req.bits.addr)).asUInt.orR
  val miss_req_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.miss_req.bits)).asUInt.orR
  io.block_miss_req := io.miss_req.valid && miss_req_conflict

  TLArbiter.robin(edge, io.mem_release, entries.map(_.io.mem_release):_*)
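  // TLArbiter.robin round-robins the per-entry C channels onto the single
  // outgoing mem_release port, holding the grant for the duration of a
  // multi-beat message so its beats stay contiguous.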

  // sanity check
  // print all input/output requests for debug purposes
  // print req
  when (io.req.fire) {
    io.req.bits.dump()
  }

  when (io.mem_release.fire) {
    io.mem_release.bits.dump
  }

  when (io.miss_req.valid) {
    XSDebug("miss_req: addr: %x\n", io.miss_req.bits)
  }

  when (io.block_miss_req) {
    XSDebug("block_miss_req\n")
  }

  // performance counters
  XSPerfAccumulate("wb_req", io.req.fire)

  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_wbq_req      ", io.req.fire),
    ("dcache_wbq_1_4_valid", (perfValidCount < (cfg.nReleaseEntries.U/4.U))),
    ("dcache_wbq_2_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/4.U)) & (perfValidCount <= (cfg.nReleaseEntries.U/2.U))),
    ("dcache_wbq_3_4_valid", (perfValidCount > (cfg.nReleaseEntries.U/2.U)) & (perfValidCount <= (cfg.nReleaseEntries.U*3.U/4.U))),
    ("dcache_wbq_4_4_valid", (perfValidCount > (cfg.nReleaseEntries.U*3.U/4.U))),
  )
  generatePerfEvent()

}