xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/Uncache.scala (revision cfdd605febd2f8dbeb92a2bc1431887d8627bc48)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chisel3._
20import chisel3.util._
21import org.chipsalliance.cde.config.Parameters
22import utils._
23import utility._
24import xiangshan._
25import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
26import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
27
/**
  * Flush handshake bundle.
  *
  * As seen from the side that declares it un-flipped, `valid` is driven outwards
  * and `empty` is received back.
  */
class UncacheFlushBundle extends Bundle {
  /** Driven by the requester of the flush. */
  val valid: Bool = Output(Bool())
  /** Driven by the responder; in this file it is high when no buffer entry is valid. */
  val empty: Bool = Input(Bool())
}
32
/**
  * Payload of one uncache buffer entry: the request as received from the LSQ
  * plus the response fields captured from TileLink channel D.
  */
class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
  // ---- request part (copied from UncacheWordReq by `set`) ----
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt((XLEN/8).W)
  val id = UInt(uncacheIdxBits.W)
  val nc = Bool()
  val atomic = Bool()

  // ---- response part (filled by `update`) ----
  // FIXME lyq: data and resp_data can be merged?
  val resp_data = UInt(XLEN.W)
  val resp_nderr = Bool()

  /**
    * Load a new request into this entry and clear any stale response fields.
    *
    * @param x the request to store
    */
  def set(x: UncacheWordReq): Unit = {
    // start from a clean response
    resp_data := 0.U
    resp_nderr := false.B

    // latch the request fields
    id := x.id
    cmd := x.cmd
    addr := x.addr
    mask := x.mask
    data := x.data
    nc := x.nc
    atomic := x.atomic
  }

  /**
    * Record the response carried by a TileLink D beat.
    *
    * @param x the D-channel beat; its `data` and `denied` fields are captured
    */
  def update(x: TLBundleD): Unit = {
    resp_nderr := x.denied
    resp_data := x.data
  }

  /**
    * Build the response word returned to the LSQ from this entry.
    *
    * Fields that are not assigned explicitly are left as `DontCare`.
    *
    * @return a wire holding the response
    */
  def toUncacheWordResp(): UncacheWordResp = {
    val wordResp = Wire(new UncacheWordResp)
    wordResp := DontCare

    // values taken from the entry
    wordResp.id := id
    wordResp.nc := nc
    wordResp.data := resp_data
    wordResp.nderr := resp_nderr
    wordResp.is2lq := cmd === MemoryOpConstants.M_XRD

    // flags that are always reported as false here
    Seq(wordResp.miss, wordResp.replay, wordResp.tag_error, wordResp.error).foreach(_ := false.B)

    wordResp
  }
}
78
/**
  * Per-entry control state of the uncache buffer.
  *
  * Life cycle: valid (-> waitSame) -> inflight -> waitReturn -> (cleared)
  */
class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // FIXME lyq: state is multi bools or UInt()?
  val valid = Bool()      // the entry holds a request
  val inflight = Bool()   // uncache -> L2
  val waitSame = Bool()   // held back behind another request
  val waitReturn = Bool() // uncache -> LSQ

  /** Drive every flag low. */
  def init: Unit = {
    Seq(valid, inflight, waitSame, waitReturn).foreach(_ := false.B)
  }

  // ---- queries ----
  def isValid(): Bool = valid
  def isInflight(): Bool = inflight
  def isWaitReturn(): Bool = waitReturn
  def isWaitSame(): Bool = waitSame

  /** The entry holds a request that has neither been sent nor is blocked or answered. */
  def can2Uncache(): Bool = valid && !(inflight || waitSame || waitReturn)

  /** The entry holds a response that is ready to be handed back. */
  def can2Lsq(): Bool = valid && waitReturn

  // ---- single-flag setters ----
  def setValid(x: Bool): Unit = {
    valid := x
  }
  def setInflight(x: Bool): Unit = {
    inflight := x
  }
  def setWaitReturn(x: Bool): Unit = {
    waitReturn := x
  }
  def setWaitSame(x: Bool): Unit = {
    waitSame := x
  }

  /** A response arrived: leave `inflight` and start waiting to be returned. */
  def updateUncacheResp(): Unit = {
    assert(inflight, "The request was not sent and a response was received")
    waitReturn := true.B
    inflight := false.B
  }

  /** The response was handed back: release the entry by clearing all flags. */
  def updateReturn(): Unit = {
    init
  }
}
118
/**
  * Top-level IO of the uncache module.
  *
  * The `flush` and `lsq` sub-bundles are flipped, i.e. they are declared from the
  * point of view of the other side of the interface.
  */
class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  /** Hart identifier (width inferred). */
  val hartId: UInt = Input(UInt())
  /** Configuration input for outstanding requests. */
  val enableOutstanding: Bool = Input(Bool())
  /** Flush handshake; `empty` is driven by this module. */
  val flush: UncacheFlushBundle = Flipped(new UncacheFlushBundle)
  /** Request/response channel pair towards the LSQ. */
  val lsq: UncacheWordIO = Flipped(new UncacheWordIO)
}
125
// Convert DCacheIO requests to TileLink.
// For now, only TL-UL is handled.
128
/**
  * Diplomatic wrapper of the uncache buffer.
  *
  * Declares a single TileLink client named "uncache" whose source-id range has one
  * id per buffer entry, and instantiates [[UncacheImp]] as the module body.
  */
class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  /** Number of TileLink source ids; equal to the number of buffer entries. */
  def idRange: Int = UncacheBufferSize

  val clientParameters = {
    // the only master on this port
    val uncacheMaster = TLMasterParameters.v1(
      name = "uncache",
      sourceId = IdRange(0, idRange)
    )
    TLMasterPortParameters.v1(clients = Seq(uncacheMaster))
  }
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}
143
144/* Uncache Buffer */
/**
  * Module implementation of the uncache buffer.
  *
  * Requests accepted on `io.lsq.req` are stored in a pool of `UncacheBufferSize`
  * entries. Each entry is issued on TileLink channel A as a Put (when its command is
  * `M_XWR`) or a Get (otherwise), using the entry index as the TileLink source id.
  * The channel D response is written back into the entry, which is then reported on
  * `io.lsq.resp` and released.
  *
  * Requests whose address equals that of a request currently being sent or in flight
  * are marked `waitSame` and are only released when a response for that address
  * arrives.
  *
  * @param outer the enclosing [[Uncache]] LazyModule that owns the TileLink client node
  */
class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasPerfEvents
{
  private val INDEX_WIDTH = log2Up(UncacheBufferSize)
  println(s"Uncahe Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // Default values for the output signals. Several of them are overridden by the
  // later connections below (last connection wins).
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  // Entry payloads are not reset; only the state flags are.
  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  // NOTE(review): `fence` is not read anywhere in this module.
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)
  val uState = RegInit(s_idle)

  /** Build a Vec with one element per buffer entry from an index function. */
  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))

  // Entry selected for issue this cycle; driven in the "Uncache Req" section.
  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())

  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  // This FSM follows req -> acquire -> grant -> resp. In this module it is only
  // read by the "uncache_outstanding" perf counter; the use that would gate request
  // issue is commented out in the "Uncache Req" section.
  switch(uState){
    is(s_idle){
      when(req.fire){
        uState := s_refill_req
      }
    }
    is(s_refill_req){
      when(mem_acquire.fire){
        uState := s_refill_resp
      }
    }
    is(s_refill_resp){
      when(mem_grant.fire){
        uState := s_send_resp
      }
    }
    is(s_send_resp){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   ******************************************************************/

  /**
    TODO lyq: how to merge
    1. same addr
    2. same cmd
    3. valid
    FIXME lyq: not merge now due to the following issues
    1. load can't be merged
    2. how to merge store and response precisely
  */

  val e0_invalids = sizeMap(i => !states(i).isValid())
  // One-hot select of the lowest-indexed free entry (all zero when the buffer is full).
  val e0_invalid_oh = VecInit(PriorityEncoderOH(e0_invalids)).asUInt
  val e0_fire = req.fire
  val e0_req = req.bits

  // A request can be accepted whenever at least one entry is free.
  req_ready := e0_invalid_oh.orR

  // Judge whether the incoming request must wait for the same block: e0 & q0.
  // The comparison does not depend on which entry is allocated, so it is built once.
  //
  // An entry whose response arrives in this very cycle stops being in flight at the
  // next clock edge, and the wake-up in "Uncache Resp" only reaches entries that are
  // already valid. A request allocated in the same cycle must therefore not wait on
  // it; otherwise nothing would ever clear its waitSame flag and it could never be
  // issued.
  private val e0_waitSameVec = sizeMap(j =>
    e0_req.addr === entries(j).addr && states(j).isValid() && states(j).isInflight() &&
      !(mem_grant.fire && mem_grant.bits.source === j.U)
  )
  // The entry currently offered on channel A counts as well, even if it has not
  // been accepted yet.
  private val e0_waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent
  private val e0_waitSame = e0_waitSameVec.asUInt.orR || e0_waitQ0

  for (i <- 0 until UncacheBufferSize) {
    val alloc = e0_fire && e0_invalid_oh(i)
    when(alloc){
      entries(i).set(e0_req)
      states(i).setValid(true.B)

      when (e0_waitSame) {
        states(i).setWaitSame(true.B)
      }
    }
  }


  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which one is sent
   *    q0: sent
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will be always ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    // (io.enableOutstanding || uState === s_refill_req) && // FIXME lyq: comment for debug
    states(i).can2Uncache()
  )
  // Lowest-indexed sendable entry, plus a flag telling whether there is one at all.
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  // The access size is derived from the number of set mask bits; only 1, 2, 4 and
  // 8 bytes are accepted.
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // q0 should judge whether wait same block: every valid entry with the same
    // address that has not received its response yet is held back. This includes
    // the issued entry itself; its flag is cleared again when its response arrives.
    (0 until UncacheBufferSize).foreach { j =>
      when(q0_entry.addr === entries(j).addr && states(j).isValid() && !states(j).isWaitReturn()){
        states(j).setWaitSame(true.B)
      }
    }
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  // Responses are always accepted.
  mem_grant.ready := true.B
  when (mem_grant.fire) {
    // The source id of the response is the index of the entry that issued it.
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // remove state of wait same block
    (0 until UncacheBufferSize).foreach { j =>
      when(entries(id).addr === entries(j).addr && states(j).isValid() && states(j).isWaitSame()){
        states(j).setWaitSame(false.B)
      }
    }
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp()
  when(resp.fire){
    // The entry is released once its response has been taken.
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * // FIXME lyq: how to deal
   * 1. when io.flush.valid is true
   * 2. when io.lsq.req.bits.atomic is true
   ******************************************************************/

  // The buffer is reported empty when every entry is invalid.
  val invalid_entries = PopCount(states.map(!_.isValid()))
  io.flush.empty := invalid_entries === UncacheBufferSize.U


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purpose
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  // Print the data that is actually being returned, not the request-side data.
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  when(mem_acquire.valid){
    XSDebug("mem_acquire valid, ready=%d ", mem_acquire.ready)
    mem_acquire.bits.dump
  }
  when (mem_grant.fire) {
    XSDebug("mem_grant fire ")
    mem_grant.bits.dump
  }

  /* Performance Counters */
  def isStore: Bool = io.lsq.req.bits.cmd === MemoryOpConstants.M_XWR

  // Each condition is computed once and shared by the accumulators and perfEvents.
  private val perfMmioStore = io.lsq.req.fire && isStore && !io.lsq.req.bits.nc
  private val perfMmioLoad = io.lsq.req.fire && !isStore && !io.lsq.req.bits.nc
  private val perfNcStore = io.lsq.req.fire && isStore && io.lsq.req.bits.nc
  private val perfNcLoad = io.lsq.req.fire && !isStore && io.lsq.req.bits.nc
  private val perfOutstanding = uState =/= s_refill_req && mem_acquire.fire

  XSPerfAccumulate("uncache_mmio_store", perfMmioStore)
  XSPerfAccumulate("uncache_mmio_load", perfMmioLoad)
  XSPerfAccumulate("uncache_nc_store", perfNcStore)
  XSPerfAccumulate("uncache_nc_load", perfNcLoad)
  XSPerfAccumulate("uncache_outstanding", perfOutstanding)

  val perfEvents = Seq(
    ("uncache_mmio_store", perfMmioStore),
    ("uncache_mmio_load", perfMmioLoad),
    ("uncache_nc_store", perfNcStore),
    ("uncache_nc_load", perfNcLoad),
    ("uncache_outstanding", perfOutstanding)
  )

  generatePerfEvent()
  //  End
}
407