/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import utils._
import utility._
import xiangshan._
import xiangshan.mem._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}

class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

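// One slot of the uncache buffer: the original request fields, the data/denied
// result captured from the TileLink D channel, and the data/mask apparently
// intended to be forwarded from a same-address store (see the FIXME below).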
class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val id = UInt(uncacheIdxBits.W)
  val nc = Bool()
  val atomic = Bool()

  // FIXME lyq: could data and resp_data be merged?
  val resp_data = UInt(XLEN.W)
  val resp_nderr = Bool()

  // FIXME lyq: confirm the forward logic; if there is no forwarding, these fields can be removed
  val fwd_data = UInt(XLEN.W)
  val fwd_mask = UInt(DataBytes.W)

  def set(x: UncacheWordReq): Unit = {
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    data := x.data
    mask := x.mask
    id := x.id
    nc := x.nc
    atomic := x.atomic
    resp_nderr := false.B
    resp_data := 0.U
    fwd_data := 0.U
    fwd_mask := 0.U
  }

  def update(x: TLBundleD): Unit = {
    resp_data := x.data
    resp_nderr := x.denied
  }

  def update(forwardData: UInt, forwardMask: UInt): Unit = {
    fwd_data := forwardData
    fwd_mask := forwardMask
  }

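  // Build the response returned to the LSQ: each byte lane covered by fwd_mask
  // takes the forwarded store data, every other lane takes the TileLink
  // response data.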
  def toUncacheWordResp(): UncacheWordResp = {
    val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
      Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), resp_data(8*(j+1)-1, 8*j))
    )).asUInt
    val r = Wire(new UncacheWordResp)
    r := DontCare
    r.data := resp_fwd_data
    r.id := id
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}

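// Per-entry control state. The typical lifetime of an entry, assuming no
// same-address conflict, is: setValid -> can2Uncache (selected and sent) ->
// inflight -> updateUncacheResp (waitReturn) -> can2Lsq -> updateReturn (free).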
class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // FIXME lyq: should the state be multiple Bools or a single UInt?
  // valid (-> waitSame) -> inflight -> waitReturn
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = inflight
  def isWaitReturn(): Bool = waitReturn
  def isWaitSame(): Bool = waitSame
  def can2Uncache(): Bool = valid && !inflight && !waitSame && !waitReturn
  def can2Lsq(): Bool = valid && waitReturn

  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  def updateUncacheResp(): Unit = {
    assert(inflight, "Received an uncache response for a request that was never sent")
    inflight := false.B
    waitReturn := true.B
  }
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}

class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// Convert DCacheIO to TileLink.
// For now, we only deal with TL-UL.

class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    ))
  )
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}

/* Uncache Buffer */
class UncacheImp(outer: Uncache) extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasPerfEvents
{
  private val INDEX_WIDTH = log2Up(UncacheBufferSize)
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)
  val uState = RegInit(s_idle)

  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR

  // drain buffer
  val empty = Wire(Bool())
  val f0_needDrain = Wire(Bool())
  val do_uarch_drain = RegNext(f0_needDrain)

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

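  // With outstanding requests disabled, this FSM serializes the buffer to one
  // request at a time: accept from the LSQ (s_idle), send on the TileLink A
  // channel (s_refill_req), wait for the D-channel grant (s_refill_resp),
  // then return the response to the LSQ (s_send_resp).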
  switch(uState){
    is(s_idle){
      when(req.fire){
        uState := s_refill_req
      }
    }
    is(s_refill_req){
      when(mem_acquire.fire){
        uState := s_refill_resp
      }
    }
    is(s_refill_resp){
      when(mem_grant.fire){
        uState := s_send_resp
      }
    }
    is(s_send_resp){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: compute the alloc/merge write vector
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    solved in one cycle to achieve the original performance.
   ******************************************************************/

  /**
    TODO lyq: how to merge
    1. same addr
    2. same cmd
    3. valid
    FIXME lyq: no merging for now, due to the following issues
    1. loads can't be merged
    2. how to merge stores and responses precisely
  */

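  // e0: allocate the lowest-numbered free entry for the incoming request. If a
  // same-address access is already in flight, or is being sent from q0 this
  // cycle, the new entry is marked waitSame and held back until it completes.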
  val e0_fire = req.fire
  val e0_req = req.bits
  /**
    TODO lyq: prohibit, wait, or forward?
    NOW: strictly block on the same address; otherwise each pair needs
    exhaustive consideration:
      - ld->ld wait
      - ld->st forward
      - st->ld forward
      - st->st block
  */
  val e0_existSameVec = sizeMap(j =>
    e0_req.addr === entries(j).addr && states(j).isValid()
  )
  val e0_invalidVec = sizeMap(i => !states(i).isValid() && !e0_existSameVec(i))
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_alloc = e0_canAlloc && e0_fire
  req_ready := e0_invalidVec.asUInt.orR && !do_uarch_drain

  when (e0_alloc) {
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)

    // decide whether to wait on a same-address access: check both the
    // in-flight entries and the request being sent from q0 this cycle
    val waitSameVec = sizeMap(j =>
      e0_req.addr === entries(j).addr && states(j).isValid() && states(j).isInflight()
    )
    val waitQ0 = e0_req.addr === q0_entry.addr && q0_canSent
    when (waitSameVec.reduce(_ || _) || waitQ0) {
      states(e0_allocIdx).setWaitSame(true.B)
    }

  }


  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which entry is sent
   *    q0: send it
   *
   *  Version 1 (better performance)
   *    solved in one cycle to achieve the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" do not form a continuous pipeline,
   *          because there is no guarantee that mem_acquire will always be ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_refill_req) &&
    states(i).can2Uncache()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

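  // TileLink Get/Put take the transfer size as log2(bytes), so the request
  // mask must cover a power-of-two number of bytes, e.g. mask 0x0f -> size 4
  // -> lgSize 2. Anything else is caught by the assert below.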
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size === m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // once q0 is sent, valid same-address entries that have not yet returned
    // must wait for it
    (0 until UncacheBufferSize).foreach(j =>
      when(q0_entry.addr === entries(j).addr && states(j).isValid() && !states(j).isWaitReturn()){
        states(j).setWaitSame(true.B)
      }
    )
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

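  // The D channel is always accepted. Since the grant's source id is the
  // buffer index the request was sent with, the response can be written
  // straight back into its entry, which then waits to return to the LSQ.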
  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // this access has completed: clear the waitSame state of same-address entries
    (0 until UncacheBufferSize).foreach(j =>
      when(entries(id).addr === entries(j).addr && states(j).isValid() && states(j).isWaitSame()){
        states(j).setWaitSame(false.B)
      }
    )
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp()
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * // FIXME lyq: how to handle these cases
   * 1. when io.flush.valid is true
   * 2. when io.lsq.req.bits.atomic is true
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty

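  // Note that io.flush.valid is not consumed here yet: the buffer only reports
  // emptiness, so a flush requester can do no more than wait for empty.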

  /******************************************************************
   * Load Data Forward
   *
   * 0. ld in the ldu pipeline
   *    f0: tag match, fast resp
   *    f1: data resp
   *
   * 1. ld in the buffer (in "Enter Buffer")
   *    ld(en) -> st(in): ld entry.update, state.updateUncacheResp
   *    st(en) -> ld(in): ld entry.update, state.updateUncacheResp
   *    NOW: strictly blocked by same address, so this forward does not happen.
   *
   ******************************************************************/

  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  f0_needDrain := f0_tagMismatchVec.asUInt.orR && !empty

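  // Two-stage forwarding per load port: f0 CAMs the valid store entries and
  // produces the fast hit mask, f1 returns the registered data. If the vaddr
  // and paddr CAMs disagree, the match is unreliable, so matchInvalid is
  // raised and a drain of the buffer is requested via f0_needDrain.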
  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_vtagMatches = sizeMap(w => entries(w).vaddr === forward.vaddr)
    val f0_ptagMatches = sizeMap(w => entries(w).addr === forward.paddr)
    f0_tagMismatchVec(i) := forward.valid && sizeMap(w =>
      f0_vtagMatches(w) =/= f0_ptagMatches(w) && f0_validMask(w)
    ).asUInt.orR
    when (f0_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        RegNext(f0_ptagMatches.asUInt),
        RegNext(f0_vtagMatches.asUInt),
        RegNext(forward.vaddr),
        RegNext(forward.paddr)
      )
    }

    val f0_validTagMatches = sizeMap(w => f0_ptagMatches(w) && f0_validMask(w) && forward.valid)

    val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
    val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
    val f0_fwdMask = shiftMaskToHigh(
      forward.paddr,
      Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_fwdData = shiftDataToHigh(
      forward.paddr,
      Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
    ).asTypeOf(Vec(VDataBytes, UInt(8.W)))

    val f1_fwdValid = RegNext(forward.valid)
    val f1_fwdMask = RegEnable(f0_fwdMask, forward.valid)
    val f1_fwdData = RegEnable(f0_fwdData, forward.valid)

    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f0_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_fwdMask(j)

      forward.forwardMask(j) := false.B
      forward.forwardData(j) := DontCare
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
        forward.forwardData(j) := f1_fwdData(j)
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purposes
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  when(mem_acquire.valid){
    XSDebug("mem_acquire valid, ready=%d ", mem_acquire.ready)
    mem_acquire.bits.dump
  }
  when (mem_grant.fire) {
    XSDebug("mem_grant fire ")
    mem_grant.bits.dump
  }

  /* Performance Counters */
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
  XSPerfAccumulate("vaddr_match_failed", PopCount(f0_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
  )

  generatePerfEvent()
  //  End
}