/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import chisel3._
import chisel3.util._
import org.chipsalliance.cde.config.Parameters
import utils._
import utility._
import xiangshan._
import xiangshan.mem._
import freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
import freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}

class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool())
  val empty = Input(Bool())
}

class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val id = UInt(uncacheIdxBits.W)
  val nc = Bool()
  val atomic = Bool()

  val resp_nderr = Bool()

  /* NOTE: if internal forwarding is supported, the fields below can be uncommented */
  // val fwd_data = UInt(XLEN.W)
  // val fwd_mask = UInt(DataBytes.W)

  def set(x: UncacheWordReq): Unit = {
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    data := x.data
    mask := x.mask
    id := x.id
    nc := x.nc
    atomic := x.atomic
    resp_nderr := false.B
    // fwd_data := 0.U
    // fwd_mask := 0.U
  }

  def update(x: TLBundleD): Unit = {
    when(cmd === MemoryOpConstants.M_XRD) {
      data := x.data
    }
    resp_nderr := x.denied
  }

  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  def toUncacheWordResp(): UncacheWordResp = {
    // val resp_fwd_data = VecInit((0 until DataBytes).map(j =>
    //   Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j))
    // )).asUInt
    val resp_fwd_data = data
    val r = Wire(new UncacheWordResp)
    r := DontCare
    r.data := resp_fwd_data
    r.id := id
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}

class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  // valid (-> waitSame) -> inflight -> waitReturn
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  def init: Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }

  def isValid(): Bool = valid
  def isInflight(): Bool = inflight
  def isWaitReturn(): Bool = waitReturn
  def isWaitSame(): Bool = waitSame
  def can2Uncache(): Bool = valid && !inflight && !waitSame && !waitReturn
  def can2Lsq(): Bool = valid && waitReturn

  def setValid(x: Bool): Unit = { valid := x }
  def setInflight(x: Bool): Unit = { inflight := x }
  def setWaitReturn(x: Bool): Unit = { waitReturn := x }
  def setWaitSame(x: Bool): Unit = { waitSame := x }

  def updateUncacheResp(): Unit = {
    assert(inflight, "Received an uncache response for a request that was never sent")
    inflight := false.B
    waitReturn := true.B
  }
  def updateReturn(): Unit = {
    valid := false.B
    inflight := false.B
    waitSame := false.B
    waitReturn := false.B
  }
}
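
// UncacheEntryState lifecycle, as encoded by the flags above:
//   setValid          -> valid                (allocated, may issue)
//   setWaitSame       -> valid + waitSame     (blocked behind a same-address request)
//   setInflight       -> valid + inflight     (request sent to the bus)
//   updateUncacheResp -> valid + waitReturn   (grant received, response pending)
//   updateReturn      -> all flags cleared    (response returned to LSQ, entry free)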

class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}

// Converts DCacheIO to TileLink.
// For now, we only deal with TL-UL.

class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(TLMasterParameters.v1(
      "uncache",
      sourceId = IdRange(0, idRange)
    ))
  )
  val clientNode = TLClientNode(Seq(clientParameters))
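
  // One TileLink source ID is reserved per buffer entry, so a grant's `source`
  // field can index `entries` directly (see the mem_grant handling in UncacheImp).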

  lazy val module = new UncacheImp(this)
}

/* Uncache Buffer */
class UncacheImp(outer: Uncache) extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasPerfEvents
{
  private val INDEX_WIDTH = log2Up(UncacheBufferSize)
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)
  val uState = RegInit(s_idle)

  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
  def addrMatch(x: UncacheEntry, y: UncacheWordReq): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UncacheWordReq, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UncacheEntry, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UInt, y: UInt): Bool = x(PAddrBits - 1, 3) === y(PAddrBits - 1, 3)
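
  // All addrMatch variants ignore bits (2, 0): addresses are compared at
  // 8-byte (double-word) granularity, matching the 64-bit entry data width.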

  // drain buffer
  val empty = Wire(Bool())
  val f1_needDrain = Wire(Bool())
  val do_uarch_drain = RegNext(f1_needDrain)

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  switch(uState){
    is(s_idle){
      when(req.fire){
        uState := s_refill_req
      }
    }
    is(s_refill_req){
      when(mem_acquire.fire){
        uState := s_refill_resp
      }
    }
    is(s_refill_resp){
      when(mem_grant.fire){
        uState := s_send_resp
      }
    }
    is(s_send_resp){
      when(resp.fire){
        uState := s_idle
      }
    }
  }
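
  // In non-outstanding mode this FSM serializes the buffer to one transaction
  // at a time: q0 below may only issue to the bus while uState === s_refill_req.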


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    handled in one cycle to achieve the original performance.
   ******************************************************************/

  /**
    TODO lyq: how to merge
    1. same addr
    2. same cmd
    3. valid
    FIXME lyq: no merging for now, due to the following issues:
    1. loads can't be merged
    2. it is unclear how to merge stores and their responses precisely
  */

  val e0_fire = req.fire
  val e0_req_valid = req.valid
  val e0_req = req.bits
  /**
    TODO lyq: block, wait, or forward?
    NOW: strictly block on a same-address match; anything else would require
    exhaustive consideration of the cases below:
      - ld -> ld: wait
      - ld -> st: forward
      - st -> ld: forward
      - st -> st: block
  */
  val e0_existSame = sizeMap(j => e0_req_valid && states(j).isValid() && addrMatch(e0_req, entries(j))).asUInt.orR
  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_alloc = e0_canAlloc && !e0_existSame && e0_fire
  req_ready := e0_invalidVec.asUInt.orR && !e0_existSame && !do_uarch_drain
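
  // A request is accepted only if a free slot exists, no valid entry already
  // holds the same double-word address, and no drain is pending from a
  // forward vaddr/paddr mismatch.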

  when (e0_alloc) {
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)

    // judge whether the new entry must wait on a same-address request: e0 & q0
    val waitSameVec = sizeMap(j =>
      e0_req_valid && states(j).isValid() && states(j).isInflight() && addrMatch(e0_req, entries(j))
    )
    val waitQ0 = q0_canSent && addrMatch(e0_req, q0_entry)
    when (waitSameVec.reduce(_ || _) || waitQ0) {
      states(e0_allocIdx).setWaitSame(true.B)
    }

  }
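
  // A minimal sketch of the merge check the TODO above refers to (hypothetical,
  // intentionally not wired in): a store could merge into an existing
  // not-yet-inflight store to the same double word by OR-ing the masks and
  // overlaying the new data bytes.
  // val e0_canMergeVec = sizeMap(j =>
  //   e0_req_valid && isStore(e0_req.cmd) &&
  //   states(j).isValid() && !states(j).isInflight() &&
  //   isStore(entries(j)) && addrMatch(e0_req, entries(j))
  // )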


  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which entry is sent
   *    q0: send it
   *
   *  Version 1 (better performance)
   *    handled in one cycle to achieve the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" are not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will always be ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_refill_req) &&
    states(i).can2Uncache()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))
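
  // PopCount(mask) gives the access size in bytes; only power-of-two sizes
  // (1/2/4/8) are legal, mapping to lgSize 0/1/2/3 for the TileLink request.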

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // once q0 is inflight, every valid same-address entry must wait for its
    // grant (this also marks q0 itself, which is harmless: the grant handler
    // clears waitSame for the whole address group)
    (0 until UncacheBufferSize).foreach(j =>
      when(states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
        states(j).setWaitSame(true.B)
      }
    )
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

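  // Channel D needs no backpressure: each grant's source ID uniquely indexes
  // an entry that is guaranteed to be waiting for it.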
  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // clear the wait-same state of every entry matching the granted address
    (0 until UncacheBufferSize).foreach(j =>
      when(states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
        states(j).setWaitSame(false.B)
      }
    )
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp()
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }
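
  // The priority encoder returns the lowest-index ready entry, so responses may
  // leave out of allocation order; the LSQ matches them by the id field.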


  /******************************************************************
   * Buffer Flush
   * 1. when io.flush.valid is true: drain the store queue and uncache buffer
   * 2. when io.lsq.req.bits.atomic is true: temporarily not supported
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty


  /******************************************************************
   * Load Data Forward
   *
   * 0. ld in ldu pipeline
   *    f0: vaddr match, mask & data select, fast resp
   *    f1: paddr match, resp
   *
   * 1. ld in buffer (in "Enter Buffer")
   *    ld(en) -> st(in): ld entry.update, state.updateUncacheResp
   *    st(en) -> ld(in): ld entry.update, state.updateUncacheResp
   *    NOW: same-address requests are strictly blocked, so this
   *    forwarding path does not exist.
   ******************************************************************/

  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
  val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
  val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_fwdValid = forward.valid
    val f1_fwdValid = RegNext(f0_fwdValid)

    // f0 vaddr match
    val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
    val f0_validTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid)
    // f0 select
    val f0_fwdMask = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_fwdData = shiftDataToHigh(
      forward.vaddr,
      Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
    ).asTypeOf(Vec(VDataBytes, UInt(8.W)))

    // f1 paddr match
    val f1_fwdMask = RegEnable(f0_fwdMask, f0_fwdValid)
    val f1_fwdData = RegEnable(f0_fwdData, f0_fwdValid)
    // forward.paddr comes from the DTLB, which is physically far from the
    // uncache buffer, so the comparison uses registered copies
    val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), RegEnable(forward.paddr, f0_fwdValid)))
    f1_tagMismatchVec(i) := sizeMap(w =>
      RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
    ).asUInt.orR
    when(f1_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        f1_ptagMatches.asUInt,
        RegEnable(f0_vtagMatches.asUInt, f0_fwdValid),
        RegEnable(forward.vaddr, f0_fwdValid),
        RegEnable(forward.paddr, f0_fwdValid)
      )
    }
    // f1 output
    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_fwdMask(j)

      forward.forwardData(j) := f1_fwdData(j)
      forward.forwardMask(j) := false.B
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
      }
    }

  }
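
  // Forward recap: f0 performs a fast vaddr CAM (forwardMaskFast); f1 re-checks
  // the match against the paddr. A vaddr/paddr disagreement raises matchInvalid
  // for the load and requests a buffer drain via do_uarch_drain.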


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purposes
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready)
  mem_acquire.bits.dump(mem_acquire.valid)

  XSDebug(mem_grant.fire, "mem_grant fire ")
  mem_grant.bits.dump(mem_grant.fire)

  /* Performance Counters */
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_refill_req && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))
  )

  generatePerfEvent()
  // End
}