xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/Uncache.scala (revision db81ab702bc2a03d5784c67f4e4c8535182323d5)
11f0e2dc7SJiawei Lin/***************************************************************************************
21f0e2dc7SJiawei Lin* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
31f0e2dc7SJiawei Lin* Copyright (c) 2020-2021 Peng Cheng Laboratory
41f0e2dc7SJiawei Lin*
51f0e2dc7SJiawei Lin* XiangShan is licensed under Mulan PSL v2.
61f0e2dc7SJiawei Lin* You can use this software according to the terms and conditions of the Mulan PSL v2.
71f0e2dc7SJiawei Lin* You may obtain a copy of Mulan PSL v2 at:
81f0e2dc7SJiawei Lin*          http://license.coscl.org.cn/MulanPSL2
91f0e2dc7SJiawei Lin*
101f0e2dc7SJiawei Lin* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
111f0e2dc7SJiawei Lin* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
121f0e2dc7SJiawei Lin* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
131f0e2dc7SJiawei Lin*
141f0e2dc7SJiawei Lin* See the Mulan PSL v2 for more details.
151f0e2dc7SJiawei Lin***************************************************************************************/
161f0e2dc7SJiawei Lin
171f0e2dc7SJiawei Linpackage xiangshan.cache
181f0e2dc7SJiawei Lin
191f0e2dc7SJiawei Linimport chisel3._
201f0e2dc7SJiawei Linimport chisel3.util._
218891a219SYinan Xuimport org.chipsalliance.cde.config.Parameters
2237225120Ssfencevmaimport utils._
233c02ee8fSwakafaimport utility._
2437225120Ssfencevmaimport xiangshan._
25e04c5f64SYanqin Liimport xiangshan.mem._
26519244c7SYanqin Liimport coupledL2.MemBackTypeMM
27519244c7SYanqin Liimport coupledL2.MemPageTypeNC
281f0e2dc7SJiawei Linimport freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
291f0e2dc7SJiawei Linimport freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
30519244c7SYanqin Liimport coupledL2.{MemBackTypeMMField, MemPageTypeNCField}
3137225120Ssfencevma
/** Flush handshake bundle used by [[UncacheIO]].
  *
  * From the point of view of the side that instantiates it un-flipped:
  *  - `valid` is an output (the flush request),
  *  - `empty` is an input (status reported back by the other side).
  */
class UncacheFlushBundle extends Bundle {
  val valid = Output(Bool()) // flush request
  val empty = Input(Bool())  // status returned by the peer
}
361f0e2dc7SJiawei Lin
/** Payload of one uncache buffer slot: the buffered request together with the
  * error flag recorded when its TileLink response arrives.
  */
class UncacheEntry(implicit p: Parameters) extends DCacheBundle {
  // ---- request fields (filled in by `set`) ----
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val id = UInt(uncacheIdxBits.W)
  val nc = Bool()
  val atomic = Bool()
  val memBackTypeMM = Bool()

  // ---- response field (filled in by `update`) ----
  val resp_nderr = Bool()

  /* NOTE: internal forwarding is currently disabled. To support it, re-enable:
   *   val fwd_data = UInt(XLEN.W)
   *   val fwd_mask = UInt(DataBytes.W)
   * together with the matching lines noted in `set`, `toUncacheWordResp`
   * and the forward-specific `update` overload below.
   */

  /** Load this entry from an incoming request and clear the error flag. */
  def set(x: UncacheWordReq): Unit = {
    // address / command
    cmd := x.cmd
    addr := x.addr
    vaddr := x.vaddr
    // payload
    data := x.data
    mask := x.mask
    // bookkeeping and attributes
    id := x.id
    nc := x.nc
    atomic := x.atomic
    memBackTypeMM := x.memBackTypeMM
    // a freshly written entry has no error recorded yet
    resp_nderr := false.B
    // (forwarding disabled) fwd_data := 0.U ; fwd_mask := 0.U
  }

  /** Record a TileLink D-channel response in this entry.
    *
    * `data` is overwritten only when the buffered command is a read (M_XRD);
    * the error flag is set when the response is either denied or corrupt.
    */
  def update(x: TLBundleD): Unit = {
    when(cmd === MemoryOpConstants.M_XRD) { data := x.data }
    resp_nderr := x.denied || x.corrupt
  }

  // (forwarding disabled)
  // def update(forwardData: UInt, forwardMask: UInt): Unit = {
  //   fwd_data := forwardData
  //   fwd_mask := forwardMask
  // }

  /** Build the response returned to the LSQ from this entry.
    *
    * Fields that are not assigned explicitly are left as DontCare.
    */
  def toUncacheWordResp(): UncacheWordResp = {
    val r = Wire(new UncacheWordResp)
    r := DontCare
    // With forwarding enabled, the data would be merged bytewise:
    //   Mux(fwd_mask(j), fwd_data(8*(j+1)-1, 8*j), data(8*(j+1)-1, 8*j))
    // For now the stored data is returned unchanged.
    r.data := data
    r.id := id
    r.nc := nc
    r.nderr := resp_nderr
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    // these status bits are always reported as inactive
    Seq(r.miss, r.replay, r.tag_error, r.error).foreach(_ := false.B)
    r
  }
}
1001f0e2dc7SJiawei Lin
/** Status flags of one uncache buffer slot.
  *
  * Intended progression: valid (-> waitSame) -> inflight -> waitReturn
  */
class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  /** Clear every flag. */
  def init: Unit = {
    Seq(valid, inflight, waitSame, waitReturn).foreach(_ := false.B)
  }

  // ---- plain accessors ----
  def isValid(): Bool = valid
  def isInflight(): Bool = inflight
  def isWaitReturn(): Bool = waitReturn
  def isWaitSame(): Bool = waitSame

  /** Valid and none of inflight / waitSame / waitReturn is set. */
  def can2Uncache(): Bool = valid && !(inflight || waitSame || waitReturn)

  /** Valid and waiting to be returned. */
  def can2Lsq(): Bool = valid && waitReturn

  // ---- plain setters ----
  def setValid(x: Bool): Unit = {
    valid := x
  }
  def setInflight(x: Bool): Unit = {
    inflight := x
  }
  def setWaitReturn(x: Bool): Unit = {
    waitReturn := x
  }
  def setWaitSame(x: Bool): Unit = {
    waitSame := x
  }

  /** A response arrived: leave `inflight`, enter `waitReturn`.
    * Asserts that the entry was actually in flight.
    */
  def updateUncacheResp(): Unit = {
    assert(inflight, "The request was not sent and a response was received")
    waitReturn := true.B
    inflight := false.B
  }

  /** The response was handed back: release the slot by clearing all flags. */
  def updateReturn(): Unit = init
}
1391f0e2dc7SJiawei Lin
/** Top-level IO of the uncache buffer module.
  *
  *  - `hartId`            : hart identifier input (width left to inference)
  *  - `enableOutstanding` : input switch for outstanding requests
  *  - `flush`             : flipped [[UncacheFlushBundle]]
  *  - `lsq`               : flipped request/response word interface
  *  - `forward`           : one flipped forward-query port per load pipeline
  */
class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  val enableOutstanding = Input(Bool())
  val flush = Flipped(new UncacheFlushBundle)
  val lsq = Flipped(new UncacheWordIO)
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}
1471f0e2dc7SJiawei Lin
// Convert DCacheIO to TileLink.
// For now, we only deal with TL-UL.
1501f0e2dc7SJiawei Lin
/** Diplomacy wrapper of the uncache buffer.
  *
  * Declares a single TileLink client named "uncache" whose source-id range is
  * `[0, UncacheBufferSize)` and which carries the MemBackTypeMM / MemPageTypeNC
  * request fields. The hardware itself lives in [[UncacheImp]].
  */
class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  /** Number of TileLink source ids, one per buffer entry. */
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(
      TLMasterParameters.v1(
        name = "uncache",
        sourceId = IdRange(0, idRange)
      )
    ),
    requestFields = Seq(MemBackTypeMMField(), MemPageTypeNCField())
  )

  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}
1661f0e2dc7SJiawei Lin
167cfdd605fSYanqin Li/* Uncache Buffer */
/** Implementation of the uncache buffer.
  *
  * Requests from `io.lsq.req` are stored in a small buffer (`entries` / `states`),
  * issued on TileLink channel A, updated by channel D responses and finally
  * returned on `io.lsq.resp`. Stores held in the buffer can additionally be
  * queried through `io.forward`.
  */
class UncacheImp(outer: Uncache)extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasPerfEvents
{
  private val INDEX_WIDTH = log2Up(UncacheBufferSize)
  // elaboration-time message
  println(s"Uncache Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals
  // (channels B/C/E are never used; A/D and the LSQ response are overridden below)
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  // NOTE(review): `fence` is not read or written anywhere in this module as shown;
  // it is kept only because it is a public member.
  val fence = RegInit(Bool(), false.B)
  val s_idle :: s_refill_req :: s_refill_resp :: s_send_resp :: Nil = Enum(4)
  val uState = RegInit(s_idle)

  /** Build a Vec with one element per buffer entry. */
  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
  // All address comparisons ignore the low 3 bits and only look at bits [PAddrBits-1:3].
  def addrMatch(x: UncacheEntry, y: UncacheWordReq): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UncacheWordReq, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UncacheEntry, y: UncacheEntry): Bool = x.addr(PAddrBits - 1, 3) === y.addr(PAddrBits - 1, 3)
  def addrMatch(x: UInt, y: UInt): Bool = x(PAddrBits - 1, 3) === y(PAddrBits - 1, 3)

  // drain buffer
  val empty = Wire(Bool())
  val f1_needDrain = Wire(Bool())
  // registered drain request; while set, new requests are not accepted (see req_ready)
  val do_uarch_drain = RegNext(f1_needDrain)

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * uState for non-outstanding
   *   idle -> refill_req -> refill_resp -> send_resp -> idle
   *   When io.enableOutstanding is low, an entry may only be issued
   *   while uState is s_refill_req (see q0_canSentVec).
   ******************************************************************/

  switch(uState){
    is(s_idle){
      when(req.fire){
        uState := s_refill_req
      }
    }
    is(s_refill_req){
      when(mem_acquire.fire){
        uState := s_refill_resp
      }
    }
    is(s_refill_resp){
      when(mem_grant.fire){
        uState := s_send_resp
      }
    }
    is(s_send_resp){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   ******************************************************************/

  /**
    TODO lyq: how to merge
    1. same addr
    2. same cmd
    3. valid
    FIXME lyq: not merge now due to the following issues
    1. load can't be merged
    2. how to merge store and response precisely
  */

  val e0_fire = req.fire
  val e0_req_valid = req.valid
  val e0_req = req.bits
  /**
    TODO lyq: block or wait or forward?
    NOW: strict block by same address; otherwise: exhaustive consideration is needed.
      - ld->ld wait
      - ld->st forward
      - st->ld forward
      - st->st block
  */
  // a valid entry with the same (8-byte aligned) address already exists
  val e0_existSame = sizeMap(j => e0_req_valid && states(j).isValid() && addrMatch(e0_req, entries(j))).asUInt.orR
  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_alloc = e0_canAlloc && !e0_existSame && e0_fire
  // accept only when a free slot exists, no same-address entry exists and no drain is pending
  req_ready := e0_invalidVec.asUInt.orR && !e0_existSame && !do_uarch_drain

  when (e0_alloc) {
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)

    // judge whether wait same block: e0 & q0
    val waitSameVec = sizeMap(j =>
      e0_req_valid && states(j).isValid() && states(j).isInflight() && addrMatch(e0_req, entries(j))
    )
    val waitQ0 = q0_canSent && addrMatch(e0_req, q0_entry)
    when (waitSameVec.reduce(_ || _) || waitQ0) {
      states(e0_allocIdx).setWaitSame(true.B)
    }

  }


  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which one is sent
   *    q0: sent
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will be always ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_refill_req) &&
    states(i).can2Uncache()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  // The transfer size is derived from the number of set mask bits;
  // only 1/2/4/8 bytes are legal (checked by the assertion below).
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  // The buffer index doubles as the TileLink source id.
  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  mem_acquire.bits.user.lift(MemBackTypeMM).foreach(_ := q0_entry.memBackTypeMM)
  mem_acquire.bits.user.lift(MemPageTypeNC).foreach(_ := q0_entry.nc)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // q0 should judge whether wait same block
    for (j <- 0 until UncacheBufferSize) {
      when(states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
        states(j).setWaitSame(true.B)
      }
    }
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  mem_grant.ready := true.B
  when (mem_grant.fire) {
    // the response's source id selects the buffer entry
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // remove state of wait same block
    for (j <- 0 until UncacheBufferSize) {
      when(states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
        states(j).setWaitSame(false.B)
      }
    }
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp()
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * 1. when io.flush.valid is true: drain store queue and ubuffer
   * 2. when io.lsq.req.bits.atomic is true: not support temporarily
   ******************************************************************/
  // the buffer is empty when no entry is valid
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty


  /******************************************************************
   * Load Data Forward
   *
   * 0. ld in ldu pipeline
   *    f0: vaddr match, mask & data select, fast resp
   *    f1: paddr match, resp
   *
   * 1. ld in buffer (in "Enter Buffer")
   *    ld(en) -> st(in): ld entry.update, state.updateUncacheResp
   *    st(en) -> ld(in): ld entry.update, state.updateUncacheResp
   *    NOW: strict block by same address; there is no such forward.
   *
   ******************************************************************/

  // only valid store entries are forward candidates
  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
  val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
  val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  // a vaddr/paddr match disagreement on a non-empty buffer requests a drain
  f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty

  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_fwdValid = forward.valid
    val f1_fwdValid = RegNext(f0_fwdValid)

    // f0 vaddr match
    val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
    val f0_validTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid)
    // f0 select
    val f0_fwdMask = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_validTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_fwdData = shiftDataToHigh(
      forward.vaddr,
      Mux1H(f0_validTagMatches, f0_fwdDataCandidates)
    ).asTypeOf(Vec(VDataBytes, UInt(8.W)))

    // f1 paddr match
    val f1_fwdMask = RegEnable(f0_fwdMask, f0_fwdValid)
    val f1_fwdData = RegEnable(f0_fwdData, f0_fwdValid)
    // forward.paddr from dtlb, which is far from uncache
    val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), RegEnable(forward.paddr, f0_fwdValid)))
    // mismatch: for some valid store entry the registered vaddr match differs from the paddr match
    f1_tagMismatchVec(i) := sizeMap(w =>
      RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
    ).asUInt.orR
    when(f1_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        f1_ptagMatches.asUInt,
        RegEnable(f0_vtagMatches.asUInt, f0_fwdValid),
        RegEnable(forward.vaddr, f0_fwdValid),
        RegEnable(forward.paddr, f0_fwdValid)
      )
    }
    // f1 output
    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_fwdMask(j)

      forward.forwardData(j) := f1_fwdData(j)
      forward.forwardMask(j) := false.B
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purpose
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  // print the data actually returned on the response channel
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready)
  mem_acquire.bits.dump(mem_acquire.valid)

  XSDebug(mem_grant.fire, "mem_grant fire ")
  mem_grant.bits.dump(mem_grant.fire)

  /* Performance Counters */
  // Conditions shared by XSPerfAccumulate and perfEvents, so both always count the same thing.
  private val perfReqFire = io.lsq.req.fire
  private val perfReqIsStore = isStore(io.lsq.req.bits.cmd)
  private val perfReqIsNC = io.lsq.req.bits.nc
  private val perfMmioStore = perfReqFire && perfReqIsStore && !perfReqIsNC
  private val perfMmioLoad = perfReqFire && !perfReqIsStore && !perfReqIsNC
  private val perfNcStore = perfReqFire && perfReqIsStore && perfReqIsNC
  private val perfNcLoad = perfReqFire && !perfReqIsStore && perfReqIsNC
  // a request issued on channel A while uState is not s_refill_req
  private val perfOutstanding = uState =/= s_refill_req && mem_acquire.fire
  private val perfForwardCount = PopCount(io.forward.map(_.forwardMask.asUInt.orR))
  private val perfForwardMismatch = PopCount(f1_tagMismatchVec)

  XSPerfAccumulate("uncache_memBackTypeMM", perfReqFire && io.lsq.req.bits.memBackTypeMM)
  XSPerfAccumulate("uncache_mmio_store", perfMmioStore)
  XSPerfAccumulate("uncache_mmio_load", perfMmioLoad)
  XSPerfAccumulate("uncache_nc_store", perfNcStore)
  XSPerfAccumulate("uncache_nc_load", perfNcLoad)
  XSPerfAccumulate("uncache_outstanding", perfOutstanding)
  XSPerfAccumulate("forward_count", perfForwardCount)
  XSPerfAccumulate("forward_vaddr_match_failed", perfForwardMismatch)

  val perfEvents = Seq(
    ("uncache_mmio_store", perfMmioStore),
    ("uncache_mmio_load", perfMmioLoad),
    ("uncache_nc_store", perfNcStore),
    ("uncache_nc_load", perfNcLoad),
    ("uncache_outstanding", perfOutstanding),
    ("forward_count", perfForwardCount),
    ("forward_vaddr_match_failed", perfForwardMismatch)
  )

  generatePerfEvent()
  //  End
}
522