xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/Uncache.scala (revision ccd7d22863b78b44d7bfff564a9abfe770a3f75f)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
161f0e2dc7SJiawei Lin
171f0e2dc7SJiawei Linpackage xiangshan.cache
181f0e2dc7SJiawei Lin
199e12e8edScz4eimport org.chipsalliance.cde.config.Parameters
201f0e2dc7SJiawei Linimport chisel3._
211f0e2dc7SJiawei Linimport chisel3.util._
2237225120Ssfencevmaimport utils._
233c02ee8fSwakafaimport utility._
241f0e2dc7SJiawei Linimport freechips.rocketchip.diplomacy.{IdRange, LazyModule, LazyModuleImp, TransferSizes}
251f0e2dc7SJiawei Linimport freechips.rocketchip.tilelink.{TLArbiter, TLBundleA, TLBundleD, TLClientNode, TLEdgeOut, TLMasterParameters, TLMasterPortParameters}
269e12e8edScz4eimport xiangshan._
279e12e8edScz4eimport xiangshan.mem._
289e12e8edScz4eimport xiangshan.mem.Bundles._
299e12e8edScz4eimport coupledL2.{MemBackTypeMM, MemBackTypeMMField, MemPageTypeNC, MemPageTypeNCField}
3037225120Ssfencevma
/**
 * Constants and helpers shared by the uncache buffer bundles and modules.
 */
trait HasUncacheBufferParameters extends HasXSParameter with HasDCacheParameters {

  /**
   * Byte-wise merge of two masked data words.
   *
   * For each of the `DataBytes` byte lanes, the byte of `newData` is selected when the
   * corresponding bit of `newMask` is set; otherwise the byte of `oldData` is kept.
   *
   * @param oldData data already held
   * @param oldMask byte mask of `oldData`
   * @param newData incoming data
   * @param newMask byte mask of `newData`
   * @return the merged data and the union of both masks
   */
  def doMerge(oldData: UInt, oldMask: UInt, newData: UInt, newMask: UInt): (UInt, UInt) = {
    val mergedData = VecInit(Seq.tabulate(DataBytes) { lane =>
      val lsb = 8 * lane
      val msb = lsb + 7
      Mux(newMask(lane), newData(msb, lsb), oldData(msb, lsb))
    }).asUInt
    val mergedMask = newMask | oldMask
    (mergedData, mergedMask)
  }

  /** Number of bits needed to index `UncacheBufferSize` entries. */
  def INDEX_WIDTH: Int = log2Up(UncacheBufferSize)

  /** Number of address bits that select a byte inside an XLEN-wide block. */
  def BLOCK_OFFSET: Int = log2Up(XLEN / 8)

  /** Strips the in-block byte offset (`BLOCK_OFFSET` low bits) from `x`. */
  def getBlockAddr(x: UInt): UInt = x >> BLOCK_OFFSET
}
4574050fc0SYanqin Li
/** Base class for bundles that use the uncache buffer parameters and helpers. */
abstract class UncacheBundle(implicit p: Parameters)
  extends XSBundle
  with HasUncacheBufferParameters
4774050fc0SYanqin Li
/** Base class for modules that use the uncache buffer parameters and helpers. */
abstract class UncacheModule(implicit p: Parameters)
  extends XSModule
  with HasUncacheBufferParameters
4974050fc0SYanqin Li
5074050fc0SYanqin Li
/**
 * Flush handshake of the uncache buffer, declared from the requester's point of view:
 * the requester drives `valid` and observes `empty`.
 */
class UncacheFlushBundle extends Bundle {
  /** Flush request. */
  val valid = Output(Bool())
  /** Buffer-empty indication returned to the requester. */
  val empty = Input(Bool())
}
551f0e2dc7SJiawei Lin
/**
 * Payload of one uncache buffer entry: the request fields captured from the LSQ plus
 * the error flag recorded from the bus response.
 */
class UncacheEntry(implicit p: Parameters) extends UncacheBundle {
  // ---- request fields ----
  val cmd = UInt(M_SZ.W)
  val addr = UInt(PAddrBits.W)
  val vaddr = UInt(VAddrBits.W)
  val data = UInt(XLEN.W)
  val mask = UInt(DataBytes.W)
  val nc = Bool()
  val atomic = Bool()
  val memBackTypeMM = Bool()

  // ---- response fields ----
  val resp_nderr = Bool()

  /* NOTE: internal forwarding is not implemented. If it is added, the entry needs
   *       the following extra fields (and they must be cleared in `set`):
   *         val fwd_data = UInt(XLEN.W)
   *         val fwd_mask = UInt(DataBytes.W)
   */

  /** Initialises the entry from a newly accepted request and clears the error flag. */
  def set(x: UncacheWordReq): Unit = {
    // request properties
    cmd := x.cmd
    nc := x.nc
    atomic := x.atomic
    memBackTypeMM := x.memBackTypeMM
    // addresses
    addr := x.addr
    vaddr := x.vaddr
    // payload
    data := x.data
    mask := x.mask
    // no response has been received yet
    resp_nderr := false.B
  }

  /**
   * Merges request `x` into this entry.
   *
   * Data and mask are combined with `doMerge` (bytes of `x` win). When the merged mask
   * is non-zero, the low `BLOCK_OFFSET` bits of `addr` and `vaddr` are replaced by the
   * index of the lowest set mask bit, keeping the address aligned with the data.
   */
  def update(x: UncacheWordReq): Unit = {
    val (mergedData, mergedMask) = doMerge(data, mask, x.data, x.mask)
    // index of the first valid byte, and whether any byte is valid at all
    val (firstByte, anyByte) = PriorityEncoderWithFlag(mergedMask)

    // rebuild an address from its block part and the new byte offset
    def realign(a: UInt): UInt = (getBlockAddr(a) << BLOCK_OFFSET) | firstByte

    data := mergedData
    mask := mergedMask
    when(anyByte) {
      addr := realign(addr)
      vaddr := realign(vaddr)
    }
  }

  /**
   * Records a TileLink D-channel response: loads (`M_XRD`) capture the returned data,
   * and `resp_nderr` is set when the response is denied or corrupt.
   */
  def update(x: TLBundleD): Unit = {
    val isLoad = cmd === MemoryOpConstants.M_XRD
    when(isLoad) {
      data := x.data
    }
    resp_nderr := x.denied || x.corrupt
  }

  /* NOTE: counterpart of the disabled forward fields above:
   *   def update(forwardData: UInt, forwardMask: UInt): Unit = {
   *     fwd_data := forwardData
   *     fwd_mask := forwardMask
   *   }
   */

  /**
   * Builds the response returned to the LSQ for this entry.
   *
   * @param eid index of this entry in the buffer, reported as the response id
   */
  def toUncacheWordResp(eid: UInt): UncacheWordResp = {
    val r = Wire(new UncacheWordResp)
    // everything not assigned below is left as don't-care
    r := DontCare
    // With internal forwarding enabled, the data would be merged byte-wise with
    // fwd_data / fwd_mask here; for now the entry data is returned unchanged.
    r.data := data
    r.id := eid
    r.nderr := resp_nderr
    r.nc := nc
    r.is2lq := cmd === MemoryOpConstants.M_XRD
    r.miss := false.B
    r.replay := false.B
    r.tag_error := false.B
    r.error := false.B
    r
  }
}
1291f0e2dc7SJiawei Lin
/**
 * Life-cycle flags of one uncache buffer entry.
 *
 * Intended progression: valid (-> waitSame) -> inflight -> waitReturn.
 */
class UncacheEntryState(implicit p: Parameters) extends DCacheBundle {
  val valid = Bool()
  val inflight = Bool() // uncache -> L2
  val waitSame = Bool()
  val waitReturn = Bool() // uncache -> LSQ

  /** Clears every flag. */
  def init: Unit = {
    Seq(valid, inflight, waitSame, waitReturn).foreach(_ := false.B)
  }

  // ---- queries ----
  def isValid(): Bool = valid
  def isInflight(): Bool = valid && inflight
  def isWaitReturn(): Bool = valid && waitReturn
  def isWaitSame(): Bool = valid && waitSame
  /** Valid with no other flag set: may be issued to the bus. */
  def can2Bus(): Bool = valid && !(inflight || waitSame || waitReturn)
  /** Valid and waiting to be returned to the LSQ. */
  def can2Lsq(): Bool = valid && waitReturn
  def canMerge(): Bool = valid && !inflight
  def isFwdOld(): Bool = valid && (inflight || waitReturn)
  def isFwdNew(): Bool = valid && !(inflight || waitReturn)

  // ---- setters ----
  def setValid(x: Bool): Unit = {
    valid := x
  }
  def setInflight(x: Bool): Unit = {
    inflight := x
  }
  def setWaitReturn(x: Bool): Unit = {
    waitReturn := x
  }
  def setWaitSame(x: Bool): Unit = {
    waitSame := x
  }

  /** Bus response received: inflight -> waitReturn. */
  def updateUncacheResp(): Unit = {
    assert(inflight, "The request was not sent and a response was received")
    inflight := false.B
    waitReturn := true.B
  }

  /** Response handed back to the LSQ: the entry is released (all flags cleared). */
  def updateReturn(): Unit = init
}
1711f0e2dc7SJiawei Lin
/** Top-level IO of the uncache buffer. */
class UncacheIO(implicit p: Parameters) extends DCacheBundle {
  val hartId = Input(UInt())
  /** When false, a new bus request is only issued while the non-outstanding FSM is idle. */
  val enableOutstanding = Input(Bool())
  /** Flush handshake (flipped: `valid` is an input here, `empty` an output). */
  val flush = Flipped(new UncacheFlushBundle)
  /** Request / response channels towards the LSQ. */
  val lsq = Flipped(new UncacheWordIO)
  /** One forward query port per load pipeline. */
  val forward = Vec(LoadPipelineWidth, Flipped(new LoadForwardQueryIO))
}
1791f0e2dc7SJiawei Lin
// Convert DCacheIO requests to TileLink.
// For now, only TL-UL is handled.
1821f0e2dc7SJiawei Lin
/**
 * Diplomatic wrapper of the uncache buffer: declares a single TileLink client named
 * "uncache" and instantiates [[UncacheImp]] as the module implementation.
 */
class Uncache()(implicit p: Parameters) extends LazyModule with HasXSParameter {
  override def shouldBeInlined: Boolean = false

  /** Number of TileLink source ids requested by the client (`UncacheBufferSize`). */
  def idRange: Int = UncacheBufferSize

  val clientParameters = TLMasterPortParameters.v1(
    clients = Seq(
      TLMasterParameters.v1(
        name = "uncache",
        sourceId = IdRange(0, idRange)
      )
    ),
    // user fields carried on the A channel
    requestFields = Seq(MemBackTypeMMField(), MemPageTypeNCField())
  )
  val clientNode = TLClientNode(Seq(clientParameters))

  lazy val module = new UncacheImp(this)
}
1981f0e2dc7SJiawei Lin
/* Uncache Buffer */
/**
 * Uncache buffer implementation.
 *
 * Requests from the LSQ are allocated into (or merged with) buffer entries, issued on
 * the TileLink A channel as Get/Put, completed by the D channel response and finally
 * returned to the LSQ. Valid store entries additionally answer load forward queries.
 *
 * NOTE: Chisel last-connect semantics apply to `entries` and `states`; the order of the
 *       "Enter Buffer", "Uncache Req", "Uncache Resp" and "Return to LSQ" sections
 *       therefore defines the priority of their updates and must be preserved.
 */
class UncacheImp(outer: Uncache) extends LazyModuleImp(outer)
  with HasTLDump
  with HasXSParameter
  with HasUncacheBufferParameters
  with HasPerfEvents
{
  println(s"Uncahe Buffer Size: $UncacheBufferSize entries")
  val io = IO(new UncacheIO)

  val (bus, edge) = outer.clientNode.out.head

  val req  = io.lsq.req
  val resp = io.lsq.resp
  val mem_acquire = bus.a
  val mem_grant   = bus.d
  val req_ready = WireInit(false.B)

  // assign default values to output signals (some are overridden further below)
  bus.b.ready := false.B
  bus.c.valid := false.B
  bus.c.bits  := DontCare
  bus.d.ready := false.B
  bus.e.valid := false.B
  bus.e.bits  := DontCare
  io.lsq.req.ready := req_ready
  io.lsq.resp.valid := false.B
  io.lsq.resp.bits := DontCare


  /******************************************************************
   * Data Structure
   ******************************************************************/

  val entries = Reg(Vec(UncacheBufferSize, new UncacheEntry))
  val states = RegInit(VecInit(Seq.fill(UncacheBufferSize)(0.U.asTypeOf(new UncacheEntryState))))
  val s_idle :: s_inflight :: s_wait_return :: Nil = Enum(3)
  val uState = RegInit(s_idle)

  // drain buffer
  val empty = Wire(Bool())
  val f1_needDrain = Wire(Bool())
  // Registered drain request: new requests are rejected in the cycle after a forward
  // tag mismatch or a flush request was seen while the buffer was not empty.
  // NOTE(review): the original when/elsewhen/otherwise chain assigned false.B in both
  // fall-through branches, so this register does not latch until the buffer is empty;
  // it simply follows the condition with one cycle of delay. The single assignment
  // below is equivalent. Confirm that this is the intended behaviour.
  val do_uarch_drain = RegInit(false.B)
  do_uarch_drain := (f1_needDrain || io.flush.valid) && !empty

  val q0_entry = Wire(new UncacheEntry)
  val q0_canSentIdx = Wire(UInt(INDEX_WIDTH.W))
  val q0_canSent = Wire(Bool())


  /******************************************************************
   * Functions
   ******************************************************************/

  /** Builds a Vec with one element per buffer entry. */
  def sizeMap[T <: Data](f: Int => T) = VecInit((0 until UncacheBufferSize).map(f))
  /** Runs `f` once per buffer entry index (side effects only). */
  def sizeForeach[T <: Data](f: Int => Unit): Unit = (0 until UncacheBufferSize).foreach(f)
  def isStore(e: UncacheEntry): Bool = e.cmd === MemoryOpConstants.M_XWR
  def isStore(x: UInt): Bool = x === MemoryOpConstants.M_XWR
  // block-granular (BLOCK_OFFSET low bits ignored) address comparison
  def addrMatch(x: UncacheEntry, y: UncacheWordReq) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheWordReq, y: UncacheEntry) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UncacheEntry, y: UncacheEntry) : Bool = getBlockAddr(x.addr) === getBlockAddr(y.addr)
  def addrMatch(x: UInt, y: UInt) : Bool = getBlockAddr(x) === getBlockAddr(y)

  /**
   * True when `mask` selects a single byte, or one of the listed naturally aligned
   * 2-, 4- or 8-byte groups.
   * NOTE: the literals below are written for an 8-bit mask.
   */
  def continueAndAlign(mask: UInt): Bool = {
    val res =
      PopCount(mask) === 1.U ||
      mask === 0b00000011.U ||
      mask === 0b00001100.U ||
      mask === 0b00110000.U ||
      mask === 0b11000000.U ||
      mask === 0b00001111.U ||
      mask === 0b11110000.U ||
      mask === 0b11111111.U
    res
  }

  /**
   * Primary merge condition between request `x` and entry `e` (buffer index `eid`):
   * same virtual block, same command, both NC, same memBackTypeMM, neither atomic,
   * and the combined mask is contiguous and aligned. Merging is also refused while the
   * entry is receiving its bus response or is waiting to be returned to the LSQ.
   */
  def canMergePrimary(x: UncacheWordReq, e: UncacheEntry, eid: UInt): Bool = {
    // vaddr same, properties same
    getBlockAddr(x.vaddr) === getBlockAddr(e.vaddr) &&
      x.cmd === e.cmd && x.nc && e.nc &&
      x.memBackTypeMM === e.memBackTypeMM && !x.atomic && !e.atomic &&
      continueAndAlign(x.mask | e.mask) &&
    // not receiving uncache response, not waitReturn -> no wake-up signal in these cases
      !(mem_grant.fire && mem_grant.bits.source === eid || states(eid).isWaitReturn())
  }

  /** Secondary merge condition: the entry is not in flight and is not being sent this cycle. */
  def canMergeSecondary(eid: UInt): Bool = {
    states(eid).canMerge() && !(q0_canSent && q0_canSentIdx === eid)
  }

  /******************************************************************
   * uState for non-outstanding
   ******************************************************************/

  switch(uState){
    is(s_idle){
      when(mem_acquire.fire){
        uState := s_inflight
      }
    }
    is(s_inflight){
      when(mem_grant.fire){
        uState := s_wait_return
      }
    }
    is(s_wait_return){
      when(resp.fire){
        uState := s_idle
      }
    }
  }


  /******************************************************************
   * Enter Buffer
   *  Version 0 (better timing)
   *    e0 judge: alloc/merge write vec
   *    e1 alloc
   *
   *  Version 1 (better performance)
   *    e0: solved in one cycle for achieving the original performance.
   *    e1: return idResp to set sid for handshake
   ******************************************************************/

  /* e0: merge/alloc */
  val e0_fire = req.fire
  val e0_req_valid = req.valid
  val e0_req = req.bits

  // Per-entry classification of the incoming request against a valid entry of the same block:
  //   reject        : the primary merge condition fails
  //   merge         : both merge conditions hold
  //   allocWaitSame : primary holds but secondary fails -> allocate a new entry in waitSame
  val e0_rejectVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_mergeVec = Wire(Vec(UncacheBufferSize, Bool()))
  val e0_allocWaitSameVec = Wire(Vec(UncacheBufferSize, Bool()))
  sizeForeach(i => {
    val valid = e0_req_valid && states(i).isValid()
    val isAddrMatch = addrMatch(e0_req, entries(i))
    val canMerge1 = canMergePrimary(e0_req, entries(i), i.U)
    val canMerge2 = canMergeSecondary(i.U)
    e0_rejectVec(i) := valid && isAddrMatch && !canMerge1
    e0_mergeVec(i) := valid && isAddrMatch && canMerge1 && canMerge2
    e0_allocWaitSameVec(i) := valid && isAddrMatch && canMerge1 && !canMerge2
  })
  assert(PopCount(e0_mergeVec) <= 1.U, "Uncache buffer should not merge multiple entries")

  val e0_invalidVec = sizeMap(i => !states(i).isValid())
  // reject while draining, when no entry is free, or when any entry rejects the request
  val e0_reject = do_uarch_drain || !e0_invalidVec.asUInt.orR || e0_rejectVec.reduce(_ || _)
  val (e0_mergeIdx, e0_canMerge) = PriorityEncoderWithFlag(e0_mergeVec)
  val (e0_allocIdx, e0_canAlloc) = PriorityEncoderWithFlag(e0_invalidVec)
  val e0_allocWaitSame = e0_allocWaitSameVec.reduce(_ || _)
  val e0_sid = Mux(e0_canMerge, e0_mergeIdx, e0_allocIdx)

  // e0_fire is used to guarantee that it will not be rejected
  when(e0_canMerge && e0_fire){
    entries(e0_mergeIdx).update(e0_req)
  }.elsewhen(e0_canAlloc && e0_fire){
    entries(e0_allocIdx).set(e0_req)
    states(e0_allocIdx).setValid(true.B)
    when(e0_allocWaitSame){
      states(e0_allocIdx).setWaitSame(true.B)
    }
  }

  req_ready := !e0_reject

  /* e1: return accept */
  io.lsq.idResp.valid := RegNext(e0_fire)
  io.lsq.idResp.bits.mid := RegEnable(e0_req.id, e0_fire)
  io.lsq.idResp.bits.sid := RegEnable(e0_sid, e0_fire)
  io.lsq.idResp.bits.is2lq := RegEnable(!isStore(e0_req.cmd), e0_fire)
  io.lsq.idResp.bits.nc := RegEnable(e0_req.nc, e0_fire)

  /******************************************************************
   * Uncache Req
   *  Version 0 (better timing)
   *    q0: choose which one is sent
   *    q0: sent
   *
   *  Version 1 (better performance)
   *    solved in one cycle for achieving the original performance.
   *    NOTE: "Enter Buffer" & "Uncache Req" are not a continuous pipeline,
   *          because there is no guarantee that mem_acquire will always be ready.
   ******************************************************************/

  val q0_canSentVec = sizeMap(i =>
    (io.enableOutstanding || uState === s_idle) &&
    states(i).can2Bus()
  )
  val q0_res = PriorityEncoderWithFlag(q0_canSentVec)
  q0_canSentIdx := q0_res._1
  q0_canSent := q0_res._2
  q0_entry := entries(q0_canSentIdx)

  // transfer size = number of valid bytes; only 1/2/4/8 bytes are legal
  val size = PopCount(q0_entry.mask)
  val (lgSize, legal) = PriorityMuxWithFlag(Seq(
    1.U -> 0.U,
    2.U -> 1.U,
    4.U -> 2.U,
    8.U -> 3.U
  ).map(m => (size===m._1) -> m._2))
  assert(!(q0_canSent && !legal))

  val q0_load = edge.Get(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize
  )._2

  val q0_store = edge.Put(
    fromSource      = q0_canSentIdx,
    toAddress       = q0_entry.addr,
    lgSize          = lgSize,
    data            = q0_entry.data,
    mask            = q0_entry.mask
  )._2

  val q0_isStore = q0_entry.cmd === MemoryOpConstants.M_XWR

  mem_acquire.valid := q0_canSent
  mem_acquire.bits := Mux(q0_isStore, q0_store, q0_load)
  mem_acquire.bits.user.lift(MemBackTypeMM).foreach(_ := q0_entry.memBackTypeMM)
  mem_acquire.bits.user.lift(MemPageTypeNC).foreach(_ := q0_entry.nc)
  when(mem_acquire.fire){
    states(q0_canSentIdx).setInflight(true.B)

    // q0 should judge whether wait same block
    (0 until UncacheBufferSize).foreach(j =>
      when(states(j).isValid() && !states(j).isWaitReturn() && addrMatch(q0_entry, entries(j))){
        states(j).setWaitSame(true.B)
      }
    )
  }


  /******************************************************************
   * Uncache Resp
   ******************************************************************/

  val (_, _, refill_done, _) = edge.addr_inc(mem_grant)

  mem_grant.ready := true.B
  when (mem_grant.fire) {
    val id = mem_grant.bits.source
    entries(id).update(mem_grant.bits)
    states(id).updateUncacheResp()
    assert(refill_done, "Uncache response should be one beat only!")

    // remove state of wait same block
    (0 until UncacheBufferSize).foreach(j =>
      when(states(j).isValid() && states(j).isWaitSame() && addrMatch(entries(id), entries(j))){
        states(j).setWaitSame(false.B)
      }
    )
  }


  /******************************************************************
   * Return to LSQ
   ******************************************************************/

  val r0_canSentVec = sizeMap(i => states(i).can2Lsq())
  val (r0_canSentIdx, r0_canSent) = PriorityEncoderWithFlag(r0_canSentVec)
  resp.valid := r0_canSent
  resp.bits := entries(r0_canSentIdx).toUncacheWordResp(r0_canSentIdx)
  when(resp.fire){
    states(r0_canSentIdx).updateReturn()
  }


  /******************************************************************
   * Buffer Flush
   * 1. when io.flush.valid is true: drain store queue and ubuffer
   * 2. when io.lsq.req.bits.atomic is true: not supported for now
   ******************************************************************/
  empty := !VecInit(states.map(_.isValid())).asUInt.orR
  io.flush.empty := empty


  /******************************************************************
   * Load Data Forward to loadunit
   *  f0: vaddr match, fast resp
   *  f1: mask & data select, merge; paddr match; resp
   *      NOTE: forward.paddr from dtlb, which is far from uncache f0
   ******************************************************************/

  // only valid store entries can forward
  val f0_validMask = sizeMap(i => isStore(entries(i)) && states(i).isValid())
  val f0_fwdMaskCandidates = VecInit(entries.map(e => e.mask))
  val f0_fwdDataCandidates = VecInit(entries.map(e => e.data))
  val f1_fwdMaskCandidates = sizeMap(i => RegEnable(entries(i).mask, f0_validMask(i)))
  val f1_fwdDataCandidates = sizeMap(i => RegEnable(entries(i).data, f0_validMask(i)))
  val f1_tagMismatchVec = Wire(Vec(LoadPipelineWidth, Bool()))
  f1_needDrain := f1_tagMismatchVec.asUInt.orR && !empty

  // i: load pipeline (forward port) index; w: buffer entry index
  for ((forward, i) <- io.forward.zipWithIndex) {
    val f0_fwdValid = forward.valid
    val f1_fwdValid = RegNext(f0_fwdValid)

    /* f0 */
    // vaddr match
    val f0_vtagMatches = sizeMap(w => addrMatch(entries(w).vaddr, forward.vaddr))
    // The old/new classification must use the state of the matched entry `w`.
    // (It previously indexed `states` with the forward port index `i`.)
    val f0_flyTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && states(w).isFwdOld())
    val f0_idleTagMatches = sizeMap(w => f0_vtagMatches(w) && f0_validMask(w) && f0_fwdValid && states(w).isFwdNew())
    // ONLY for fast use to get better timing
    val f0_flyMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_flyTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))
    val f0_idleMaskFast = shiftMaskToHigh(
      forward.vaddr,
      Mux1H(f0_idleTagMatches, f0_fwdMaskCandidates)
    ).asTypeOf(Vec(VDataBytes, Bool()))

    /* f1 */
    val f1_flyTagMatches = RegEnable(f0_flyTagMatches, f0_fwdValid)
    val f1_idleTagMatches = RegEnable(f0_idleTagMatches, f0_fwdValid)
    val f1_fwdPAddr = RegEnable(forward.paddr, f0_fwdValid)
    // select
    val f1_flyMask = Mux1H(f1_flyTagMatches, f1_fwdMaskCandidates)
    val f1_flyData = Mux1H(f1_flyTagMatches, f1_fwdDataCandidates)
    val f1_idleMask = Mux1H(f1_idleTagMatches, f1_fwdMaskCandidates)
    val f1_idleData = Mux1H(f1_idleTagMatches, f1_fwdDataCandidates)
    // merge old(inflight) and new(idle); bytes of the new (idle) entry win
    val (f1_fwdDataTmp, f1_fwdMaskTmp) = doMerge(f1_flyData, f1_flyMask, f1_idleData, f1_idleMask)
    val f1_fwdMask = shiftMaskToHigh(f1_fwdPAddr, f1_fwdMaskTmp).asTypeOf(Vec(VDataBytes, Bool()))
    val f1_fwdData = shiftDataToHigh(f1_fwdPAddr, f1_fwdDataTmp).asTypeOf(Vec(VDataBytes, UInt(8.W)))
    // paddr match and mismatch judge
    val f1_ptagMatches = sizeMap(w => addrMatch(RegEnable(entries(w).addr, f0_fwdValid), f1_fwdPAddr))
    f1_tagMismatchVec(i) := sizeMap(w =>
      RegEnable(f0_vtagMatches(w), f0_fwdValid) =/= f1_ptagMatches(w) && RegEnable(f0_validMask(w), f0_fwdValid) && f1_fwdValid
    ).asUInt.orR
    when(f1_tagMismatchVec(i)) {
      XSDebug("forward tag mismatch: pmatch %x vmatch %x vaddr %x paddr %x\n",
        f1_ptagMatches.asUInt,
        RegEnable(f0_vtagMatches.asUInt, f0_fwdValid),
        RegEnable(forward.vaddr, f0_fwdValid),
        RegEnable(forward.paddr, f0_fwdValid)
      )
    }
    // response
    forward.addrInvalid := false.B // addr in ubuffer is always ready
    forward.dataInvalid := false.B // data in ubuffer is always ready
    forward.matchInvalid := f1_tagMismatchVec(i) // paddr / vaddr cam result does not match
    for (j <- 0 until VDataBytes) {
      forward.forwardMaskFast(j) := f0_flyMaskFast(j) || f0_idleMaskFast(j)

      forward.forwardData(j) := f1_fwdData(j)
      forward.forwardMask(j) := false.B
      when(f1_fwdMask(j) && f1_fwdValid) {
        forward.forwardMask(j) := true.B
      }
    }

  }


  /******************************************************************
   * Debug / Performance
   ******************************************************************/

  /* Debug Counters */
  // print all input/output requests for debug purpose
  // print req/resp
  XSDebug(req.fire, "req cmd: %x addr: %x data: %x mask: %x\n",
    req.bits.cmd, req.bits.addr, req.bits.data, req.bits.mask)
  // print the data of the response being returned (previously printed req.bits.data)
  XSDebug(resp.fire, "data: %x\n", resp.bits.data)
  // print tilelink messages
  XSDebug(mem_acquire.valid, "mem_acquire valid, ready=%d ", mem_acquire.ready)
  mem_acquire.bits.dump(mem_acquire.valid)

  XSDebug(mem_grant.fire, "mem_grant fire ")
  mem_grant.bits.dump(mem_grant.fire)

  /* Performance Counters */
  XSPerfAccumulate("e0_reject", e0_reject && e0_req_valid)
  XSPerfAccumulate("e0_total_enter", e0_fire)
  XSPerfAccumulate("e0_merge", e0_fire && e0_canMerge)
  XSPerfAccumulate("e0_alloc_simple", e0_fire && e0_canAlloc && !e0_allocWaitSame)
  XSPerfAccumulate("e0_alloc_wait_same", e0_fire && e0_canAlloc && e0_allocWaitSame)
  XSPerfAccumulate("q0_acquire", q0_canSent)
  XSPerfAccumulate("q0_acquire_store", q0_canSent && q0_isStore)
  XSPerfAccumulate("q0_acquire_load", q0_canSent && !q0_isStore)
  XSPerfAccumulate("uncache_memBackTypeMM", io.lsq.req.fire && io.lsq.req.bits.memBackTypeMM)
  XSPerfAccumulate("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc)
  XSPerfAccumulate("uncache_outstanding", uState =/= s_idle && mem_acquire.fire)
  XSPerfAccumulate("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR)))
  XSPerfAccumulate("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))

  val perfEvents = Seq(
    ("uncache_mmio_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_mmio_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && !io.lsq.req.bits.nc),
    ("uncache_nc_store", io.lsq.req.fire && isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_nc_load", io.lsq.req.fire && !isStore(io.lsq.req.bits.cmd) && io.lsq.req.bits.nc),
    ("uncache_outstanding", uState =/= s_idle && mem_acquire.fire),
    ("forward_count", PopCount(io.forward.map(_.forwardMask.asUInt.orR))),
    ("forward_vaddr_match_failed", PopCount(f1_tagMismatchVec))
  )

  generatePerfEvent()
  //  End
}
606