xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/Probe.scala (revision 8b33cd30e0034914b58520e0dc3c0c4b1aad6a03)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
161f0e2dc7SJiawei Lin
171f0e2dc7SJiawei Linpackage xiangshan.cache
181f0e2dc7SJiawei Lin
198891a219SYinan Xuimport org.chipsalliance.cde.config.Parameters
201f0e2dc7SJiawei Linimport chisel3._
211f0e2dc7SJiawei Linimport chisel3.util._
221ca0e4f3SYinan Xuimport freechips.rocketchip.tilelink.{TLBundleB, TLEdgeOut, TLMessages, TLPermissions}
2344f2941bSJiru Sunimport utils.HasTLDump
2444f2941bSJiru Sunimport utility.{XSDebug, XSPerfAccumulate, HasPerfEvents}
251f0e2dc7SJiawei Lin
/**
  * Internal probe request handed to a probe queue entry.
  *
  * Field declaration order is part of the bundle layout and is kept as-is.
  */
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
  // source / opcode are declared without an explicit width (inferred by Chisel)
  val source = UInt()
  val opcode = UInt()
  // physical address of the probed block
  val addr   = UInt(PAddrBits.W)
  val vaddr  = UInt(VAddrBits.W) // l2 uses vaddr index to probe l1
  // probe permission parameter
  val param  = UInt(TLPermissions.bdWidth.W)
  val needData = Bool()

  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)

  /**
    * Emit a debug print of this request while `cond` is asserted.
    *
    * Only source, opcode, addr and param are printed.
    */
  def dump(cond: Bool) = {
    XSDebug(cond, "ProbeReq source: %d opcode: %d addr: %x param: %d\n",
      source, opcode, addr, param)
  }
}
431f0e2dc7SJiawei Lin
/** Probe response: carries only the id of the probe queue entry it belongs to. */
class ProbeResp(implicit p: Parameters) extends DCacheBundle {
  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)
}
48f4d8d00eSWilliam Wang
/**
  * One probe queue slot: latches a [[ProbeReq]], offers it on `io.pipe_req` and frees
  * itself once a response carrying its own id arrives.
  *
  * State machine:
  *   s_invalid   -> s_pipe_req  when io.req fires
  *   s_pipe_req  -> s_wait_resp when io.pipe_req fires
  *   s_wait_resp -> s_invalid   when io.pipe_resp is valid and its id equals io.id
  */
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    // incoming probe request to latch
    val req = Flipped(Decoupled(new ProbeReq))
    // request issued by this entry
    val pipe_req  = DecoupledIO(new MainPipeReq)
    // response; only acted upon when its id matches io.id
    val pipe_resp = Input(Valid(new ProbeResp))
    // address currently locked by LR/SC (if valid)
    val lrsc_locked_block = Input(Valid(UInt()))
    // index of this entry inside the probe queue
    val id = Input(UInt(log2Up(cfg.nProbeEntries).W))

    // the block we are probing
    val block_addr  = Output(Valid(UInt()))
  })

  val s_invalid :: s_pipe_req :: s_wait_resp :: Nil = Enum(3)

  val state = RegInit(s_invalid)

  // latched copy of the accepted request
  val req = Reg(new ProbeReq)

  // assign default values to signals
  io.req.ready      := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits  := DontCare

  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits  := req.addr

  XSDebug(state =/= s_invalid, "state: %d\n", state)

  XSDebug(state =/= s_invalid, "ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)

  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

  // Does the probed block match the LR/SC-locked block?
  // While a request is being accepted `req` is not written yet, so the incoming
  // address is compared instead of the latched one.
  val lrsc_blocked = Mux(
    io.req.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(io.req.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr)
  )

  // Registered copy of lrsc_blocked, deliberately created at module scope.
  // A RegNext built inside `when (state === s_pipe_req)` is only updated while that
  // condition holds, so the value computed on io.req.fire (state is still s_invalid)
  // would never be captured and the first s_pipe_req cycle would use a stale result.
  val lrsc_blocked_r = RegNext(lrsc_blocked)

  when (state === s_pipe_req) {
    // Note that probe req will be blocked in the next cycle if a lr updates lrsc_locked_block addr
    // in this way, we can RegNext(lrsc_blocked) for better timing
    io.pipe_req.valid := !lrsc_blocked_r

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr   := req.addr
    pipe_req.vaddr  := req.vaddr
    pipe_req.probe_need_data := req.needData
    pipe_req.error := false.B
    pipe_req.id := io.id

    when (io.pipe_req.fire) {
      state := s_wait_resp
    }
  }

  when (state === s_wait_resp) {
    // responses are visible to every entry; only react to our own id
    when (io.pipe_resp.valid && io.id === io.pipe_resp.bits.id) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire)
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr))
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}
1261f0e2dc7SJiawei Lin
/**
  * Probe queue of the data cache.
  *
  * Accepts TileLink B-channel probes on `io.mem_probe`, stores each one in a free
  * [[ProbeEntry]], arbitrates the entries' requests, registers the winner for one cycle
  * and forwards it on `io.pipe_req`. The forwarded request is held back while it matches
  * the LR/SC-locked block and in the cycle after `io.update_resv_set` was asserted.
  *
  * @param edge TileLink edge used to size the B-channel bundle
  */
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val lrsc_locked_block = Input(Valid(UInt()))
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

  // translate to inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // add extra 2 bits from vaddr to get vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if(DCacheAboveIndexOffset > DCacheTagOffset) {
    // have alias problem, extra alias bits needed for index
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0)                 // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  // bit 0 of the B-channel data field is used as the need-data flag
  req.needData := io.mem_probe.bits.data(0)
  // the entry supplies its own id when it issues the request
  req.id := DontCare

  io.mem_probe.ready := allocate

  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)
    entry.io.id := i.U

    // entry req
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i)   := entry.io.req.ready
    entry.io.req.bits  := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    // pipe_resp
    // broadcast to all entries; each entry filters on its own id
    entry.io.pipe_resp.valid := io.pipe_req.fire
    entry.io.pipe_resp.bits.id := io.pipe_req.bits.id

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }

  // delay probe req for 1 cycle
  val selected_req_valid = RegInit(false.B)
  val selected_req_bits = RegEnable(pipe_req_arb.io.out.bits, pipe_req_arb.io.out.fire)
  // compare against the request being latched this cycle, otherwise against the
  // already latched one (only meaningful while it is valid)
  val selected_lrsc_blocked = Mux(
    pipe_req_arb.io.out.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(pipe_req_arb.io.out.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(selected_req_bits.addr) && selected_req_valid
  )
  val resvsetProbeBlock = RegNext(io.update_resv_set || selected_lrsc_blocked)
  // When we update update_resv_set, block all probe req in the next cycle
  // It should give Probe reservation set addr compare an independent cycle,
  // which will lead to better timing
  pipe_req_arb.io.out.ready := !selected_req_valid || io.pipe_req.fire
  io.pipe_req.valid := selected_req_valid && !resvsetProbeBlock
  io.pipe_req.bits := selected_req_bits
  when(io.pipe_req.fire){
    selected_req_valid := false.B
  }
  // a new request latched in the same cycle takes precedence over the clear above
  when(pipe_req_arb.io.out.fire){
    selected_req_valid := true.B
  }

  // sanity checks on incoming probes
  when (io.mem_probe.valid) {
    // before a probe finishes, L2 should not further issue probes on this block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && get_block(e.io.block_addr.bits) === get_block(io.mem_probe.bits.address))).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  XSDebug(io.mem_probe.fire, "mem_probe: ")
  io.mem_probe.bits.dump(io.mem_probe.fire)

// io.pipe_req.bits.dump(io.pipe_req.fire)

  XSDebug(io.lrsc_locked_block.valid, "lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  XSPerfAccumulate("ProbeL1DCache", io.mem_probe.fire)

  // number of occupied entries, registered once before being compared
  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  // Occupancy thresholds, evaluated as Scala ints at elaboration time (same values as
  // the former hardware expressions nProbeEntries.U/4.U etc.).
  val perfThreshold1_4 = (cfg.nProbeEntries / 4).U
  val perfThreshold2_4 = (cfg.nProbeEntries / 2).U
  val perfThreshold3_4 = (cfg.nProbeEntries * 3 / 4).U
  // Buckets are upper-inclusive: [0, 1/4], (1/4, 2/4], (2/4, 3/4], (3/4, max].
  // The first bucket used `<`, which left a count of exactly nProbeEntries/4 uncounted.
  val perfEvents = Seq(
    ("dcache_probq_req      ", io.pipe_req.fire),
    ("dcache_probq_1_4_valid", (perfValidCount <= perfThreshold1_4)),
    ("dcache_probq_2_4_valid", (perfValidCount > perfThreshold1_4) & (perfValidCount <= perfThreshold2_4)),
    ("dcache_probq_3_4_valid", (perfValidCount > perfThreshold2_4) & (perfValidCount <= perfThreshold3_4)),
    ("dcache_probq_4_4_valid", (perfValidCount > perfThreshold3_4)),
  )
  generatePerfEvent()
}
236