// xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/Probe.scala (revision 1ca0e4f33f402f31daec0e57d270079d2db13562)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
16
package xiangshan.cache

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.{TLBundleB, TLEdgeOut, TLMessages, TLPermissions}
import utils.{HasPerfEvents, HasTLDump, XSDebug, XSPerfAccumulate}
24
/** Internal representation of a TileLink B-channel Probe request.
  *
  * Field order is part of the hardware bit layout — do not reorder.
  * `source` and `opcode` are declared width-less; their widths are
  * inferred at connection time.
  */
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
  val source = UInt()
  val opcode = UInt()
  val addr   = UInt(PAddrBits.W)
  val vaddr  = UInt(VAddrBits.W) // l2 uses vaddr index to probe l1
  val param  = UInt(TLPermissions.bdWidth.W)
  val needData = Bool()

  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)

  def dump() = {
    // Also print vaddr and needData so vaddr-indexed (aliased) probes
    // can be traced in the debug log, not just the physical address.
    XSDebug("ProbeReq source: %d opcode: %d addr: %x vaddr: %x param: %d needData: %b\n",
      source, opcode, addr, vaddr, param, needData)
  }
}
42
/** Response from the main pipeline back to the probe queue.
  * Carries only the probe queue entry ID so the matching ProbeEntry
  * can be released; no data travels on this path.
  */
class ProbeResp(implicit p: Parameters) extends DCacheBundle {
  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)
}
47
/** One slot of the probe queue.
  *
  * Holds a single in-flight Probe and walks a 3-state FSM:
  *   s_invalid   -> free, ready to accept a new request
  *   s_pipe_req  -> trying to issue the probe into the main pipeline
  *   s_wait_resp -> waiting for the main pipeline response with our ID
  *
  * While non-idle, `block_addr` advertises the address being probed so the
  * enclosing queue can reject duplicate probes on the same block.
  */
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    val req = Flipped(Decoupled(new ProbeReq))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val pipe_resp = Input(Valid(new ProbeResp))
    val lrsc_locked_block = Input(Valid(UInt()))
    val id = Input(UInt(log2Up(cfg.nProbeEntries).W))

    // the block we are probing
    val block_addr  = Output(Valid(UInt()))
  })

  val s_invalid :: s_pipe_req :: s_wait_resp :: Nil = Enum(3)

  val state = RegInit(s_invalid)

  // latched copy of the request being serviced
  val req = Reg(new ProbeReq)

  // assign default values to signals
  io.req.ready      := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits  := DontCare

  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits  := req.addr

  // debug trace while busy (the original had two redundant prints here;
  // this single one carries both state and block address)
  when (state =/= s_invalid) {
    XSDebug("ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)
  }

  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire()) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

  // Compare against the LR/SC reservation using the incoming request's
  // address on the allocation cycle, and the latched address afterwards.
  val lrsc_blocked = Mux(
    io.req.fire(),
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === io.req.bits.addr,
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr
  )

  when (state === s_pipe_req) {
    // Note that probe req will be blocked in the next cycle if a lr updates lrsc_locked_block addr
    // in this way, we can RegNext(lrsc_blocked) for better timing
    io.pipe_req.valid := !RegNext(lrsc_blocked)

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr   := req.addr
    pipe_req.vaddr  := req.vaddr
    pipe_req.probe_need_data := req.needData
    pipe_req.id := io.id

    when (io.pipe_req.fire()) {
      state := s_wait_resp
    }
  }

  when (state === s_wait_resp) {
    // release the entry only when the response carries our ID
    when (io.pipe_resp.valid && io.id === io.pipe_resp.bits.id) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire())
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr)
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}
128
/** Queue of in-flight L2->L1 Probe requests.
  *
  * Translates TileLink B-channel probes into internal ProbeReq-s, allocates
  * one ProbeEntry per in-flight probe, arbitrates entry requests into the
  * main pipeline, and blocks probes that collide with the LR/SC reservation
  * set (for forward-progress/timing reasons the block decision is registered,
  * delaying the selected request by one cycle).
  */
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val lrsc_locked_block = Input(Valid(UInt()))
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

  // translate to inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // add extra 2 bits from vaddr to get vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if(DCacheAboveIndexOffset > DCacheTagOffset) {
    // have alias problem, extra alias bits needed for index
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0)                 // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  req.needData := io.mem_probe.bits.data(0)
  req.id := DontCare

  io.mem_probe.ready := allocate

  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)
    entry.io.id := i.U

    // entry req: only the priority-selected free entry sees the request
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i)   := entry.io.req.ready
    entry.io.req.bits  := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    // pipe_resp: an entry is released as soon as its request enters the
    // main pipeline (the "response" is synthesized from pipe_req.fire)
    entry.io.pipe_resp.valid := io.pipe_req.fire()
    entry.io.pipe_resp.bits.id := io.pipe_req.bits.id

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }

  // delay probe req for 1 cycle
  val selected_req_valid = RegInit(false.B)
  val selected_req_bits = RegEnable(pipe_req_arb.io.out.bits, pipe_req_arb.io.out.fire())
  val selected_lrsc_blocked = Mux(
    pipe_req_arb.io.out.fire(),
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === pipe_req_arb.io.out.bits.addr,
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === selected_req_bits.addr && selected_req_valid
  )
  val resvsetProbeBlock = RegNext(io.update_resv_set || selected_lrsc_blocked)
  // When we update update_resv_set, block all probe req in the next cycle
  // It should give Probe reservation set addr compare an independent cycle,
  // which will lead to better timing
  pipe_req_arb.io.out.ready := !selected_req_valid || io.pipe_req.fire()
  io.pipe_req.valid := selected_req_valid && !resvsetProbeBlock
  io.pipe_req.bits := selected_req_bits
  // last-connect semantics: if the arbiter fires in the same cycle the
  // staged request is consumed, the set below wins and the slot refills
  when(io.pipe_req.fire()){
    selected_req_valid := false.B
  }
  when(pipe_req_arb.io.out.fire()){
    selected_req_valid := true.B
  }

  // sanity checks on incoming probes (assertions, not debug prints)
  when (io.mem_probe.valid) {
    // before a probe finishes, L2 should not further issue probes on this block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.mem_probe.bits.address)).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  when (io.mem_probe.fire()) {
    XSDebug("mem_probe: ")
    io.mem_probe.bits.dump
  }

//  when (io.pipe_req.fire()) {
//    io.pipe_req.bits.dump()
//  }

  when (io.lrsc_locked_block.valid) {
    XSDebug("lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  }

  // hoisted: occupancy count used by all the bucketed perf events below
  // (the original recomputed this PopCount six times)
  val num_valid_entries = PopCount(entries.map(e => e.io.block_addr.valid))
  val perfEvents = Seq(
    ("dcache_probq_req      ", io.pipe_req.fire()                                                                                          ),
    ("dcache_probq_1_4_valid", (num_valid_entries < (cfg.nProbeEntries.U/4.U))                                                             ),
    ("dcache_probq_2_4_valid", (num_valid_entries > (cfg.nProbeEntries.U/4.U)) & (num_valid_entries <= (cfg.nProbeEntries.U/2.U))          ),
    ("dcache_probq_3_4_valid", (num_valid_entries > (cfg.nProbeEntries.U/2.U)) & (num_valid_entries <= (cfg.nProbeEntries.U*3.U/4.U))      ),
    ("dcache_probq_4_4_valid", (num_valid_entries > (cfg.nProbeEntries.U*3.U/4.U))                                                         ),
  )
  generatePerfEvent()
}
242