xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/Probe.scala (revision d6477c69bc3348d63058f8f4cebbf80cad7ca1e0)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache
18
19import chipsalliance.rocketchip.config.Parameters
20import chisel3._
21import chisel3.util._
22
23import freechips.rocketchip.tilelink.{TLEdgeOut, TLBundleB, TLMessages, TLPermissions}
24
25import utils.{HasTLDump, XSDebug, XSPerfAccumulate, PerfEventsBundle, PipelineConnect}
26
/** Inner representation of a TileLink B-channel Probe request.
  *
  * Translated from [[TLBundleB]] by [[ProbeQueue]] and consumed by a
  * [[ProbeEntry]], which forwards it to the main pipeline.
  */
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
  val source = UInt()
  val opcode = UInt()
  val addr   = UInt(PAddrBits.W)
  val vaddr  = UInt(VAddrBits.W) // l2 uses vaddr index to probe l1
  val param  = UInt(TLPermissions.bdWidth.W) // TL cap permission (toN/toB/toT)
  val needData = Bool() // whether probe ack must carry data

  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)

  def dump() = {
    // Fix: previously vaddr and needData were bundle fields but never printed,
    // making the debug dump incomplete.
    XSDebug("ProbeReq source: %d opcode: %d addr: %x vaddr: %x param: %d needData: %b\n",
      source, opcode, addr, vaddr, param, needData)
  }
}
44
/** Completion notification for a probe: carries only the ID of the
  * [[ProbeEntry]] whose request has finished in the main pipeline.
  */
class ProbeResp(implicit p: Parameters) extends DCacheBundle {
  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)
}
49
/** One slot of the probe queue.
  *
  * Simple 3-state FSM: accept a [[ProbeReq]] (s_invalid), issue it to the
  * main pipeline (s_pipe_req) — stalling while the LR/SC reservation holds
  * the probed block — then wait for the pipeline to report completion for
  * this entry's ID (s_wait_resp).
  */
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    val req = Flipped(Decoupled(new ProbeReq))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val pipe_resp = Input(Valid(new ProbeResp))
    // address currently locked by an LR; probing it must be delayed
    val lrsc_locked_block = Input(Valid(UInt()))
    val id = Input(UInt(log2Up(cfg.nProbeEntries).W))

    // the block we are probing
    val block_addr  = Output(Valid(UInt()))
  })

  val s_invalid :: s_pipe_req :: s_wait_resp :: Nil = Enum(3)

  val state = RegInit(s_invalid)

  val req = Reg(new ProbeReq)

  // assign default values to signals
  io.req.ready      := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits  := DontCare

  // expose the busy block so ProbeQueue can assert L2 never double-probes it
  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits  := req.addr

  // Fix: a second when-block printing only `state` was redundant with this
  // one (strict subset of the information) and has been removed.
  when (state =/= s_invalid) {
    XSDebug("ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)
  }

  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire()) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

  // Compare against the incoming request on the allocation cycle, and the
  // latched one afterwards, so the stall condition is valid in both phases.
  val lrsc_blocked = Mux(
    io.req.fire(),
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === io.req.bits.addr,
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr
  )

  when (state === s_pipe_req) {
    // Note that probe req will be blocked in the next cycle if a lr updates lrsc_locked_block addr
    // in this way, we can RegNext(lrsc_blocked) for better timing
    io.pipe_req.valid := !RegNext(lrsc_blocked)

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr   := req.addr
    pipe_req.vaddr  := req.vaddr
    pipe_req.probe_need_data := req.needData
    pipe_req.id := io.id

    when (io.pipe_req.fire()) {
      state := s_wait_resp
    }
  }

  when (state === s_wait_resp) {
    // only our own completion (matching ID) frees the entry
    when (io.pipe_resp.valid && io.id === io.pipe_resp.bits.id) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire())
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === req.addr)
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}
130
/** Receives TileLink B-channel Probe messages from L2, translates them to
  * inner [[ProbeReq]]s, holds them in nProbeEntries [[ProbeEntry]] slots,
  * and arbitrates their requests into the DCache main pipeline.
  */
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump
{
  val io = IO(new Bundle {
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val lrsc_locked_block = Input(Valid(UInt()))
    // pulses when the reservation set is updated; blocks probes next cycle
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for incoming request
  val primary_ready  = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

  // translate to inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // add extra 2 bits from vaddr to get vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if(DCacheAboveIndexOffset > DCacheTagOffset) {
    // have alias problem, extra alias bits needed for index
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0)                 // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  req.needData := io.mem_probe.bits.data(0)
  req.id := DontCare

  io.mem_probe.ready := allocate

  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)
    entry.io.id := i.U

    // entry req: steer the incoming probe to the lowest-index free entry
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i)   := entry.io.req.ready
    entry.io.req.bits  := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    // pipe_resp: an entry is done once its request has entered the pipeline
    entry.io.pipe_resp.valid := io.pipe_req.fire()
    entry.io.pipe_resp.bits.id := io.pipe_req.bits.id

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }

  // delay probe req for 1 cycle: compare the LR/SC-locked address against
  // the request entering the pipeline register (on fire) or already held in it
  val selected_lrsc_blocked = Mux(
    pipe_req_arb.io.out.fire(),
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === pipe_req_arb.io.out.bits.addr,
    io.lrsc_locked_block.valid && io.lrsc_locked_block.bits === io.pipe_req.bits.addr && io.pipe_req.valid
  )
  val resvsetProbeBlock = RegNext(io.update_resv_set || selected_lrsc_blocked)
  PipelineConnect(pipe_req_arb.io.out, io.pipe_req, io.pipe_req.fire() && !resvsetProbeBlock, false.B, resvsetProbeBlock)
  // When we update update_resv_set, block all probe req in the next cycle
  // It should give Probe reservation set addr compare an independent cycle,
  // which will lead to better timing
  when(resvsetProbeBlock){
    io.pipe_req.valid := false.B
  }

  // print all input/output requests for debug purpose
  when (io.mem_probe.valid) {
    // before a probe finishes, L2 should not further issue probes on this block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && e.io.block_addr.bits === io.mem_probe.bits.address)).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  when (io.mem_probe.fire()) {
    XSDebug("mem_probe: ")
    io.mem_probe.bits.dump
  }

//  when (io.pipe_req.fire()) {
//    io.pipe_req.bits.dump()
//  }

  when (io.lrsc_locked_block.valid) {
    XSDebug("lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  }
  val perfinfo = IO(new Bundle(){
    val perfEvents = Output(new PerfEventsBundle(5))
  })
  // Occupancy histogram of the probe queue, bucketed by quarters.
  // Fix: the 1/4 bucket previously used `<` while the 2/4 bucket starts at
  // `> 1/4`, so an occupancy exactly equal to nProbeEntries/4 fell in no
  // bucket; `<=` makes the four buckets a complete partition.
  val perfEvents = Seq(
    ("dcache_probq_req          ", io.pipe_req.fire()                                                                                                                                                                       ),
    ("dcache_probq_1/4_valid    ", (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nProbeEntries.U/4.U))                                                                                      ),
    ("dcache_probq_2/4_valid    ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nProbeEntries.U/4.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nProbeEntries.U/2.U))    ),
    ("dcache_probq_3/4_valid    ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nProbeEntries.U/2.U)) & (PopCount(entries.map(e => e.io.block_addr.valid)) <= (cfg.nProbeEntries.U*3.U/4.U))),
    ("dcache_probq_4/4_valid    ", (PopCount(entries.map(e => e.io.block_addr.valid)) > (cfg.nProbeEntries.U*3.U/4.U))                                                                                   ),
  )

  for (((perf_out,(perf_name,perf)),i) <- perfinfo.perfEvents.perf_events.zip(perfEvents).zipWithIndex) {
    perf_out.incr_step := RegNext(perf)
  }
}
242