xref: /XiangShan/src/main/scala/xiangshan/cache/dcache/mainpipe/Probe.scala (revision bb2f3f51dd67f6e16e0cc1ffe43368c9fc7e4aef)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.{TLBundleB, TLEdgeOut, TLMessages, TLPermissions}
import utils.{HasPerfEvents, HasTLDump}
import utility.{XSDebug, XSPerfAccumulate}

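// A probe request as seen inside the DCache: decoded from a TileLink B-channel Probe,
// carrying the physical/virtual address, the TileLink permission parameter and a hint
// on whether the probed block's data is needed.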
class ProbeReq(implicit p: Parameters) extends DCacheBundle
{
  val source = UInt()
  val opcode = UInt()
  val addr   = UInt(PAddrBits.W)
  val vaddr  = UInt(VAddrBits.W) // l2 uses vaddr index to probe l1
  val param  = UInt(TLPermissions.bdWidth.W)
  val needData = Bool()

  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)

  def dump() = {
    XSDebug("ProbeReq source: %d opcode: %d addr: %x param: %d\n",
      source, opcode, addr, param)
  }
}

class ProbeResp(implicit p: Parameters) extends DCacheBundle {
  // probe queue entry ID
  val id = UInt(log2Up(cfg.nProbeEntries).W)
}

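// One probe-queue entry: holds a single outstanding ProbeReq, forwards it to the
// main pipeline (unless the block is locked by an LR/SC reservation), and waits
// for the pipeline to acknowledge it before becoming free again.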
class ProbeEntry(implicit p: Parameters) extends DCacheModule {
  val io = IO(new Bundle {
    val req = Flipped(Decoupled(new ProbeReq))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val pipe_resp = Input(Valid(new ProbeResp))
    val lrsc_locked_block = Input(Valid(UInt()))
    val id = Input(UInt(log2Up(cfg.nProbeEntries).W))

    // the block we are probing
    val block_addr  = Output(Valid(UInt()))
  })

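  // Entry state machine:
  //   s_invalid   - entry is free and can accept a new probe request
  //   s_pipe_req  - trying to issue the probe into the main pipeline
  //   s_wait_resp - waiting for the main pipeline to acknowledge this entry's id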
  val s_invalid :: s_pipe_req :: s_wait_resp :: Nil = Enum(3)

  val state = RegInit(s_invalid)

  val req = Reg(new ProbeReq)

  // assign default values to signals
  io.req.ready      := false.B
  io.pipe_req.valid := false.B
  io.pipe_req.bits  := DontCare

  io.block_addr.valid := state =/= s_invalid
  io.block_addr.bits  := req.addr

  when (state =/= s_invalid) {
    XSDebug("state: %d\n", state)
  }

  when (state =/= s_invalid) {
    XSDebug("ProbeEntry: state: %d block_addr: %x\n", state, io.block_addr.bits)
  }

  when (state === s_invalid) {
    io.req.ready := true.B
    when (io.req.fire) {
      req := io.req.bits
      state := s_pipe_req
    }
  }

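  // A probe must not enter the main pipeline while its block is held by an LR/SC
  // reservation. On the allocation cycle compare against the incoming request's
  // address, afterwards against the registered one.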
  val lrsc_blocked = Mux(
    io.req.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(io.req.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr)
  )

  when (state === s_pipe_req) {
    // Note that the probe req will be blocked in the next cycle if an LR updates lrsc_locked_block;
    // this lets us use RegNext(lrsc_blocked) here for better timing
    io.pipe_req.valid := !RegNext(lrsc_blocked)

    val pipe_req = io.pipe_req.bits
    pipe_req := DontCare
    pipe_req.miss := false.B
    pipe_req.probe := true.B
    pipe_req.probe_param := req.param
    pipe_req.addr   := req.addr
    pipe_req.vaddr  := req.vaddr
    pipe_req.probe_need_data := req.needData
    pipe_req.error := false.B
    pipe_req.id := io.id

    when (io.pipe_req.fire) {
      state := s_wait_resp
    }
  }

  when (state === s_wait_resp) {
    when (io.pipe_resp.valid && io.id === io.pipe_resp.bits.id) {
      state := s_invalid
    }
  }

  // performance counters
  XSPerfAccumulate("probe_req", state === s_invalid && io.req.fire)
  XSPerfAccumulate("probe_penalty", state =/= s_invalid)
  XSPerfAccumulate("probe_penalty_blocked_by_lrsc", state === s_pipe_req && io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(req.addr))
  XSPerfAccumulate("probe_penalty_blocked_by_pipeline", state === s_pipe_req && io.pipe_req.valid && !io.pipe_req.ready)
}

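// The probe queue: receives Probe messages from the L2 on the TileLink B channel,
// buffers them in nProbeEntries entries and arbitrates their requests into the
// DCache main pipeline.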
class ProbeQueue(edge: TLEdgeOut)(implicit p: Parameters) extends DCacheModule with HasTLDump with HasPerfEvents
{
  val io = IO(new Bundle {
    val mem_probe = Flipped(Decoupled(new TLBundleB(edge.bundle)))
    val pipe_req  = DecoupledIO(new MainPipeReq)
    val lrsc_locked_block = Input(Valid(UInt()))
    val update_resv_set = Input(Bool())
  })

  val pipe_req_arb = Module(new Arbiter(new MainPipeReq, cfg.nProbeEntries))

  // allocate a free entry for the incoming request
  val primary_ready  = Wire(Vec(cfg.nProbeEntries, Bool()))
  val allocate = primary_ready.asUInt.orR
  val alloc_idx = PriorityEncoder(primary_ready)

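  // Besides the address, the B-channel data field carries extra information:
  // bits (2, 1) hold the virtual-index alias bits and bit 0 the need-data hint
  // (see alias_addr_frag and req.needData below).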
  // translate to inner req
  val req = Wire(new ProbeReq)
  val alias_addr_frag = io.mem_probe.bits.data(2, 1) // add extra 2 bits from vaddr to get vindex
  req.source := io.mem_probe.bits.source
  req.opcode := io.mem_probe.bits.opcode
  req.addr := io.mem_probe.bits.address
  if (DCacheAboveIndexOffset > DCacheTagOffset) {
    // alias problem exists: extra alias bits are needed for the index
    req.vaddr := Cat(
      io.mem_probe.bits.address(PAddrBits - 1, DCacheAboveIndexOffset), // dontcare
      alias_addr_frag(DCacheAboveIndexOffset - DCacheTagOffset - 1, 0), // index
      io.mem_probe.bits.address(DCacheTagOffset - 1, 0)                 // index & others
    )
  } else { // no alias problem
    req.vaddr := io.mem_probe.bits.address
  }
  req.param := io.mem_probe.bits.param
  req.needData := io.mem_probe.bits.data(0)
  req.id := DontCare

  io.mem_probe.ready := allocate

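  // instantiate nProbeEntries probe entries; an incoming probe is allocated to the
  // lowest-indexed free entry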
  val entries = (0 until cfg.nProbeEntries) map { i =>
    val entry = Module(new ProbeEntry)
    entry.io.id := i.U

    // entry req
    entry.io.req.valid := (i.U === alloc_idx) && allocate && io.mem_probe.valid
    primary_ready(i)   := entry.io.req.ready
    entry.io.req.bits  := req

    // pipe_req
    pipe_req_arb.io.in(i) <> entry.io.pipe_req

    // pipe_resp
    entry.io.pipe_resp.valid := io.pipe_req.fire
    entry.io.pipe_resp.bits.id := io.pipe_req.bits.id

    entry.io.lrsc_locked_block := io.lrsc_locked_block

    entry
  }
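  // Note: within this queue, an entry's pipe_resp is derived from io.pipe_req.fire
  // and the granted id, so an entry is freed as soon as the main pipeline accepts
  // its request; there is no separate response channel back from the pipeline.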

  // delay probe req for 1 cycle
  val selected_req_valid = RegInit(false.B)
  val selected_req_bits = RegEnable(pipe_req_arb.io.out.bits, pipe_req_arb.io.out.fire)
  val selected_lrsc_blocked = Mux(
    pipe_req_arb.io.out.fire,
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(pipe_req_arb.io.out.bits.addr),
    io.lrsc_locked_block.valid && get_block(io.lrsc_locked_block.bits) === get_block(selected_req_bits.addr) && selected_req_valid
  )
  // When update_resv_set is asserted, block all probe reqs in the next cycle.
  // This gives the probe vs. reservation-set address comparison an independent
  // cycle, which leads to better timing.
  val resvsetProbeBlock = RegNext(io.update_resv_set || selected_lrsc_blocked)
  pipe_req_arb.io.out.ready := !selected_req_valid || io.pipe_req.fire
  io.pipe_req.valid := selected_req_valid && !resvsetProbeBlock
  io.pipe_req.bits := selected_req_bits
  when (io.pipe_req.fire) {
    selected_req_valid := false.B
  }
  when (pipe_req_arb.io.out.fire) {
    selected_req_valid := true.B
  }

  // sanity checks on incoming probes
  when (io.mem_probe.valid) {
    // before a probe finishes, the L2 should not issue further probes on this block
    val probe_conflict = VecInit(entries.map(e => e.io.block_addr.valid && get_block(e.io.block_addr.bits) === get_block(io.mem_probe.bits.address))).asUInt.orR
    assert (!probe_conflict)
    // for now, we can only deal with ProbeBlock
    assert (io.mem_probe.bits.opcode === TLMessages.Probe)
  }

  // debug output
  when (io.mem_probe.fire) {
    XSDebug("mem_probe: ")
    io.mem_probe.bits.dump
  }

//  when (io.pipe_req.fire) {
//    io.pipe_req.bits.dump()
//  }

  when (io.lrsc_locked_block.valid) {
    XSDebug("lrsc_locked_block: %x\n", io.lrsc_locked_block.bits)
  }
  XSPerfAccumulate("ProbeL1DCache", io.mem_probe.fire)

  val perfValidCount = RegNext(PopCount(entries.map(e => e.io.block_addr.valid)))
  val perfEvents = Seq(
    ("dcache_probq_req      ", io.pipe_req.fire),
    ("dcache_probq_1_4_valid", (perfValidCount < (cfg.nProbeEntries.U/4.U))),
    ("dcache_probq_2_4_valid", (perfValidCount > (cfg.nProbeEntries.U/4.U)) & (perfValidCount <= (cfg.nProbeEntries.U/2.U))),
    ("dcache_probq_3_4_valid", (perfValidCount > (cfg.nProbeEntries.U/2.U)) & (perfValidCount <= (cfg.nProbeEntries.U*3.U/4.U))),
    ("dcache_probq_4_4_valid", (perfValidCount > (cfg.nProbeEntries.U*3.U/4.U))),
  )
  generatePerfEvent()
}
246