xref: /XiangShan/src/main/scala/xiangshan/mem/prefetch/FDP.scala (revision 83ba63b34cf09b33c0a9e1b3203138e51af4491b)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem.prefetch
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import freechips.rocketchip.tilelink.ClientStates._
23import freechips.rocketchip.tilelink.MemoryOpCategories._
24import freechips.rocketchip.tilelink.TLPermissions._
25import freechips.rocketchip.tilelink.{ClientMetadata, ClientStates, TLPermissions}
26import xiangshan.backend.rob.RobDebugRollingIO
27import utils._
28import utility._
29import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
30import xiangshan.mem.HasL1PrefetchSourceParameter
31import utility.{CircularQueuePtr}
32import xiangshan.cache._
33import xiangshan.{XSBundle, XSModule}
34
35//----------------------------------------
// Feedback Directed Prefetching
/** Location of one DCache line, expressed as the (set index, way) pair it occupies. */
class CounterFilterDataBundle(implicit p: Parameters) extends DCacheBundle {
  // set index of the line, idxBits wide
  val idx: UInt = UInt(idxBits.W)
  // way of the line inside that set, wayBits wide
  val way: UInt = UInt(wayBits.W)
}
41
/** Lookup port of [[CounterFilter]].
  *
  * `req` carries the set/way to look up; `resp` carries the answer. CounterFilter
  * instantiates this bundle flipped and drives `resp` high when the request is valid
  * and the same set/way is currently recorded in the filter.
  */
class CounterFilterQueryBundle(implicit p: Parameters) extends DCacheBundle {
  // set/way being looked up, qualified by req.valid
  val req: Valid[CounterFilterDataBundle] = ValidIO(new CounterFilterDataBundle())
  // lookup result, seen from the requester's side (hence Input)
  val resp: Bool = Input(Bool())
}
46
// LoadPipe has no set blocking, so the useful prefetch counter can be incremented more than once for the same line
// s0    s1     s2     s3
// r                   w
// if 3 load insts are accessing the same cache line (set0, way0) in s0, s1, s2,
// they all think they hit a prefetched line, and increment the useful prefetch counter 3 times
// so when a load arrives at s3, save its set & way to a FIFO; all loads search this FIFO to avoid this case
/** Small FIFO of recently reported (set, way) pairs.
  *
  * Every valid `ld_in` request is written into the FIFO and is removed again once the
  * allocation count has travelled through the three-register delay chain below.
  * A `query` answers whether its (set, way) matches any entry that is currently valid.
  */
class CounterFilter()(implicit p: Parameters) extends DCacheModule {
  private val LduCnt = backendParams.LduCnt

  val io = IO(new Bundle() {
    // input, only from load for now
    val ld_in = Flipped(Vec(LduCnt, ValidIO(new CounterFilterDataBundle())))
    val query = Flipped(Vec(LduCnt, new CounterFilterQueryBundle()))
  })

  // FIFO depth: LduStages slots for each load port
  val LduStages = 4
  val SIZE = (LduStages) * LduCnt

  // circular pointer (wrap flag + value) sized for this FIFO
  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr]( p => SIZE ){}
  object Ptr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val built = Wire(new Ptr)
      built.flag := f
      built.value := v
      built
    }
  }

  // storage: payload plus one valid bit per slot, all cleared on reset
  val entries = RegInit(VecInit(Seq.fill(SIZE)(0.U.asTypeOf(new CounterFilterDataBundle()))))
  val valids = RegInit(VecInit(Seq.fill(SIZE)(false.B)))

  // enq
  val enqLen = LduCnt
  val deqLen = LduCnt
  // one pointer per port; pointer k starts at slot k
  val enqPtrExt = RegInit(VecInit(Seq.tabulate(enqLen)(_.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(VecInit(Seq.tabulate(deqLen)(_.U.asTypeOf(new Ptr))))

  val deqPtr = WireInit(deqPtrExt(0).value)

  val reqs_l = io.ld_in.map(_.bits)
  val reqs_vl = io.ld_in.map(_.valid)
  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))
  val last3CycleAlloc = RegInit(0.U(log2Ceil(LduCnt + 1).W))

  reqs_l.zip(reqs_vl).zipWithIndex.foreach { case ((payload, fire), i) =>
    // number of lower-numbered ports that also want a slot this cycle
    val slotOffset = PopCount(needAlloc.take(i))
    val slotPtr = enqPtrExt(slotOffset)

    needAlloc(i) := fire
    canAlloc(i) := needAlloc(i) && slotPtr >= deqPtrExt(0)

    when(canAlloc(i)) {
      valids(slotPtr.value) := true.B
      entries(slotPtr.value) := payload
    }

    assert(!needAlloc(i) || canAlloc(i), s"port${i} can not accept CounterFilter enq request, check if SIZE >= (Ldu stages - 2) * LduCnt")
  }
  val allocNum = PopCount(canAlloc)

  enqPtrExt.foreach(ptr => ptr := ptr + allocNum)
  // two RegNext stages plus the register itself: the count shows up three cycles later
  last3CycleAlloc := RegNext(RegNext(allocNum))

  // deq
  // These connections come after the enq ones on purpose: if a slot is both written
  // and released in the same cycle, the release (later connection) wins.
  deqPtrExt.zipWithIndex.foreach { case (ptr, i) =>
    when(i.U < last3CycleAlloc) {
      valids(ptr.value) := false.B
    }
  }

  deqPtrExt.foreach(ptr => ptr := ptr + last3CycleAlloc)

  // query
  val querys_l = io.query.map(_.req.bits)
  val querys_vl = io.query.map(_.req.valid)
  querys_l.zip(querys_vl).zipWithIndex.foreach { case ((q, q_v), i) =>
    // true when any valid slot holds the same set and way as the query
    val entry_match = entries.zip(valids).map { case (e, v) =>
      v && (q.idx === e.idx) && (q.way === e.way)
    }.foldLeft(false.B)(_ || _)

    io.query(i).resp := q_v && entry_match
  }

  XSPerfAccumulate("req_nums", PopCount(io.query.map(_.req.valid)))
  XSPerfAccumulate("req_set_way_match", PopCount(io.query.map(_.resp)))
}
138
/** Address of one bit inside an `n`-entry [[BloomFilter]].
  *
  * @param n number of filter bits; the address is log2Ceil(n) bits wide
  */
class BloomQueryBundle(n: Int)(implicit p: Parameters) extends DCacheBundle {
  // width of a filter bit address
  def BLOOMADDRWIDTH = log2Ceil(n)

  val addr = UInt(BLOOMADDRWIDTH.W)

  /** Hashes a physical address into a filter bit address.
    *
    * The block offset is dropped, then the two lowest BLOOMADDRWIDTH-bit fields of the
    * block address are XORed together.
    *
    * @param paddr physical address; must be exactly PAddrBits wide
    * @return the BLOOMADDRWIDTH-bit hash
    */
  def get_addr(paddr: UInt): UInt = {
    // elaboration-time width checks
    assert(paddr.getWidth == PAddrBits)
    assert(paddr.getWidth >= (blockOffBits + 2 * BLOOMADDRWIDTH))
    val w = BLOOMADDRWIDTH
    val blockAddr = paddr(paddr.getWidth - 1, blockOffBits)
    blockAddr(w - 1, 0) ^ blockAddr(2 * w - 1, w)
  }
}
153
/** Result of a [[BloomFilter]] query. */
class BloomRespBundle(implicit p: Parameters) extends DCacheBundle {
  // value of the filter bit selected by the query address
  val res: Bool = Bool()
}
/** An `n`-bit filter with one set port, one clear port and LoadPipelineWidth query ports.
  *
  * @param n      number of filter bits
  * @param bypass when true, a query reads the next-state bits, so it observes a set/clr
  *               issued in the same cycle; when false it reads the registered bits
  */
class BloomFilter(n: Int, bypass: Boolean = true)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new DCacheBundle {
    val set = Flipped(ValidIO(new BloomQueryBundle(n)))
    val clr = Flipped(ValidIO(new BloomQueryBundle(n)))
    val query = Vec(LoadPipelineWidth, Flipped(ValidIO(new BloomQueryBundle(n))))
    val resp = Vec(LoadPipelineWidth, ValidIO(new BloomRespBundle))
  })

  val data = RegInit(0.U(n.W))
  val data_next = Wire(Vec(n, Bool()))

  // next state of every bit; clr has priority over set on the same bit
  data_next.zipWithIndex.foreach { case (nextBit, i) =>
    val clrHit = io.clr.valid && i.U === io.clr.bits.addr
    val setHit = io.set.valid && i.U === io.set.bits.addr
    nextBit := Mux(clrHit, false.B, Mux(setHit, true.B, data(i)))
  }

  // the response becomes valid one cycle after the query
  io.query.zip(io.resp).foreach { case (q, r) =>
    val lookup = if (bypass) data_next(q.bits.addr) else data(q.bits.addr)
    r.valid := RegNext(q.valid)
    r.bits.res := RegEnable(lookup, q.valid)
  }

  data := data_next.asUInt

  // one set plus one clr can flip at most two bits per cycle
  assert(PopCount(data ^ data_next.asUInt) <= 2.U)

  XSPerfHistogram("valid_nums", PopCount(data), true.B, 0, n + 1, 20)
}
194
/** IO of [[FDPrefetcherMonitor]]: single-cycle event inputs grouped by metric, plus the
  * prefetch control output and the rolling-statistics debug port.
  */
class FDPrefetcherMonitorBundle()(implicit p: Parameters) extends XSBundle {
  // from refill pipe: fire
  val refill = Input(Bool())

  val accuracy = new XSBundle {
    // from mshr enq: fire, alloc, prefetch
    val total_prefetch = Input(Bool())
    // from load pipeline (one bit per pipe): prefetch hit
    val useful_prefetch = Vec(LoadPipelineWidth, Input(Bool()))
  }

  val timely = new XSBundle {
    // from mshr enq: a load matches a mshr caused by prefetch
    val late_prefetch = Input(Bool())
  }

  val pollution = new XSBundle {
    // from load pipeline (one bit per pipe): first miss
    val demand_miss = Vec(LoadPipelineWidth, Input(Bool()))
    // from load pipeline (one bit per pipe): first miss and pollution caused
    val cache_pollution = Vec(LoadPipelineWidth, Input(Bool()))
  }

  // prefetch control output
  val pf_ctrl = Output(new PrefetchControlBundle)
  // rolling-statistics port shared with the ROB, flipped on this side
  val debugRolling = Flipped(new RobDebugRollingIO)
}
214
/** Event counters for feedback directed prefetching.
  *
  * Five event classes are counted (total / useful / late prefetch, demand miss, cache
  * pollution). Time is measured in refills: after INTERVAL refills have been counted,
  * every history counter is updated to `prev / 2 + interval / 2` and the interval
  * counters restart from zero.
  *
  * In this module `io.pf_ctrl` is left as DontCare; the counters only feed the
  * performance/rolling statistics below.
  */
class FDPrefetcherMonitor()(implicit p: Parameters) extends XSModule {
  val io = IO(new FDPrefetcherMonitorBundle)

  // number of refills that make up one sampling interval
  val INTERVAL = 8192
  // one extra bit so that the value INTERVAL itself is representable
  val CNTWIDTH = log2Up(INTERVAL) + 1

  io.pf_ctrl := DontCare

  val refill_cnt = RegInit(0.U(CNTWIDTH.W))

  // history counters, halved and merged with the interval counters at each interval end
  val total_prefetch_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val useful_prefetch_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val late_prefetch_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val demand_miss_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val pollution_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val prev_cnts = Seq(total_prefetch_prev_cnt, useful_prefetch_prev_cnt, late_prefetch_prev_cnt, demand_miss_prev_cnt, pollution_prev_cnt)

  // counters for the interval currently in progress (same order as prev_cnts)
  val total_prefetch_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val useful_prefetch_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val late_prefetch_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val demand_miss_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val pollution_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val interval_cnts = Seq(total_prefetch_interval_cnt, useful_prefetch_interval_cnt, late_prefetch_interval_cnt, demand_miss_interval_cnt, pollution_interval_cnt)

  val interval_trigger = refill_cnt === INTERVAL.U

  // event inputs, in the same order as interval_cnts; some are Bool, some are Vec[Bool]
  val io_ens = Seq(io.accuracy.total_prefetch, io.accuracy.useful_prefetch, io.timely.late_prefetch, io.pollution.demand_miss, io.pollution.cache_pollution)

  // every cycle, add the number of asserted event bits to the matching counter
  for((interval, en) <- interval_cnts.zip(io_ens)) {
    interval := interval + PopCount(en.asUInt)
  }

  when(io.refill) {
    refill_cnt := refill_cnt + 1.U
  }

  // end of interval: these later connections override the increments above
  when(interval_trigger) {
    refill_cnt := 0.U
    for((prev, interval) <- prev_cnts.zip(interval_cnts)) {
      // prev / 2 + interval / 2, both operands zero-extended back to full width
      prev := Cat(0.U(1.W), prev(prev.getWidth - 1, 1)) + Cat(0.U(1.W), interval(interval.getWidth - 1, 1))
      interval := 0.U
    }
  }

  // rolling by instr
  XSPerfRolling(
    "L1PrefetchAccuracyIns",
    PopCount(io.accuracy.useful_prefetch), PopCount(io.accuracy.total_prefetch),
    1000, io.debugRolling.robTrueCommit, clock, reset
  )

  XSPerfRolling(
    "L1PrefetchLatenessIns",
    PopCount(io.timely.late_prefetch), PopCount(io.accuracy.total_prefetch),
    1000, io.debugRolling.robTrueCommit, clock, reset
  )

  XSPerfRolling(
    "L1PrefetchPollutionIns",
    PopCount(io.pollution.cache_pollution), PopCount(io.pollution.demand_miss),
    1000, io.debugRolling.robTrueCommit, clock, reset
  )

  XSPerfRolling(
    "IPCIns",
    io.debugRolling.robTrueCommit, 1.U,
    1000, io.debugRolling.robTrueCommit, clock, reset
  )

  XSPerfAccumulate("io_refill", io.refill)
  XSPerfAccumulate("total_prefetch_en", io.accuracy.total_prefetch)
  // `+&` widens the sum by one bit. A plain `+` keeps the operand width and would drop
  // the carry when the PopCount result has no spare bit (e.g. a single load pipeline:
  // 1 + 1 would wrap to 0).
  XSPerfAccumulate("useful_prefetch_en", PopCount(io.accuracy.useful_prefetch) +& io.timely.late_prefetch)
  XSPerfAccumulate("late_prefetch_en", io.timely.late_prefetch)
  XSPerfAccumulate("demand_miss_en", PopCount(io.pollution.demand_miss))
  XSPerfAccumulate("cache_pollution_en", PopCount(io.pollution.cache_pollution))
}