xref: /XiangShan/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala (revision edbf1204ab927cca43afe4c2b53a96c042f208aa)
1package xiangshan.mem.prefetch
2
3import chipsalliance.rocketchip.config.Parameters
4import chisel3._
5import chisel3.util._
6import xiangshan._
7import utils._
8import utility._
9import xiangshan.cache.HasDCacheParameters
10import xiangshan.cache.mmu._
11import xiangshan.mem.L1PrefetchReq
12import xiangshan.mem.trace._
13
// Configuration parameters for the SMS (Spatial Memory Streaming) prefetcher.
// Consumed via HasSMSModuleHelper, which derives all bit widths from these values.
case class SMSParams
(
  region_size: Int = 1024,            // spatial region size in bytes
  vaddr_hash_width: Int = 5,          // width of each of the 3 vaddr slices folded by vaddr_hash
  block_addr_raw_width: Int = 10,     // low raw (un-hashed) bits kept from a block address
  stride_pc_bits: Int = 10,           // PC bits used to index/tag the stride table
  max_stride: Int = 1024,             // max stride magnitude tracked (in blocks)
  stride_entries: Int = 16,           // number of stride table entries
  active_gen_table_size: Int = 16,    // number of AGT entries
  pht_size: Int = 64,                 // total PHT entries (across all ways)
  pht_ways: Int = 2,                  // PHT associativity
  pht_hist_bits: Int = 2,             // saturating-counter width of each PHT history bit
  pht_tag_bits: Int = 13,             // PHT tag width
  pht_lookup_queue_size: Int = 4,     // depth of PHT lookup/evict queues
  pf_filter_size: Int = 16            // prefetch filter size
) extends PrefetcherParams
30
// Shared constants and address-manipulation helpers for all SMS sub-modules.
// All widths below are derived from SMSParams and the dcache geometry.
trait HasSMSModuleHelper extends HasCircularQueuePtrHelper with HasDCacheParameters
{ this: HasXSParameter =>
  val smsParams = coreParams.prefetcher.get.asInstanceOf[SMSParams]
  // block address = vaddr with the cache-block offset stripped
  val BLK_ADDR_WIDTH = VAddrBits - log2Up(dcacheParameters.blockBytes)
  val REGION_SIZE = smsParams.region_size
  // number of cache blocks per region
  val REGION_BLKS = smsParams.region_size / dcacheParameters.blockBytes
  // region address = vaddr with the region offset stripped
  val REGION_ADDR_BITS = VAddrBits - log2Up(REGION_SIZE)
  // bits needed to select a block within a region
  val REGION_OFFSET = log2Up(REGION_BLKS)
  val VADDR_HASH_WIDTH = smsParams.vaddr_hash_width
  val BLK_ADDR_RAW_WIDTH = smsParams.block_addr_raw_width
  val REGION_ADDR_RAW_WIDTH = BLK_ADDR_RAW_WIDTH - REGION_OFFSET
  // tag = raw low bits ++ hash of the 3*VADDR_HASH_WIDTH bits above them
  val BLK_TAG_WIDTH = BLK_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val REGION_TAG_WIDTH = REGION_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val PHT_INDEX_BITS = log2Up(smsParams.pht_size / smsParams.pht_ways)
  val PHT_TAG_BITS = smsParams.pht_tag_bits
  val PHT_HIST_BITS = smsParams.pht_hist_bits
  // page bit index in block addr
  val BLOCK_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / dcacheParameters.blockBytes)
  // page bit index in region addr
  val REGION_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / smsParams.region_size)
  val STRIDE_PC_BITS = smsParams.stride_pc_bits
  val STRIDE_BLK_ADDR_BITS = log2Up(smsParams.max_stride)

  // Strip the cache-block offset from a full (virtual or physical) address.
  def block_addr(x: UInt): UInt = {
    val offset = log2Up(dcacheParameters.blockBytes)
    x(x.getWidth - 1, offset)
  }

  // Strip the region offset from a full address.
  def region_addr(x: UInt): UInt = {
    val offset = log2Up(REGION_SIZE)
    x(x.getWidth - 1, offset)
  }

  // One-hot mask of a block's position within its region.
  def region_offset_to_bits(off: UInt): UInt = {
    (1.U << off).asUInt
  }

  // Compress a region address into a REGION_TAG_WIDTH tag:
  // keep the raw low bits, xor-fold the 3*VADDR_HASH_WIDTH bits above them.
  def region_hash_tag(rg_addr: UInt): UInt = {
    val low = rg_addr(REGION_ADDR_RAW_WIDTH - 1, 0)
    val high = rg_addr(REGION_ADDR_RAW_WIDTH + 3 * VADDR_HASH_WIDTH - 1, REGION_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  // The bit of a region address that changes when crossing a page boundary.
  def page_bit(region_addr: UInt): UInt = {
    region_addr(log2Up(dcacheParameters.pageSize/REGION_SIZE))
  }

  // Block-granularity analogue of region_hash_tag, applied to a full address.
  def block_hash_tag(x: UInt): UInt = {
    val blk_addr = block_addr(x)
    val low = blk_addr(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = blk_addr(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  // xor-fold 3*VADDR_HASH_WIDTH bits down to VADDR_HASH_WIDTH bits.
  def vaddr_hash(x: UInt): UInt = {
    val width = VADDR_HASH_WIDTH
    val low = x(width - 1, 0)
    val mid = x(2 * width - 1, width)
    val high = x(3 * width - 1, 2 * width)
    low ^ mid ^ high
  }

  // PHT set index from the PC: bits [PHT_INDEX_BITS:2], with the top bit
  // xor-mixed with pc(1) (compressed-instruction alignment bit).
  def pht_index(pc: UInt): UInt = {
    val low_bits = pc(PHT_INDEX_BITS, 2)
    val hi_bit = pc(1) ^ pc(PHT_INDEX_BITS+1)
    Cat(hi_bit, low_bits)
  }

  // PHT tag = PC bits immediately above the index field.
  def pht_tag(pc: UInt): UInt = {
    pc(PHT_INDEX_BITS + 2 + PHT_TAG_BITS - 1, PHT_INDEX_BITS + 2)
  }

  // Cache alias bits of a region vaddr (vaddr bits [13:12] of the full address,
  // i.e. bits [7:6] once the 64-byte region offset has been stripped).
  def get_alias_bits(region_vaddr: UInt): UInt = region_vaddr(7, 6)
}
106
// PC-indexed stride prefetcher.
// 3-stage pipeline: s0 = table lookup by PC, s1 = confidence/stride update
// and allocation, s2 = prefetch request generation.
class StridePF()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val stride_en = Input(Bool())
    // s0 lookup from the load pipeline: PC plus translated block addresses
    val s0_lookup = Flipped(new ValidIO(new Bundle() {
      val pc = UInt(STRIDE_PC_BITS.W)
      val vaddr = UInt(VAddrBits.W)
      val paddr = UInt(PAddrBits.W)
    }))
    // qualifies the s1 stage (e.g. the access survived to the next pipe stage)
    val s1_valid = Input(Bool())
    val s2_gen_req = ValidIO(new PfGenReq())
  })

  val prev_valid = RegNext(io.s0_lookup.valid, false.B)
  val prev_pc = RegEnable(io.s0_lookup.bits.pc, io.s0_lookup.valid)

  // suppress back-to-back lookups from the same PC
  val s0_valid = io.s0_lookup.valid && !(prev_valid && prev_pc === io.s0_lookup.bits.pc)

  def entry_map[T](fn: Int => T) = (0 until smsParams.stride_entries).map(fn)

  // fully-associative stride table, PLRU replacement
  val replacement = ReplacementPolicy.fromString("plru", smsParams.stride_entries)
  val valids = entry_map(_ => RegInit(false.B))
  val entries_pc = entry_map(_ => Reg(UInt(STRIDE_PC_BITS.W)) )
  // 2-bit saturating confidence counter per entry
  val entries_conf = entry_map(_ => RegInit(1.U(2.W)))
  val entries_last_addr = entry_map(_ => Reg(UInt(STRIDE_BLK_ADDR_BITS.W)) )
  // signed stride in blocks (one extra bit for the sign)
  val entries_stride = entry_map(_ => Reg(SInt((STRIDE_BLK_ADDR_BITS+1).W)))


  val s0_match_vec = valids.zip(entries_pc).map({
    case (v, pc) => v && pc === io.s0_lookup.bits.pc
  })

  val s0_hit = s0_valid && Cat(s0_match_vec).orR
  val s0_miss = s0_valid && !s0_hit
  val s0_matched_conf = Mux1H(s0_match_vec, entries_conf)
  val s0_matched_last_addr = Mux1H(s0_match_vec, entries_last_addr)
  val s0_matched_last_stride = Mux1H(s0_match_vec, entries_stride)


  // s1: registered lookup results, gated by io.s1_valid
  val s1_vaddr = RegEnable(io.s0_lookup.bits.vaddr, s0_valid)
  val s1_paddr = RegEnable(io.s0_lookup.bits.paddr, s0_valid)
  val s1_hit = RegNext(s0_hit) && io.s1_valid
  val s1_alloc = RegNext(s0_miss) && io.s1_valid
  val s1_conf = RegNext(s0_matched_conf)
  val s1_last_addr = RegNext(s0_matched_last_addr)
  val s1_last_stride = RegNext(s0_matched_last_stride)
  val s1_match_vec = RegNext(VecInit(s0_match_vec))

  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  val s1_new_stride_vaddr = s1_vaddr(BLOCK_OFFSET + STRIDE_BLK_ADDR_BITS - 1, BLOCK_OFFSET)
  // signed difference between current and last block address (zero-extended)
  val s1_new_stride = (0.U(1.W) ## s1_new_stride_vaddr).asSInt - (0.U(1.W) ## s1_last_addr).asSInt
  val s1_stride_non_zero = s1_last_stride =/= 0.S
  val s1_stride_match = s1_new_stride === s1_last_stride && s1_stride_non_zero
  val s1_replace_idx = replacement.way

  for(i <- 0 until smsParams.stride_entries){
    val alloc = s1_alloc && i.U === s1_replace_idx
    val update = s1_hit && s1_match_vec(i)
    when(update){
      assert(valids(i))
      // stride match: count up (saturate at 3); mismatch: count down (floor 0)
      entries_conf(i) := Mux(s1_stride_match,
        Mux(s1_conf === 3.U, 3.U, s1_conf + 1.U),
        Mux(s1_conf === 0.U, 0.U, s1_conf - 1.U)
      )
      entries_last_addr(i) := s1_new_stride_vaddr
      // only retrain the stored stride while confidence is low (< 2)
      when(!s1_conf(1)){
        entries_stride(i) := s1_new_stride
      }
    }
    when(alloc){
      valids(i) := true.B
      entries_pc(i) := prev_pc
      entries_conf(i) := 0.U
      entries_last_addr(i) := s1_new_stride_vaddr
      entries_stride(i) := 0.S
    }
    assert(!(update && alloc))
  }
  when(s1_hit){
    replacement.access(OHToUInt(s1_match_vec.asUInt))
  }.elsewhen(s1_alloc){
    replacement.access(s1_replace_idx)
  }

  val s1_block_vaddr = block_addr(s1_vaddr)
  val s1_pf_block_vaddr = (s1_block_vaddr.asSInt + s1_last_stride).asUInt
  // stride stepped over a page boundary -> paddr cannot be reused
  val s1_pf_cross_page = s1_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT) =/= s1_block_vaddr(BLOCK_ADDR_PAGE_BIT)

  // s2: generate the prefetch request on a confident stride hit
  val s2_pf_gen_valid = RegNext(s1_hit && s1_stride_match, false.B)
  val s2_pf_gen_paddr_valid = RegEnable(!s1_pf_cross_page, s1_hit && s1_stride_match)
  val s2_pf_block_vaddr = RegEnable(s1_pf_block_vaddr, s1_hit && s1_stride_match)
  val s2_block_paddr = RegEnable(block_addr(s1_paddr), s1_hit && s1_stride_match)

  // same page: splice page frame from paddr with page offset from pf vaddr;
  // otherwise fall back to the virtual block address
  val s2_pf_block_addr = Mux(s2_pf_gen_paddr_valid,
    Cat(
      s2_block_paddr(PAddrBits - BLOCK_OFFSET - 1, BLOCK_ADDR_PAGE_BIT),
      s2_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT - 1, 0)
    ),
    s2_pf_block_vaddr
  )
  val s2_pf_full_addr = Wire(UInt(VAddrBits.W))
  s2_pf_full_addr := s2_pf_block_addr ## 0.U(BLOCK_OFFSET.W)

  val s2_pf_region_addr = region_addr(s2_pf_full_addr)
  val s2_pf_region_offset = s2_pf_block_addr(REGION_OFFSET - 1, 0)

  val s2_full_vaddr = Wire(UInt(VAddrBits.W))
  s2_full_vaddr := s2_pf_block_vaddr ## 0.U(BLOCK_OFFSET.W)

  val s2_region_tag = region_hash_tag(region_addr(s2_full_vaddr))

  io.s2_gen_req.valid := s2_pf_gen_valid && io.stride_en
  io.s2_gen_req.bits.region_tag := s2_region_tag
  io.s2_gen_req.bits.region_addr := s2_pf_region_addr
  io.s2_gen_req.bits.alias_bits := get_alias_bits(region_addr(s2_full_vaddr))
  io.s2_gen_req.bits.region_bits := region_offset_to_bits(s2_pf_region_offset)
  io.s2_gen_req.bits.paddr_valid := s2_pf_gen_paddr_valid
  io.s2_gen_req.bits.decr_mode := false.B
  io.s2_gen_req.bits.debug_source_type := HW_PREFETCH_STRIDE.U

}
227
// PC-indexed stride prefetcher (see pipeline comments inline).
//
// NOTE(review): this class has the same name `StridePF` as a class defined
// earlier in this chunk — duplicate top-level definitions in one file will not
// compile; presumably one copy is stale and should be removed. TODO confirm
// against the full file before merging.
//
// Fix applied: `io.s2_gen_req.bits` is a PfGenReq, which declares `alias_bits`
// and `debug_source_type`; this copy left both output fields undriven, which
// Chisel rejects at elaboration ("not fully initialized"). Both are now driven
// the same way as in the sibling implementation.
class StridePF()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val stride_en = Input(Bool())
    // s0 lookup from the load pipeline: PC plus translated block addresses
    val s0_lookup = Flipped(new ValidIO(new Bundle() {
      val pc = UInt(STRIDE_PC_BITS.W)
      val vaddr = UInt(VAddrBits.W)
      val paddr = UInt(PAddrBits.W)
    }))
    // qualifies the s1 stage
    val s1_valid = Input(Bool())
    val s2_gen_req = ValidIO(new PfGenReq())
  })

  val prev_valid = RegNext(io.s0_lookup.valid, false.B)
  val prev_pc = RegEnable(io.s0_lookup.bits.pc, io.s0_lookup.valid)

  // suppress back-to-back lookups from the same PC
  val s0_valid = io.s0_lookup.valid && !(prev_valid && prev_pc === io.s0_lookup.bits.pc)

  def entry_map[T](fn: Int => T) = (0 until smsParams.stride_entries).map(fn)

  // fully-associative stride table, PLRU replacement
  val replacement = ReplacementPolicy.fromString("plru", smsParams.stride_entries)
  val valids = entry_map(_ => RegInit(false.B))
  val entries_pc = entry_map(_ => Reg(UInt(STRIDE_PC_BITS.W)) )
  // 2-bit saturating confidence counter per entry
  val entries_conf = entry_map(_ => RegInit(1.U(2.W)))
  val entries_last_addr = entry_map(_ => Reg(UInt(STRIDE_BLK_ADDR_BITS.W)) )
  // signed stride in blocks (one extra bit for the sign)
  val entries_stride = entry_map(_ => Reg(SInt((STRIDE_BLK_ADDR_BITS+1).W)))


  val s0_match_vec = valids.zip(entries_pc).map({
    case (v, pc) => v && pc === io.s0_lookup.bits.pc
  })

  val s0_hit = s0_valid && Cat(s0_match_vec).orR
  val s0_miss = s0_valid && !s0_hit
  val s0_matched_conf = Mux1H(s0_match_vec, entries_conf)
  val s0_matched_last_addr = Mux1H(s0_match_vec, entries_last_addr)
  val s0_matched_last_stride = Mux1H(s0_match_vec, entries_stride)


  // s1: registered lookup results, gated by io.s1_valid
  val s1_vaddr = RegEnable(io.s0_lookup.bits.vaddr, s0_valid)
  val s1_paddr = RegEnable(io.s0_lookup.bits.paddr, s0_valid)
  val s1_hit = RegNext(s0_hit) && io.s1_valid
  val s1_alloc = RegNext(s0_miss) && io.s1_valid
  val s1_conf = RegNext(s0_matched_conf)
  val s1_last_addr = RegNext(s0_matched_last_addr)
  val s1_last_stride = RegNext(s0_matched_last_stride)
  val s1_match_vec = RegNext(VecInit(s0_match_vec))

  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  val s1_new_stride_vaddr = s1_vaddr(BLOCK_OFFSET + STRIDE_BLK_ADDR_BITS - 1, BLOCK_OFFSET)
  // signed difference between current and last block address (zero-extended)
  val s1_new_stride = (0.U(1.W) ## s1_new_stride_vaddr).asSInt - (0.U(1.W) ## s1_last_addr).asSInt
  val s1_stride_non_zero = s1_last_stride =/= 0.S
  val s1_stride_match = s1_new_stride === s1_last_stride && s1_stride_non_zero
  val s1_replace_idx = replacement.way

  for(i <- 0 until smsParams.stride_entries){
    val alloc = s1_alloc && i.U === s1_replace_idx
    val update = s1_hit && s1_match_vec(i)
    when(update){
      assert(valids(i))
      // stride match: count up (saturate at 3); mismatch: count down (floor 0)
      entries_conf(i) := Mux(s1_stride_match,
        Mux(s1_conf === 3.U, 3.U, s1_conf + 1.U),
        Mux(s1_conf === 0.U, 0.U, s1_conf - 1.U)
      )
      entries_last_addr(i) := s1_new_stride_vaddr
      // only retrain the stored stride while confidence is low (< 2)
      when(!s1_conf(1)){
        entries_stride(i) := s1_new_stride
      }
    }
    when(alloc){
      valids(i) := true.B
      entries_pc(i) := prev_pc
      entries_conf(i) := 0.U
      entries_last_addr(i) := s1_new_stride_vaddr
      entries_stride(i) := 0.S
    }
    assert(!(update && alloc))
  }
  when(s1_hit){
    replacement.access(OHToUInt(s1_match_vec.asUInt))
  }.elsewhen(s1_alloc){
    replacement.access(s1_replace_idx)
  }

  val s1_block_vaddr = block_addr(s1_vaddr)
  val s1_pf_block_vaddr = (s1_block_vaddr.asSInt + s1_last_stride).asUInt
  // stride stepped over a page boundary -> paddr cannot be reused
  val s1_pf_cross_page = s1_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT) =/= s1_block_vaddr(BLOCK_ADDR_PAGE_BIT)

  // s2: generate the prefetch request on a confident stride hit
  val s2_pf_gen_valid = RegNext(s1_hit && s1_stride_match, false.B)
  val s2_pf_gen_paddr_valid = RegEnable(!s1_pf_cross_page, s1_hit && s1_stride_match)
  val s2_pf_block_vaddr = RegEnable(s1_pf_block_vaddr, s1_hit && s1_stride_match)
  val s2_block_paddr = RegEnable(block_addr(s1_paddr), s1_hit && s1_stride_match)

  // same page: splice page frame from paddr with page offset from pf vaddr;
  // otherwise fall back to the virtual block address
  val s2_pf_block_addr = Mux(s2_pf_gen_paddr_valid,
    Cat(
      s2_block_paddr(PAddrBits - BLOCK_OFFSET - 1, BLOCK_ADDR_PAGE_BIT),
      s2_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT - 1, 0)
    ),
    s2_pf_block_vaddr
  )
  val s2_pf_full_addr = Wire(UInt(VAddrBits.W))
  s2_pf_full_addr := s2_pf_block_addr ## 0.U(BLOCK_OFFSET.W)

  val s2_pf_region_addr = region_addr(s2_pf_full_addr)
  val s2_pf_region_offset = s2_pf_block_addr(REGION_OFFSET - 1, 0)

  val s2_full_vaddr = Wire(UInt(VAddrBits.W))
  s2_full_vaddr := s2_pf_block_vaddr ## 0.U(BLOCK_OFFSET.W)

  val s2_region_tag = region_hash_tag(region_addr(s2_full_vaddr))

  io.s2_gen_req.valid := s2_pf_gen_valid && io.stride_en
  io.s2_gen_req.bits.region_tag := s2_region_tag
  io.s2_gen_req.bits.region_addr := s2_pf_region_addr
  // was missing: PfGenReq.alias_bits must be driven
  io.s2_gen_req.bits.alias_bits := get_alias_bits(region_addr(s2_full_vaddr))
  io.s2_gen_req.bits.region_bits := region_offset_to_bits(s2_pf_region_offset)
  io.s2_gen_req.bits.paddr_valid := s2_pf_gen_paddr_valid
  io.s2_gen_req.bits.decr_mode := false.B
  // was missing: PfGenReq.debug_source_type must be driven
  io.s2_gen_req.bits.debug_source_type := HW_PREFETCH_STRIDE.U

}
346
// One Active Generation Table entry: tracks the access pattern of a region
// that is currently "in generation", and carries the PHT index/tag it will be
// written back to on eviction.
class AGTEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)            // PHT set this entry trains on eviction
  val pht_tag = UInt(PHT_TAG_BITS.W)
  val region_bits = UInt(REGION_BLKS.W)             // bitmap of blocks accessed in this region
  val region_tag = UInt(REGION_TAG_WIDTH.W)         // hashed region tag (see region_hash_tag)
  val region_offset = UInt(REGION_OFFSET.W)         // block offset of the triggering access
  val access_cnt = UInt((REGION_BLKS-1).U.getWidth.W) // accesses seen, saturates at REGION_BLKS-1
  val decr_mode = Bool()                            // pattern moves toward lower addresses
}
356
// Prefetch generation request produced by the stride/AGT/PHT engines.
class PfGenReq()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val region_tag = UInt(REGION_TAG_WIDTH.W)     // hashed region tag for dedup/matching
  val region_addr = UInt(REGION_ADDR_BITS.W)    // region address (physical iff paddr_valid)
  val region_bits = UInt(REGION_BLKS.W)         // bitmap of blocks to prefetch in the region
  val paddr_valid = Bool()                      // region_addr holds a usable physical address
  val decr_mode = Bool()                        // issue blocks in decreasing-address order
  val alias_bits = UInt(2.W)                    // vaddr alias bits (see get_alias_bits)
  val debug_source_type = UInt(log2Up(nSourceType).W) // which engine generated this request
}
366
// Active Generation Table (AGT): tracks regions currently being accessed,
// accumulates their per-block access bitmaps, generates "active page" prefetch
// requests, and evicts finished entries to the PHT for pattern training.
// 3 stages: s0 = match/replacement select, s1 = entry update/alloc + pf-addr
// computation, s2 = outputs (pf req, pht lookup, eviction).
class ActiveGenerationTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val agt_en = Input(Bool())
    val s0_lookup = Flipped(ValidIO(new Bundle() {
      val region_tag = UInt(REGION_TAG_WIDTH.W)     // tag of the accessed region
      val region_p1_tag = UInt(REGION_TAG_WIDTH.W)  // tag of region + 1
      val region_m1_tag = UInt(REGION_TAG_WIDTH.W)  // tag of region - 1
      val region_offset = UInt(REGION_OFFSET.W)
      val pht_index = UInt(PHT_INDEX_BITS.W)
      val pht_tag = UInt(PHT_TAG_BITS.W)
      val allow_cross_region_p1 = Bool()
      val allow_cross_region_m1 = Bool()
      val region_p1_cross_page = Bool()
      val region_m1_cross_page = Bool()
      val region_paddr = UInt(REGION_ADDR_BITS.W)
      val region_vaddr = UInt(REGION_ADDR_BITS.W)
    }))
    val s1_sel_stride = Output(Bool())
    val s2_stride_hit = Input(Bool())
    // if agt/stride missed, try lookup pht
    val s2_pht_lookup = ValidIO(new PhtLookup())
    // evict entry to pht
    val s2_evict = ValidIO(new AGTEntry())
    val s2_pf_gen_req = ValidIO(new PfGenReq())
    val act_threshold = Input(UInt(REGION_OFFSET.W)) // min access_cnt for an "active page"
    val act_stride = Input(UInt(6.W))                // lookahead distance in blocks
  })

  val entries = Seq.fill(smsParams.active_gen_table_size){ Reg(new AGTEntry()) }
  val valids = Seq.fill(smsParams.active_gen_table_size){ RegInit(false.B) }
  val replacement = ReplacementPolicy.fromString("plru", smsParams.active_gen_table_size)

  // driven from s1 below; forwarded into s0 to detect update/replace conflicts
  val s1_replace_mask_w = Wire(UInt(smsParams.active_gen_table_size.W))

  val s0_lookup = io.s0_lookup.bits
  val s0_lookup_valid = io.s0_lookup.valid

  val prev_lookup = RegEnable(s0_lookup, s0_lookup_valid)
  val prev_lookup_valid = RegNext(s0_lookup_valid, false.B)

  // back-to-back access to the same region: don't re-allocate
  val s0_match_prev = prev_lookup_valid && s0_lookup.region_tag === prev_lookup.region_tag

  def gen_match_vec(region_tag: UInt): Seq[Bool] = {
    entries.zip(valids).map({
      case (ent, v) => v && ent.region_tag === region_tag
    })
  }

  // match the accessed region and its two neighbors
  val region_match_vec_s0 = gen_match_vec(s0_lookup.region_tag)
  val region_p1_match_vec_s0 = gen_match_vec(s0_lookup.region_p1_tag)
  val region_m1_match_vec_s0 = gen_match_vec(s0_lookup.region_m1_tag)

  val any_region_match = Cat(region_match_vec_s0).orR
  val any_region_p1_match = Cat(region_p1_match_vec_s0).orR && s0_lookup.allow_cross_region_p1
  val any_region_m1_match = Cat(region_m1_match_vec_s0).orR && s0_lookup.allow_cross_region_m1

  val s0_region_hit = any_region_match
  // neighbor region is active -> the stream crossed a region boundary
  val s0_cross_region_hit = any_region_m1_match || any_region_p1_match
  val s0_alloc = s0_lookup_valid && !s0_region_hit && !s0_match_prev
  // entry used for pf gen: exact match preferred, then -1, then +1 neighbor
  val s0_pf_gen_match_vec = valids.indices.map(i => {
    Mux(any_region_match,
      region_match_vec_s0(i),
      Mux(any_region_m1_match,
        region_m1_match_vec_s0(i), region_p1_match_vec_s0(i)
      )
    )
  })
  val s0_agt_entry = Wire(new AGTEntry())

  s0_agt_entry.pht_index := s0_lookup.pht_index
  s0_agt_entry.pht_tag := s0_lookup.pht_tag
  s0_agt_entry.region_bits := region_offset_to_bits(s0_lookup.region_offset)
  s0_agt_entry.region_tag := s0_lookup.region_tag
  s0_agt_entry.region_offset := s0_lookup.region_offset
  s0_agt_entry.access_cnt := 1.U
  // lookup_region + 1 == entry_region
  // lookup_region = entry_region - 1 => decr mode
  s0_agt_entry.decr_mode := !s0_region_hit && !any_region_m1_match && any_region_p1_match
  val s0_replace_way = replacement.way
  val s0_replace_mask = UIntToOH(s0_replace_way)
  // s0 hit a entry that may be replaced in s1
  val s0_update_conflict = Cat(VecInit(region_match_vec_s0).asUInt & s1_replace_mask_w).orR
  val s0_update = s0_lookup_valid && s0_region_hit && !s0_update_conflict

  val s0_access_way = Mux1H(
    Seq(s0_update, s0_alloc),
    Seq(OHToUInt(region_match_vec_s0), s0_replace_way)
  )
  when(s0_update || s0_alloc) {
    replacement.access(s0_access_way)
  }

  // stage1: update/alloc
  // region hit, update entry
  val s1_update = RegNext(s0_update, false.B)
  val s1_update_mask = RegEnable(VecInit(region_match_vec_s0), s0_lookup_valid)
  val s1_agt_entry = RegEnable(s0_agt_entry, s0_lookup_valid)
  val s1_cross_region_match = RegNext(s0_lookup_valid && s0_cross_region_hit, false.B)
  val s1_alloc = RegNext(s0_alloc, false.B)
  val s1_alloc_entry = s1_agt_entry
  val s1_replace_mask = RegEnable(s0_replace_mask, s0_lookup_valid)
  s1_replace_mask_w := s1_replace_mask & Fill(smsParams.active_gen_table_size, s1_alloc)
  // entry displaced by an allocation is evicted to the PHT in s2
  val s1_evict_entry = Mux1H(s1_replace_mask, entries)
  val s1_evict_valid = Mux1H(s1_replace_mask, valids)
  // pf gen
  val s1_pf_gen_match_vec = RegEnable(VecInit(s0_pf_gen_match_vec), s0_lookup_valid)
  val s1_region_paddr = RegEnable(s0_lookup.region_paddr, s0_lookup_valid)
  val s1_region_vaddr = RegEnable(s0_lookup.region_vaddr, s0_lookup_valid)
  val s1_region_offset = RegEnable(s0_lookup.region_offset, s0_lookup_valid)
  for(i <- entries.indices){
    val alloc = s1_replace_mask(i) && s1_alloc
    val update = s1_update_mask(i) && s1_update
    val update_entry = WireInit(entries(i))
    update_entry.region_bits := entries(i).region_bits | s1_agt_entry.region_bits
    // count only first-touch accesses, saturating at REGION_BLKS - 1
    update_entry.access_cnt := Mux(entries(i).access_cnt === (REGION_BLKS - 1).U,
      entries(i).access_cnt,
      entries(i).access_cnt + (s1_agt_entry.region_bits & (~entries(i).region_bits).asUInt).orR
    )
    valids(i) := valids(i) || alloc
    entries(i) := Mux(alloc, s1_alloc_entry, Mux(update, update_entry, entries(i)))
  }

  when(s1_update){
    assert(PopCount(s1_update_mask) === 1.U, "multi-agt-update")
  }
  when(s1_alloc){
    assert(PopCount(s1_replace_mask) === 1.U, "multi-agt-alloc")
  }

  // pf_addr
  // 1.hit => pf_addr = lookup_addr + (decr ? -1 : 1)
  // 2.lookup region - 1 hit => lookup_addr + 1 (incr mode)
  // 3.lookup region + 1 hit => lookup_addr - 1 (decr mode)
  val s1_hited_entry_decr = Mux1H(s1_update_mask, entries.map(_.decr_mode))
  val s1_pf_gen_decr_mode = Mux(s1_update,
    s1_hited_entry_decr,
    s1_agt_entry.decr_mode
  )

  // block-granularity vaddr +/- act_stride, with one carry bit on top
  val s1_pf_gen_vaddr_inc = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) + io.act_stride
  val s1_pf_gen_vaddr_dec = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) - io.act_stride
  val s1_vaddr_inc_cross_page = s1_pf_gen_vaddr_inc(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s1_vaddr_dec_cross_page = s1_pf_gen_vaddr_dec(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
  // carry/borrow out -> wrapped past the addressable range, suppress pf
  val s1_vaddr_inc_cross_max_lim = s1_pf_gen_vaddr_inc.head(1).asBool
  val s1_vaddr_dec_cross_max_lim = s1_pf_gen_vaddr_dec.head(1).asBool

  //val s1_pf_gen_vaddr_p1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) + 1.U
  //val s1_pf_gen_vaddr_m1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) - 1.U
  val s1_pf_gen_vaddr = Cat(
    s1_region_vaddr(REGION_ADDR_BITS - 1, REGION_TAG_WIDTH),
    Mux(s1_pf_gen_decr_mode,
      s1_pf_gen_vaddr_dec.tail(1).head(REGION_TAG_WIDTH),
      s1_pf_gen_vaddr_inc.tail(1).head(REGION_TAG_WIDTH)
    )
  )
  val s1_pf_gen_offset = Mux(s1_pf_gen_decr_mode,
    s1_pf_gen_vaddr_dec(REGION_OFFSET - 1, 0),
    s1_pf_gen_vaddr_inc(REGION_OFFSET - 1, 0)
  )
  val s1_pf_gen_offset_mask = UIntToOH(s1_pf_gen_offset)
  val s1_pf_gen_access_cnt = Mux1H(s1_pf_gen_match_vec, entries.map(_.access_cnt))
  val s1_in_active_page = s1_pf_gen_access_cnt > io.act_threshold
  // pf fires on a region/neighbor hit, within address limits, in an active page
  val s1_pf_gen_valid = prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && Mux(s1_pf_gen_decr_mode,
    !s1_vaddr_dec_cross_max_lim,
    !s1_vaddr_inc_cross_max_lim
  ) && s1_in_active_page && io.agt_en
  val s1_pf_gen_paddr_valid = Mux(s1_pf_gen_decr_mode, !s1_vaddr_dec_cross_page, !s1_vaddr_inc_cross_page)
  // same page: reuse the paddr page frame; otherwise keep the vaddr
  val s1_pf_gen_region_addr = Mux(s1_pf_gen_paddr_valid,
    Cat(s1_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT), s1_pf_gen_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)),
    s1_pf_gen_vaddr
  )
  val s1_pf_gen_region_tag = region_hash_tag(s1_pf_gen_vaddr)
  // incr mode: prefetch all blocks at or above the generated offset
  val s1_pf_gen_incr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == 0) true.B else !s1_pf_gen_offset_mask(i - 1, 0).orR
  })).asUInt
  // decr mode: prefetch all blocks at or below the generated offset
  val s1_pf_gen_decr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == REGION_BLKS - 1) true.B
    else !s1_pf_gen_offset_mask(REGION_BLKS - 1, i + 1).orR
  })).asUInt
  val s1_pf_gen_region_bits = Mux(s1_pf_gen_decr_mode,
    s1_pf_gen_decr_region_bits,
    s1_pf_gen_incr_region_bits
  )
  val s1_pht_lookup_valid = Wire(Bool())
  val s1_pht_lookup = Wire(new PhtLookup())

  // no pf generated -> fall back to a PHT lookup for this access
  s1_pht_lookup_valid := !s1_pf_gen_valid && prev_lookup_valid
  s1_pht_lookup.pht_index := s1_agt_entry.pht_index
  s1_pht_lookup.pht_tag := s1_agt_entry.pht_tag
  s1_pht_lookup.region_vaddr := s1_region_vaddr
  s1_pht_lookup.region_paddr := s1_region_paddr
  s1_pht_lookup.region_offset := s1_region_offset

  // region hit but page not yet active: let the stride prefetcher take over
  io.s1_sel_stride := prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && !s1_in_active_page

  // stage2: gen pf reg / evict entry to pht
  val s2_evict_entry = RegEnable(s1_evict_entry, s1_alloc)
  val s2_evict_valid = RegNext(s1_alloc && s1_evict_valid, false.B)
  val s2_paddr_valid = RegEnable(s1_pf_gen_paddr_valid, s1_pf_gen_valid)
  val s2_pf_gen_region_tag = RegEnable(s1_pf_gen_region_tag, s1_pf_gen_valid)
  val s2_pf_gen_decr_mode = RegEnable(s1_pf_gen_decr_mode, s1_pf_gen_valid)
  val s2_pf_gen_region_paddr = RegEnable(s1_pf_gen_region_addr, s1_pf_gen_valid)
  val s2_pf_gen_alias_bits = RegEnable(get_alias_bits(s1_pf_gen_vaddr), s1_pf_gen_valid)
  val s2_pf_gen_region_bits = RegEnable(s1_pf_gen_region_bits, s1_pf_gen_valid)
  val s2_pf_gen_valid = RegNext(s1_pf_gen_valid, false.B)
  // drop the pht lookup if the stride engine already hit for this access
  val s2_pht_lookup_valid = RegNext(s1_pht_lookup_valid, false.B) && !io.s2_stride_hit
  val s2_pht_lookup = RegEnable(s1_pht_lookup, s1_pht_lookup_valid)

  io.s2_evict.valid := s2_evict_valid
  io.s2_evict.bits := s2_evict_entry

  io.s2_pf_gen_req.bits.region_tag := s2_pf_gen_region_tag
  io.s2_pf_gen_req.bits.region_addr := s2_pf_gen_region_paddr
  io.s2_pf_gen_req.bits.alias_bits := s2_pf_gen_alias_bits
  io.s2_pf_gen_req.bits.region_bits := s2_pf_gen_region_bits
  io.s2_pf_gen_req.bits.paddr_valid := s2_paddr_valid
  io.s2_pf_gen_req.bits.decr_mode := s2_pf_gen_decr_mode
  io.s2_pf_gen_req.valid := s2_pf_gen_valid
  io.s2_pf_gen_req.bits.debug_source_type := HW_PREFETCH_AGT.U

  io.s2_pht_lookup.valid := s2_pht_lookup_valid
  io.s2_pht_lookup.bits := s2_pht_lookup

  XSPerfAccumulate("sms_agt_in", io.s0_lookup.valid)
  XSPerfAccumulate("sms_agt_alloc", s1_alloc) // cross region match or filter evict
  XSPerfAccumulate("sms_agt_update", s1_update) // entry hit
  XSPerfAccumulate("sms_agt_pf_gen", io.s2_pf_gen_req.valid)
  XSPerfAccumulate("sms_agt_pf_gen_paddr_valid",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.paddr_valid
  )
  XSPerfAccumulate("sms_agt_pf_gen_decr_mode",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.decr_mode
  )
  for(i <- 0 until smsParams.active_gen_table_size){
    XSPerfAccumulate(s"sms_agt_access_entry_$i",
      s1_alloc && s1_replace_mask(i) || s1_update && s1_update_mask(i)
    )
  }

}
607
// Request bundle for a PHT lookup issued when the AGT/stride engines miss.
class PhtLookup()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)        // PHT set index (from pht_index(pc))
  val pht_tag = UInt(PHT_TAG_BITS.W)            // PHT tag (from pht_tag(pc))
  val region_paddr = UInt(REGION_ADDR_BITS.W)   // physical region address of the access
  val region_vaddr = UInt(REGION_ADDR_BITS.W)   // virtual region address of the access
  val region_offset = UInt(REGION_OFFSET.W)     // triggering block offset within the region
}
615
// One PHT way: per-block saturating history counters centered on the trigger
// offset (REGION_BLKS-1 positions on each side), plus tag and direction.
class PhtEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val hist = Vec(2 * (REGION_BLKS - 1), UInt(PHT_HIST_BITS.W))
  val tag = UInt(PHT_TAG_BITS.W)
  val decr_mode = Bool()
}
621
622class PatternHistoryTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
623  val io = IO(new Bundle() {
624    // receive agt evicted entry
625    val agt_update = Flipped(ValidIO(new AGTEntry()))
626    // at stage2, if we know agt missed, lookup pht
627    val s2_agt_lookup = Flipped(ValidIO(new PhtLookup()))
628    // pht-generated prefetch req
629    val pf_gen_req = ValidIO(new PfGenReq())
630  })
631
632  val pht_ram = Module(new SRAMTemplate[PhtEntry](new PhtEntry,
633    set = smsParams.pht_size / smsParams.pht_ways,
634    way =smsParams.pht_ways,
635    singlePort = true
636  ))
637  def PHT_SETS = smsParams.pht_size / smsParams.pht_ways
638  val pht_valids = Seq.fill(smsParams.pht_ways){
639    RegInit(VecInit(Seq.fill(PHT_SETS){false.B}))
640  }
641  val replacement = Seq.fill(PHT_SETS) { ReplacementPolicy.fromString("plru", smsParams.pht_ways) }
642
643  val lookup_queue = Module(new OverrideableQueue(new PhtLookup, smsParams.pht_lookup_queue_size))
644  lookup_queue.io.in := io.s2_agt_lookup
645  val lookup = lookup_queue.io.out
646
647  val evict_queue = Module(new OverrideableQueue(new AGTEntry, smsParams.pht_lookup_queue_size))
648  evict_queue.io.in := io.agt_update
649  val evict = evict_queue.io.out
650
651  XSPerfAccumulate("sms_pht_lookup_in", lookup_queue.io.in.fire)
652  XSPerfAccumulate("sms_pht_lookup_out", lookup_queue.io.out.fire)
653  XSPerfAccumulate("sms_pht_evict_in", evict_queue.io.in.fire)
654  XSPerfAccumulate("sms_pht_evict_out", evict_queue.io.out.fire)
655
656  val s3_ram_en = Wire(Bool())
657  val s1_valid = Wire(Bool())
658  // if s1.raddr == s2.waddr or s3 is using ram port, block s1
659  val s1_wait = Wire(Bool())
660  // pipe s0: select an op from [lookup, update], generate ram read addr
661  val s0_valid = lookup.valid || evict.valid
662
663  evict.ready := !s1_valid || !s1_wait
664  lookup.ready := evict.ready && !evict.valid
665
666  val s0_ram_raddr = Mux(evict.valid,
667    evict.bits.pht_index,
668    lookup.bits.pht_index
669  )
670  val s0_tag = Mux(evict.valid, evict.bits.pht_tag, lookup.bits.pht_tag)
671  val s0_region_offset = Mux(evict.valid, evict.bits.region_offset, lookup.bits.region_offset)
672  val s0_region_paddr = lookup.bits.region_paddr
673  val s0_region_vaddr = lookup.bits.region_vaddr
674  val s0_region_bits = evict.bits.region_bits
675  val s0_decr_mode = evict.bits.decr_mode
676  val s0_evict = evict.valid
677
678  // pipe s1: send addr to ram
679  val s1_valid_r = RegInit(false.B)
680  s1_valid_r := Mux(s1_valid && s1_wait, true.B, s0_valid)
681  s1_valid := s1_valid_r
682  val s1_reg_en = s0_valid && (!s1_wait || !s1_valid)
683  val s1_ram_raddr = RegEnable(s0_ram_raddr, s1_reg_en)
684  val s1_tag = RegEnable(s0_tag, s1_reg_en)
685  val s1_region_bits = RegEnable(s0_region_bits, s1_reg_en)
686  val s1_decr_mode = RegEnable(s0_decr_mode, s1_reg_en)
687  val s1_region_paddr = RegEnable(s0_region_paddr, s1_reg_en)
688  val s1_region_vaddr = RegEnable(s0_region_vaddr, s1_reg_en)
689  val s1_region_offset = RegEnable(s0_region_offset, s1_reg_en)
690  val s1_pht_valids = pht_valids.map(way => Mux1H(
691    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
692    way
693  ))
694  val s1_evict = RegEnable(s0_evict, s1_reg_en)
695  val s1_replace_way = Mux1H(
696    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
697    replacement.map(_.way)
698  )
699  val s1_hist_update_mask = Cat(
700    Fill(REGION_BLKS - 1, true.B), 0.U((REGION_BLKS - 1).W)
701  ) >> s1_region_offset
702  val s1_hist_bits = Cat(
703    s1_region_bits.head(REGION_BLKS - 1) >> s1_region_offset,
704    (Cat(
705      s1_region_bits.tail(1), 0.U((REGION_BLKS - 1).W)
706    ) >> s1_region_offset)(REGION_BLKS - 2, 0)
707  )
708
  // pipe s2: generate ram write addr/data
  val s2_valid = RegNext(s1_valid && !s1_wait, false.B)
  val s2_reg_en = s1_valid && !s1_wait
  val s2_hist_update_mask = RegEnable(s1_hist_update_mask, s2_reg_en)
  val s2_hist_bits = RegEnable(s1_hist_bits, s2_reg_en)
  val s2_tag = RegEnable(s1_tag, s2_reg_en)
  val s2_region_bits = RegEnable(s1_region_bits, s2_reg_en)
  val s2_decr_mode = RegEnable(s1_decr_mode, s2_reg_en)
  val s2_region_paddr = RegEnable(s1_region_paddr, s2_reg_en)
  val s2_region_vaddr = RegEnable(s1_region_vaddr, s2_reg_en)
  val s2_region_offset = RegEnable(s1_region_offset, s2_reg_en)
  val s2_region_offset_mask = region_offset_to_bits(s2_region_offset)
  val s2_evict = RegEnable(s1_evict, s2_reg_en)
  val s2_pht_valids = s1_pht_valids.map(v => RegEnable(v, s2_reg_en))
  val s2_replace_way = RegEnable(s1_replace_way, s2_reg_en)
  // write address is the set we read in s1 (read-modify-write of the same set)
  val s2_ram_waddr = RegEnable(s1_ram_raddr, s2_reg_en)
  // RAM read data arrives this cycle (1-cycle read latency)
  val s2_ram_rdata = pht_ram.io.r.resp.data
  val s2_ram_rtags = s2_ram_rdata.map(_.tag)
  val s2_tag_match_vec = s2_ram_rtags.map(t => t === s2_tag)
  // hit = tag match on a way whose valid bit is set
  val s2_hit_vec = s2_tag_match_vec.zip(s2_pht_valids).map({
    case (tag_match, v) => v && tag_match
  })
  // Saturating counter update per history bit: increment (clamped at all-ones)
  // when the block was observed, decrement (clamped at zero) when it was not;
  // counters outside the update mask are left unchanged.
  val s2_hist_update = s2_ram_rdata.map(way => VecInit(way.hist.zipWithIndex.map({
    case (h, i) =>
      val do_update = s2_hist_update_mask(i)
      val hist_updated = Mux(s2_hist_bits(i),
        Mux(h.andR, h, h + 1.U),
        Mux(h === 0.U, 0.U, h - 1.U)
      )
      Mux(do_update, hist_updated, h)
  })))
  // Prefetch-decision bits: MSB of each saturating counter of the hit way
  val s2_hist_pf_gen = Mux1H(s2_hit_vec, s2_ram_rdata.map(way => VecInit(way.hist.map(_.head(1))).asUInt))
  // On a PHT miss, counters are freshly initialized to 0/1 from the observed bits
  val s2_new_hist = VecInit(s2_hist_bits.asBools.map(b => Cat(0.U((PHT_HIST_BITS - 1).W), b)))
  val s2_pht_hit = Cat(s2_hit_vec).orR
  val s2_hist = Mux(s2_pht_hit, Mux1H(s2_hit_vec, s2_hist_update), s2_new_hist)
  val s2_repl_way_mask = UIntToOH(s2_replace_way)
745
  // pipe s3: send addr/data to ram, gen pf_req
  val s3_valid = RegNext(s2_valid, false.B)
  val s3_evict = RegEnable(s2_evict, s2_valid)
  val s3_hist = RegEnable(s2_hist, s2_valid)
  val s3_hist_pf_gen = RegEnable(s2_hist_pf_gen, s2_valid)
  val s3_hist_update_mask = RegEnable(s2_hist_update_mask.asUInt, s2_valid)
  val s3_region_offset = RegEnable(s2_region_offset, s2_valid)
  val s3_region_offset_mask = RegEnable(s2_region_offset_mask, s2_valid)
  val s3_decr_mode = RegEnable(s2_decr_mode, s2_valid)
  val s3_region_paddr = RegEnable(s2_region_paddr, s2_valid)
  val s3_region_vaddr = RegEnable(s2_region_vaddr, s2_valid)
  val s3_pht_tag = RegEnable(s2_tag, s2_valid)
  val s3_hit_vec = s2_hit_vec.map(h => RegEnable(h, s2_valid))
  val s3_hit = Cat(s3_hit_vec).orR
  val s3_hit_way = OHToUInt(s3_hit_vec)
  val s3_repl_way = RegEnable(s2_replace_way, s2_valid)
  val s3_repl_way_mask = RegEnable(s2_repl_way_mask, s2_valid)
  // one-hot set-select mask for valid-bit / replacement-state updates below
  val s3_repl_update_mask = RegEnable(VecInit((0 until PHT_SETS).map(i => i.U === s2_ram_waddr)), s2_valid)
  val s3_ram_waddr = RegEnable(s2_ram_waddr, s2_valid)
  // PHT RAM is only written back on AGT-evict training, not on pure lookups
  s3_ram_en := s3_valid && s3_evict
  val s3_ram_wdata = Wire(new PhtEntry())
  s3_ram_wdata.hist := s3_hist
  s3_ram_wdata.tag := s3_pht_tag
  s3_ram_wdata.decr_mode := s3_decr_mode

  // Stall s1 on a read-after-write hazard: either s2 is about to write the set
  // s1 wants to read, or a write is being issued this cycle in s3.
  s1_wait := (s2_valid && s2_evict && s2_ram_waddr === s1_ram_raddr) || s3_ram_en

  // Set the valid bit of the victim (set, way) when an evict allocates a new entry
  for((valids, way_idx) <- pht_valids.zipWithIndex){
    val update_way = s3_repl_way_mask(way_idx)
    for((v, set_idx) <- valids.zipWithIndex){
      val update_set = s3_repl_update_mask(set_idx)
      when(s3_valid && s3_evict && !s3_hit && update_set && update_way){
        v := true.B
      }
    }
  }
  // Touch replacement state: the hit way on a hit, the victim way on an allocation
  for((r, i) <- replacement.zipWithIndex){
    when(s3_valid && s3_repl_update_mask(i)){
      when(s3_hit){
        r.access(s3_hit_way)
      }.elsewhen(s3_evict){
        r.access(s3_repl_way)
      }
    }
  }

  // Way write mask: update the matching way in place on a hit, else the victim way
  val s3_way_mask = Mux(s3_hit,
    VecInit(s3_hit_vec).asUInt,
    s3_repl_way_mask,
  ).asUInt

  pht_ram.io.r(
    s1_valid, s1_ram_raddr
  )
  pht_ram.io.w(
    s3_ram_en, s3_ram_wdata, s3_ram_waddr, s3_way_mask
  )

  // Hit vector must be one-hot; more than one matching way indicates corruption
  when(s3_valid && s3_hit){
    assert(!Cat(s3_hit_vec).andR, "sms_pht: multi-hit!")
  }
807
  // generate pf req if hit
  // Split the trigger-aligned prediction bits back into the "above trigger" (hi)
  // and "below trigger" (lo) halves, then shift left by the trigger offset to
  // re-align them to region boundaries.
  val s3_hist_hi = s3_hist_pf_gen.head(REGION_BLKS - 1)
  val s3_hist_lo = s3_hist_pf_gen.tail(REGION_BLKS - 1)
  val s3_hist_hi_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_hi) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  val s3_hist_lo_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_lo) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  // Predicted blocks that land in the current region, and the portions that
  // spill over into the next (incr) / previous (decr) region.
  val s3_cur_region_bits = Cat(s3_hist_hi_shifted.tail(REGION_BLKS - 1), 0.U(1.W)) |
    Cat(0.U(1.W), s3_hist_lo_shifted.head(REGION_BLKS - 1))
  val s3_incr_region_bits = Cat(0.U(1.W), s3_hist_hi_shifted.head(REGION_BLKS - 1))
  val s3_decr_region_bits = Cat(s3_hist_lo_shifted.tail(REGION_BLKS - 1), 0.U(1.W))
  // Prefetch requests are only generated on lookup hits, never on evict writes
  val s3_pf_gen_valid = s3_valid && s3_hit && !s3_evict
  val s3_cur_region_valid =  s3_pf_gen_valid && (s3_hist_pf_gen & s3_hist_update_mask).orR
  val s3_incr_region_valid = s3_pf_gen_valid && (s3_hist_hi & (~s3_hist_update_mask.head(REGION_BLKS - 1)).asUInt).orR
  val s3_decr_region_valid = s3_pf_gen_valid && (s3_hist_lo & (~s3_hist_update_mask.tail(REGION_BLKS - 1)).asUInt).orR
  val s3_incr_region_vaddr = s3_region_vaddr + 1.U
  val s3_incr_alias_bits = get_alias_bits(s3_incr_region_vaddr)
  val s3_decr_region_vaddr = s3_region_vaddr - 1.U
  val s3_decr_alias_bits = get_alias_bits(s3_decr_region_vaddr)
  // Neighbor-region paddr is formed by splicing the in-page vaddr bits into the
  // known paddr; this is only valid while the neighbor stays in the same page.
  val s3_incr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_decr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_incr_crosspage = s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_decr_crosspage = s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_cur_region_tag = region_hash_tag(s3_region_vaddr)
  val s3_incr_region_tag = region_hash_tag(s3_incr_region_vaddr)
  val s3_decr_region_tag = region_hash_tag(s3_decr_region_vaddr)

  // s4: hold the three generated requests and arbitrate them toward the filter
  val pf_gen_req_arb = Module(new Arbiter(new PfGenReq, 3))
  val s4_pf_gen_cur_region_valid = RegInit(false.B)
  val s4_pf_gen_cur_region = Reg(new PfGenReq)
  val s4_pf_gen_incr_region_valid = RegInit(false.B)
  val s4_pf_gen_incr_region = Reg(new PfGenReq)
  val s4_pf_gen_decr_region_valid = RegInit(false.B)
  val s4_pf_gen_decr_region = Reg(new PfGenReq)

  // cur-region request: arbiter input 0 has highest priority and out.ready is
  // tied true below, so it is always granted and needs no hold logic
  s4_pf_gen_cur_region_valid := s3_cur_region_valid
  when(s3_cur_region_valid){
    s4_pf_gen_cur_region.region_addr := s3_region_paddr
    s4_pf_gen_cur_region.alias_bits := get_alias_bits(s3_region_vaddr)
    s4_pf_gen_cur_region.region_tag := s3_cur_region_tag
    s4_pf_gen_cur_region.region_bits := s3_cur_region_bits
    s4_pf_gen_cur_region.paddr_valid := true.B
    s4_pf_gen_cur_region.decr_mode := false.B
  }
  // incr/decr requests are held until the arbiter grants them; on a cross-page
  // neighbor only the vaddr is known, so paddr_valid is dropped (TLB needed later)
  s4_pf_gen_incr_region_valid := s3_incr_region_valid ||
    (!pf_gen_req_arb.io.in(1).ready && s4_pf_gen_incr_region_valid)
  when(s3_incr_region_valid){
    s4_pf_gen_incr_region.region_addr := Mux(s3_incr_crosspage, s3_incr_region_vaddr, s3_incr_region_paddr)
    s4_pf_gen_incr_region.alias_bits := s3_incr_alias_bits
    s4_pf_gen_incr_region.region_tag := s3_incr_region_tag
    s4_pf_gen_incr_region.region_bits := s3_incr_region_bits
    s4_pf_gen_incr_region.paddr_valid := !s3_incr_crosspage
    s4_pf_gen_incr_region.decr_mode := false.B
  }
  s4_pf_gen_decr_region_valid := s3_decr_region_valid ||
    (!pf_gen_req_arb.io.in(2).ready && s4_pf_gen_decr_region_valid)
  when(s3_decr_region_valid){
    s4_pf_gen_decr_region.region_addr := Mux(s3_decr_crosspage, s3_decr_region_vaddr, s3_decr_region_paddr)
    s4_pf_gen_decr_region.alias_bits := s3_decr_alias_bits
    s4_pf_gen_decr_region.region_tag := s3_decr_region_tag
    s4_pf_gen_decr_region.region_bits := s3_decr_region_bits
    s4_pf_gen_decr_region.paddr_valid := !s3_decr_crosspage
    s4_pf_gen_decr_region.decr_mode := true.B
  }

  pf_gen_req_arb.io.in.head.valid := s4_pf_gen_cur_region_valid
  pf_gen_req_arb.io.in.head.bits := s4_pf_gen_cur_region
  pf_gen_req_arb.io.in.head.bits.debug_source_type := HW_PREFETCH_PHT_CUR.U
  pf_gen_req_arb.io.in(1).valid := s4_pf_gen_incr_region_valid
  pf_gen_req_arb.io.in(1).bits := s4_pf_gen_incr_region
  pf_gen_req_arb.io.in(1).bits.debug_source_type := HW_PREFETCH_PHT_INC.U
  pf_gen_req_arb.io.in(2).valid := s4_pf_gen_decr_region_valid
  pf_gen_req_arb.io.in(2).bits := s4_pf_gen_decr_region
  pf_gen_req_arb.io.in(2).bits.debug_source_type := HW_PREFETCH_PHT_DEC.U
  // downstream (the prefetch filter) accepts one request per cycle unconditionally
  pf_gen_req_arb.io.out.ready := true.B

  io.pf_gen_req.valid := pf_gen_req_arb.io.out.valid
  io.pf_gen_req.bits := pf_gen_req_arb.io.out.bits

  XSPerfAccumulate("sms_pht_update", io.agt_update.valid)
  XSPerfAccumulate("sms_pht_update_hit", s2_valid && s2_evict && s2_pht_hit)
  XSPerfAccumulate("sms_pht_lookup", io.s2_agt_lookup.valid)
  XSPerfAccumulate("sms_pht_lookup_hit", s2_valid && !s2_evict && s2_pht_hit)
  for(i <- 0 until smsParams.pht_ways){
    XSPerfAccumulate(s"sms_pht_write_way_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.waymask.get(i))
  }
  for(i <- 0 until PHT_SETS){
    XSPerfAccumulate(s"sms_pht_write_set_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.setIdx === i.U)
  }
  XSPerfAccumulate(s"sms_pht_pf_gen", io.pf_gen_req.valid)
}
903
// One entry of the prefetch filter: tracks, per region, which blocks were
// requested for prefetch and which have already been issued, so duplicate
// prefetches to the same block are suppressed. Field order defines bit layout.
class PrefetchFilterEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val region_tag = UInt(REGION_TAG_WIDTH.W)           // hashed region tag used for entry matching
  val region_addr = UInt(REGION_ADDR_BITS.W)          // region address; rewritten with the physical address once the TLB responds
  val region_bits = UInt(REGION_BLKS.W)               // per-block bitmap of prefetches requested in this region
  val filter_bits = UInt(REGION_BLKS.W)               // per-block bitmap of prefetches already issued
  val alias_bits = UInt(2.W)                          // virtual alias bits forwarded with L1 prefetch requests
  val paddr_valid = Bool()                            // region_addr currently holds a physical address (no TLB needed)
  val decr_mode = Bool()                              // issue order: true = from highest pending block down, false = from lowest up
  val debug_source_type = UInt(log2Up(nSourceType).W) // which generator (AGT/stride/PHT-*) produced this entry, for tracing
}
914
// Prefetch filter: accepts region prefetch-generation requests, merges requests
// to the same region, translates virtual region addresses via the TLB, and
// issues one block prefetch address per cycle, marking issued blocks so they
// are not sent twice.
class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val gen_req = Flipped(ValidIO(new PfGenReq()))
    val tlb_req = new TlbRequestIO(2)
    val l2_pf_addr = ValidIO(UInt(PAddrBits.W))
    val pf_alias_bits = Output(UInt(2.W))
    val debug_source_type = Output(UInt(log2Up(nSourceType).W))
  })
  val entries = Seq.fill(smsParams.pf_filter_size){ Reg(new PrefetchFilterEntry()) }
  val valids = Seq.fill(smsParams.pf_filter_size){ RegInit(false.B) }
  val replacement = ReplacementPolicy.fromString("plru", smsParams.pf_filter_size)

  // previous-cycle request, used to squash back-to-back requests to the same region
  val prev_valid = RegNext(io.gen_req.valid, false.B)
  val prev_gen_req = RegEnable(io.gen_req.bits, io.gen_req.valid)

  // round-robin arbiters over entries: one for TLB translation, one for issuing prefetches
  val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, smsParams.pf_filter_size))
  val pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), smsParams.pf_filter_size))

  io.tlb_req.req <> tlb_req_arb.io.out
  io.tlb_req.resp.ready := true.B
  io.tlb_req.req_kill := false.B
  io.l2_pf_addr.valid := pf_req_arb.io.out.valid
  io.l2_pf_addr.bits := pf_req_arb.io.out.bits
  // alias bits of the entry whose prefetch won arbitration this cycle
  io.pf_alias_bits := Mux1H(entries.zipWithIndex.map({
    case (entry, i) => (i.U === pf_req_arb.io.chosen) -> entry.alias_bits
  }))
  pf_req_arb.io.out.ready := true.B

  io.debug_source_type := VecInit(entries.map(_.debug_source_type))(pf_req_arb.io.chosen)

  // forward declarations: s1 results feed back into s0 match / TLB-request logic
  val s1_valid = Wire(Bool())
  val s1_hit = Wire(Bool())
  val s1_replace_vec = Wire(UInt(smsParams.pf_filter_size.W))
  val s1_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))

  // s0: entries lookup
  val s0_gen_req = io.gen_req.bits
  val s0_match_prev = prev_valid && (s0_gen_req.region_tag === prev_gen_req.region_tag)
  val s0_gen_req_valid = io.gen_req.valid && !s0_match_prev
  // an entry being replaced by s1 this cycle must not count as a match
  val s0_match_vec = valids.indices.map(i => {
    valids(i) && entries(i).region_tag === s0_gen_req.region_tag && !(s1_valid && !s1_hit && s1_replace_vec(i))
  })
  val s0_any_matched = Cat(s0_match_vec).orR
  val s0_replace_vec = UIntToOH(replacement.way)
  val s0_hit = s0_gen_req_valid && s0_any_matched

  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val is_evicted = s1_valid && s1_replace_vec(i)
    // request translation for valid entries that still hold a virtual address,
    // unless the entry already fired a TLB request last cycle or is being evicted
    tlb_req_arb.io.in(i).valid := v && !s1_tlb_fire_vec(i) && !ent.paddr_valid && !is_evicted
    tlb_req_arb.io.in(i).bits.vaddr := Cat(ent.region_addr, 0.U(log2Up(REGION_SIZE).W))
    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
    tlb_req_arb.io.in(i).bits.size := 3.U
    // NOTE(review): this robIdx assignment is immediately overridden by the
    // `debug := DontCare` two lines below (Chisel last-connect semantics)
    tlb_req_arb.io.in(i).bits.debug.robIdx := DontCare
    tlb_req_arb.io.in(i).bits.no_translate := false.B
    tlb_req_arb.io.in(i).bits.debug := DontCare

    // blocks requested but not yet issued
    val pending_req_vec = ent.region_bits & (~ent.filter_bits).asUInt
    // NOTE(review): the index ranges below use pf_filter_size where REGION_BLKS
    // (the width of pending_req_vec) looks intended; they coincide only under
    // the default parameters (both 16) — confirm before changing either param
    val first_one_offset = PriorityMux(
      pending_req_vec.asBools,
      (0 until smsParams.pf_filter_size).map(_.U(REGION_OFFSET.W))
    )
    val last_one_offset = PriorityMux(
      pending_req_vec.asBools.reverse,
      (0 until smsParams.pf_filter_size).reverse.map(_.U(REGION_OFFSET.W))
    )
    // issue lowest pending block first, or highest first in decr mode
    val pf_addr = Cat(
      ent.region_addr,
      Mux(ent.decr_mode, last_one_offset, first_one_offset),
      0.U(log2Up(dcacheParameters.blockBytes).W)
    )
    // only issue once the region address is physical and the entry is not being evicted
    pf_req_arb.io.in(i).valid := v && Cat(pending_req_vec).orR && ent.paddr_valid && !is_evicted
    pf_req_arb.io.in(i).bits := pf_addr
  }

  val s0_tlb_fire_vec = VecInit(tlb_req_arb.io.in.map(_.fire))
  val s0_pf_fire_vec = VecInit(pf_req_arb.io.in.map(_.fire))

  val s0_update_way = OHToUInt(s0_match_vec)
  val s0_replace_way = replacement.way
  val s0_access_way = Mux(s0_any_matched, s0_update_way, s0_replace_way)
  when(s0_gen_req_valid){
    replacement.access(s0_access_way)
  }

  // s1: update or alloc
  val s1_valid_r = RegNext(s0_gen_req_valid, false.B)
  val s1_hit_r = RegEnable(s0_hit, false.B, s0_gen_req_valid)
  val s1_gen_req = RegEnable(s0_gen_req, s0_gen_req_valid)
  val s1_replace_vec_r = RegEnable(s0_replace_vec, s0_gen_req_valid && !s0_hit)
  val s1_update_vec = RegEnable(VecInit(s0_match_vec).asUInt, s0_gen_req_valid && s0_hit)
  val s1_tlb_fire_vec_r = RegNext(s0_tlb_fire_vec, 0.U.asTypeOf(s0_tlb_fire_vec))
  val s1_alloc_entry = Wire(new PrefetchFilterEntry())
  s1_valid := s1_valid_r
  s1_hit := s1_hit_r
  s1_replace_vec := s1_replace_vec_r
  s1_tlb_fire_vec := s1_tlb_fire_vec_r.asUInt
  // freshly allocated entry: copy the request, nothing issued yet
  s1_alloc_entry.region_tag := s1_gen_req.region_tag
  s1_alloc_entry.region_addr := s1_gen_req.region_addr
  s1_alloc_entry.region_bits := s1_gen_req.region_bits
  s1_alloc_entry.paddr_valid := s1_gen_req.paddr_valid
  s1_alloc_entry.decr_mode := s1_gen_req.decr_mode
  s1_alloc_entry.filter_bits := 0.U
  s1_alloc_entry.alias_bits := s1_gen_req.alias_bits
  s1_alloc_entry.debug_source_type := s1_gen_req.debug_source_type
  // Per-entry update. The when-blocks are ordered so that (last-connect wins):
  // alloc overrides update/pf/tlb effects for the entry being replaced.
  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val alloc = s1_valid && !s1_hit && s1_replace_vec(i)
    val update = s1_valid && s1_hit && s1_update_vec(i)
    // for pf: use s0 data
    val pf_fired = s0_pf_fire_vec(i)
    val tlb_fired = s1_tlb_fire_vec(i) && !io.tlb_req.resp.bits.miss
    when(tlb_fired){
      // always true here since tlb_fired already requires !miss
      ent.paddr_valid := !io.tlb_req.resp.bits.miss
      ent.region_addr := region_addr(io.tlb_req.resp.bits.paddr.head)
    }
    when(update){
      // merge newly requested blocks into the existing entry
      ent.region_bits := ent.region_bits | s1_gen_req.region_bits
    }
    when(pf_fired){
      // mark the just-issued block as filtered
      val curr_bit = UIntToOH(block_addr(pf_req_arb.io.in(i).bits)(REGION_OFFSET - 1, 0))
      ent.filter_bits := ent.filter_bits | curr_bit
    }
    when(alloc){
      ent := s1_alloc_entry
      v := true.B
    }
  }
  when(s1_valid && s1_hit){
    assert(PopCount(s1_update_vec) === 1.U, "sms_pf_filter: multi-hit")
  }

  XSPerfAccumulate("sms_pf_filter_recv_req", io.gen_req.valid)
  XSPerfAccumulate("sms_pf_filter_hit", s1_valid && s1_hit)
  XSPerfAccumulate("sms_pf_filter_tlb_req", io.tlb_req.req.fire)
  XSPerfAccumulate("sms_pf_filter_tlb_resp_miss", io.tlb_req.resp.fire && io.tlb_req.resp.bits.miss)
  for(i <- 0 until smsParams.pf_filter_size){
    XSPerfAccumulate(s"sms_pf_filter_access_way_$i", s0_gen_req_valid && s0_access_way === i.U)
  }
  XSPerfAccumulate("sms_pf_filter_l2_req", io.l2_pf_addr.valid)
}
1054
// Top level of the SMS (Spatial Memory Streaming) prefetcher: filters duplicate
// load-train inputs, picks the oldest load per cycle, trains the active
// generation table, stride predictor and pattern history table, and funnels
// their generated requests through the prefetch filter to the L2 prefetch port.
class SMSPrefetcher()(implicit p: Parameters) extends BasePrefecher with HasSMSModuleHelper {

  // the block-filter / oldest-load selection below is hard-wired for 2 load ports
  require(exuParameters.LduCnt == 2)

  val io_agt_en = IO(Input(Bool()))
  val io_stride_en = IO(Input(Bool()))
  val io_pht_en = IO(Input(Bool()))
  val io_act_threshold = IO(Input(UInt(REGION_OFFSET.W)))
  val io_act_stride = IO(Input(UInt(6.W)))

  val ld_curr = io.ld_in.map(_.bits)
  val ld_curr_block_tag = ld_curr.map(x => block_hash_tag(x.vaddr))

  // block filter: drop trains that hit the same cache block as last cycle's
  // trains or as the other load port this cycle
  val ld_prev = io.ld_in.map(ld => RegEnable(ld.bits, ld.valid))
  val ld_prev_block_tag = ld_curr_block_tag.zip(io.ld_in.map(_.valid)).map({
    case (tag, v) => RegEnable(tag, v)
  })
  val ld_prev_vld = io.ld_in.map(ld => RegNext(ld.valid, false.B))

  val ld_curr_match_prev = ld_curr_block_tag.map(cur_tag =>
    Cat(ld_prev_block_tag.zip(ld_prev_vld).map({
      case (prev_tag, prev_vld) => prev_vld && prev_tag === cur_tag
    })).orR
  )
  val ld0_match_ld1 = io.ld_in.head.valid && io.ld_in.last.valid && ld_curr_block_tag.head === ld_curr_block_tag.last
  val ld_curr_vld = Seq(
    io.ld_in.head.valid && !ld_curr_match_prev.head,
    io.ld_in.last.valid && !ld_curr_match_prev.last && !ld0_match_ld1
  )
  // When both ports are valid, train on the ROB-older load first and keep the
  // younger one pending for the next cycle.
  val ld0_older_than_ld1 = Cat(ld_curr_vld).andR && isBefore(ld_curr.head.uop.robIdx, ld_curr.last.uop.robIdx)
  val pending_vld = RegNext(Cat(ld_curr_vld).andR, false.B)
  val pending_sel_ld0 = RegNext(Mux(pending_vld, ld0_older_than_ld1, !ld0_older_than_ld1))
  val pending_ld = Mux(pending_sel_ld0, ld_prev.head, ld_prev.last)
  val pending_ld_block_tag = Mux(pending_sel_ld0, ld_prev_block_tag.head, ld_prev_block_tag.last)
  val oldest_ld = Mux(pending_vld,
    pending_ld,
    Mux(ld0_older_than_ld1 || !ld_curr_vld.last, ld_curr.head, ld_curr.last)
  )

  val train_ld = RegEnable(oldest_ld, pending_vld || Cat(ld_curr_vld).orR)

  val train_block_tag = block_hash_tag(train_ld.vaddr)
  val train_region_tag = train_block_tag.head(REGION_TAG_WIDTH)

  // neighbor regions of the trained region (for cross-region detection in the AGT)
  val train_region_addr_raw = region_addr(train_ld.vaddr)(REGION_TAG_WIDTH + 2 * VADDR_HASH_WIDTH - 1, 0)
  val train_region_addr_p1 = Cat(0.U(1.W), train_region_addr_raw) + 1.U
  val train_region_addr_m1 = Cat(0.U(1.W), train_region_addr_raw) - 1.U
  // addr_p1 or addr_m1 is valid? (extra MSB catches wrap-around on +1/-1)
  val train_allow_cross_region_p1 = !train_region_addr_p1.head(1).asBool
  val train_allow_cross_region_m1 = !train_region_addr_m1.head(1).asBool

  val train_region_p1_tag = region_hash_tag(train_region_addr_p1.tail(1))
  val train_region_m1_tag = region_hash_tag(train_region_addr_m1.tail(1))

  val train_region_p1_cross_page = page_bit(train_region_addr_p1) ^ page_bit(train_region_addr_raw)
  val train_region_m1_cross_page = page_bit(train_region_addr_m1) ^ page_bit(train_region_addr_raw)

  val train_region_paddr = region_addr(train_ld.paddr)
  val train_region_vaddr = region_addr(train_ld.vaddr)
  val train_region_offset = train_block_tag(REGION_OFFSET - 1, 0)
  val train_vld = RegNext(pending_vld || Cat(ld_curr_vld).orR, false.B)


  // prefetch stage0
  val active_gen_table = Module(new ActiveGenerationTable())
  val stride = Module(new StridePF())
  val pht = Module(new PatternHistoryTable())
  val pf_filter = Module(new PrefetchFilter())

  // one extra pipeline stage in front of the sub-predictors
  val train_vld_s0 = RegNext(train_vld, false.B)
  val train_s0 = RegEnable(train_ld, train_vld)
  val train_region_tag_s0 = RegEnable(train_region_tag, train_vld)
  val train_region_p1_tag_s0 = RegEnable(train_region_p1_tag, train_vld)
  val train_region_m1_tag_s0 = RegEnable(train_region_m1_tag, train_vld)
  val train_allow_cross_region_p1_s0 = RegEnable(train_allow_cross_region_p1, train_vld)
  val train_allow_cross_region_m1_s0 = RegEnable(train_allow_cross_region_m1, train_vld)
  val train_pht_tag_s0 = RegEnable(pht_tag(train_ld.uop.cf.pc), train_vld)
  val train_pht_index_s0 = RegEnable(pht_index(train_ld.uop.cf.pc), train_vld)
  val train_region_offset_s0 = RegEnable(train_region_offset, train_vld)
  val train_region_p1_cross_page_s0 = RegEnable(train_region_p1_cross_page, train_vld)
  val train_region_m1_cross_page_s0 = RegEnable(train_region_m1_cross_page, train_vld)
  val train_region_paddr_s0 = RegEnable(train_region_paddr, train_vld)
  val train_region_vaddr_s0 = RegEnable(train_region_vaddr, train_vld)

  active_gen_table.io.agt_en := io_agt_en
  active_gen_table.io.act_threshold := io_act_threshold
  active_gen_table.io.act_stride := io_act_stride
  active_gen_table.io.s0_lookup.valid := train_vld_s0
  active_gen_table.io.s0_lookup.bits.region_tag := train_region_tag_s0
  active_gen_table.io.s0_lookup.bits.region_p1_tag := train_region_p1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_m1_tag := train_region_m1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_offset := train_region_offset_s0
  active_gen_table.io.s0_lookup.bits.pht_index := train_pht_index_s0
  active_gen_table.io.s0_lookup.bits.pht_tag := train_pht_tag_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_p1 := train_allow_cross_region_p1_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_m1 := train_allow_cross_region_m1_s0
  active_gen_table.io.s0_lookup.bits.region_p1_cross_page := train_region_p1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_m1_cross_page := train_region_m1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_paddr := train_region_paddr_s0
  active_gen_table.io.s0_lookup.bits.region_vaddr := train_region_vaddr_s0
  active_gen_table.io.s2_stride_hit := stride.io.s2_gen_req.valid

  stride.io.stride_en := io_stride_en
  stride.io.s0_lookup.valid := train_vld_s0
  stride.io.s0_lookup.bits.pc := train_s0.uop.cf.pc(STRIDE_PC_BITS - 1, 0)
  stride.io.s0_lookup.bits.vaddr := Cat(
    train_region_vaddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s0_lookup.bits.paddr := Cat(
    train_region_paddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s1_valid := active_gen_table.io.s1_sel_stride

  // PHT is looked up on AGT lookups and trained on AGT evictions
  pht.io.s2_agt_lookup := active_gen_table.io.s2_pht_lookup
  pht.io.agt_update := active_gen_table.io.s2_evict

  // Select the prefetch-generation source: AGT/stride (mutually exclusive,
  // asserted below) take priority over the PHT.
  val pht_gen_valid = pht.io.pf_gen_req.valid && io_pht_en
  val agt_gen_valid = active_gen_table.io.s2_pf_gen_req.valid
  val stride_gen_valid = stride.io.s2_gen_req.valid
  val pf_gen_req = Mux(agt_gen_valid || stride_gen_valid,
    Mux1H(Seq(
      agt_gen_valid -> active_gen_table.io.s2_pf_gen_req.bits,
      stride_gen_valid -> stride.io.s2_gen_req.bits
    )),
    pht.io.pf_gen_req.bits
  )
  assert(!(agt_gen_valid && stride_gen_valid))
  pf_filter.io.gen_req.valid := pht_gen_valid || agt_gen_valid || stride_gen_valid
  pf_filter.io.gen_req.bits := pf_gen_req
  io.tlb_req <> pf_filter.io.tlb_req
  // NOTE(review): strict '>' excludes the address 0x80000000 itself —
  // presumably the memory base; confirm whether '>=' was intended
  val is_valid_address = pf_filter.io.l2_pf_addr.bits > 0x80000000L.U
  io.pf_addr.valid := pf_filter.io.l2_pf_addr.valid && io.enable && is_valid_address
  io.pf_addr.bits := pf_filter.io.l2_pf_addr.bits
  // L1 prefetch port is wired but disabled: valid is tied low below
  io.l1_req.bits.paddr := pf_filter.io.l2_pf_addr.bits
  io.l1_req.bits.alias := pf_filter.io.pf_alias_bits
  io.l1_req.bits.is_store := true.B
  io.l1_req.bits.confidence := 1.U
  io.l1_req.valid := false.B

  for((train, i) <- io.ld_in.zipWithIndex){
    XSPerfAccumulate(s"pf_train_miss_${i}", train.valid && train.bits.miss)
    XSPerfAccumulate(s"pf_train_prefetched_${i}", train.valid && train.bits.meta_prefetch)
  }
  // ChiselDB trace of issued prefetch addresses and their source predictor
  val trace = Wire(new L1MissTrace)
  trace.vaddr := 0.U
  trace.pc := 0.U
  trace.paddr := io.pf_addr.bits
  trace.source := pf_filter.io.debug_source_type
  val table = ChiselDB.createTable("L1MissTrace", new L1MissTrace)
  table.log(trace, io.pf_addr.fire, "SMSPrefetcher", clock, reset)

  XSPerfAccumulate("sms_pf_gen_conflict",
    pht_gen_valid && agt_gen_valid
  )
  XSPerfAccumulate("sms_pht_disabled", pht.io.pf_gen_req.valid && !io_pht_en)
  XSPerfAccumulate("sms_agt_disabled", active_gen_table.io.s2_pf_gen_req.valid && !io_agt_en)
  XSPerfAccumulate("sms_pf_real_issued", io.pf_addr.valid)
  XSPerfAccumulate("sms_l1_req_valid", io.l1_req.valid)
  XSPerfAccumulate("sms_l1_req_fire", io.l1_req.fire)
}
1215}