// xref: /XiangShan/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala (revision bb2f3f51dd67f6e16e0cc1ffe43368c9fc7e4aef)
1package xiangshan.mem.prefetch
2
3import org.chipsalliance.cde.config.Parameters
4import chisel3._
5import chisel3.util._
6import xiangshan._
7import utils._
8import utility._
9import xiangshan.cache.HasDCacheParameters
10import xiangshan.cache.mmu._
11import xiangshan.mem.{LdPrefetchTrainBundle, StPrefetchTrainBundle, L1PrefetchReq}
12import xiangshan.mem.trace._
13import xiangshan.mem.HasL1PrefetchSourceParameter
14
// Configuration parameters for the SMS (Spatial Memory Streaming) prefetcher.
// NOTE(review): region_size appears to be in bytes (divided by blockBytes in
// HasSMSModuleHelper) — confirm against callers before changing defaults.
case class SMSParams
(
  region_size: Int = 1024,          // spatial region size tracked per AGT/PHT entry
  vaddr_hash_width: Int = 5,        // width of the folded-xor hash of high vaddr bits
  block_addr_raw_width: Int = 10,   // low (un-hashed) bits kept from a block address
  stride_pc_bits: Int = 10,         // PC bits used to match entries in the stride table
  max_stride: Int = 1024,           // bound on tracked stride; sets STRIDE_BLK_ADDR_BITS
  stride_entries: Int = 16,         // number of stride-table entries
  active_gen_table_size: Int = 16,  // number of AGT entries
  pht_size: Int = 64,               // total PHT entries (sets * ways)
  pht_ways: Int = 2,                // PHT associativity
  pht_hist_bits: Int = 2,           // saturating-counter width per PHT history position
  pht_tag_bits: Int = 13,           // PHT tag width
  pht_lookup_queue_size: Int = 4,   // depth of the PHT lookup/evict queues
  pf_filter_size: Int = 16,         // prefetch-filter entries
  train_filter_size: Int = 8        // train-filter entries
) extends PrefetcherParams
32
// Parameter-derived constants and address-manipulation helpers shared by all
// SMS sub-modules (stride prefetcher, AGT, PHT, filters).
trait HasSMSModuleHelper extends HasCircularQueuePtrHelper with HasDCacheParameters
{ this: HasXSParameter =>
  val smsParams = coreParams.prefetcher.get.asInstanceOf[SMSParams]
  // width of a cache-block-granular virtual address
  val BLK_ADDR_WIDTH = VAddrBits - log2Up(dcacheParameters.blockBytes)
  val REGION_SIZE = smsParams.region_size
  // number of cache blocks contained in one region
  val REGION_BLKS = smsParams.region_size / dcacheParameters.blockBytes
  // width of a region-granular virtual address
  val REGION_ADDR_BITS = VAddrBits - log2Up(REGION_SIZE)
  // bits needed to index a block within a region
  val REGION_OFFSET = log2Up(REGION_BLKS)
  val VADDR_HASH_WIDTH = smsParams.vaddr_hash_width
  // low address bits kept verbatim in a tag (not hashed)
  val BLK_ADDR_RAW_WIDTH = smsParams.block_addr_raw_width
  val REGION_ADDR_RAW_WIDTH = BLK_ADDR_RAW_WIDTH - REGION_OFFSET
  // tag = raw low bits ++ folded hash of the high bits
  val BLK_TAG_WIDTH = BLK_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val REGION_TAG_WIDTH = REGION_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val PHT_INDEX_BITS = log2Up(smsParams.pht_size / smsParams.pht_ways)
  val PHT_TAG_BITS = smsParams.pht_tag_bits
  val PHT_HIST_BITS = smsParams.pht_hist_bits
  // page bit index in block addr
  val BLOCK_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / dcacheParameters.blockBytes)
  // page bit index in region addr
  val REGION_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / smsParams.region_size)
  val STRIDE_PC_BITS = smsParams.stride_pc_bits
  val STRIDE_BLK_ADDR_BITS = log2Up(smsParams.max_stride)

  // Drop the in-block offset: full address -> block-granular address.
  def block_addr(x: UInt): UInt = {
    val offset = log2Up(dcacheParameters.blockBytes)
    x(x.getWidth - 1, offset)
  }

  // Drop the in-region offset: full address -> region-granular address.
  def region_addr(x: UInt): UInt = {
    val offset = log2Up(REGION_SIZE)
    x(x.getWidth - 1, offset)
  }

  // One-hot mask with bit `off` set (marks one block inside a region bitmap).
  def region_offset_to_bits(off: UInt): UInt = {
    (1.U << off).asUInt
  }

  // Compress a region address into a REGION_TAG_WIDTH tag: keep the raw low
  // bits and xor-fold 3*VADDR_HASH_WIDTH high bits down to VADDR_HASH_WIDTH.
  def region_hash_tag(rg_addr: UInt): UInt = {
    val low = rg_addr(REGION_ADDR_RAW_WIDTH - 1, 0)
    val high = rg_addr(REGION_ADDR_RAW_WIDTH + 3 * VADDR_HASH_WIDTH - 1, REGION_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  // Select the lowest page-distinguishing bit of a region-granular address.
  def page_bit(region_addr: UInt): UInt = {
    region_addr(log2Up(dcacheParameters.pageSize/REGION_SIZE))
  }

  // Block-granular analogue of region_hash_tag, applied to a full address.
  def block_hash_tag(x: UInt): UInt = {
    val blk_addr = block_addr(x)
    val low = blk_addr(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = blk_addr(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  // Fold 3*VADDR_HASH_WIDTH bits down to VADDR_HASH_WIDTH by xor-ing three slices.
  def vaddr_hash(x: UInt): UInt = {
    val width = VADDR_HASH_WIDTH
    val low = x(width - 1, 0)
    val mid = x(2 * width - 1, width)
    val high = x(3 * width - 1, 2 * width)
    low ^ mid ^ high
  }

  // PHT set index from the pc: pc[PHT_INDEX_BITS:2] with a top bit formed by
  // xor-ing pc(1) into pc(PHT_INDEX_BITS+1).
  def pht_index(pc: UInt): UInt = {
    val low_bits = pc(PHT_INDEX_BITS, 2)
    val hi_bit = pc(1) ^ pc(PHT_INDEX_BITS+1)
    Cat(hi_bit, low_bits)
  }

  // PHT tag: the pc bits immediately above the index field.
  def pht_tag(pc: UInt): UInt = {
    pc(PHT_INDEX_BITS + 2 + PHT_TAG_BITS - 1, PHT_INDEX_BITS + 2)
  }

  // Cache alias bits of a region's base address (vaddr rebuilt with a zero
  // in-region offset, then passed to the dcache's get_alias helper).
  def get_alias_bits(region_vaddr: UInt): UInt = {
    val offset = log2Up(REGION_SIZE)
    get_alias(Cat(region_vaddr, 0.U(offset.W)))
  }
}
111
// PC-indexed stride prefetcher. Learns a per-PC, block-granular stride with a
// 2-bit confidence counter; when a lookup's delta matches the learned stride,
// it emits (at s2) a region-granular prefetch one stride ahead.
class StridePF()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val stride_en = Input(Bool())
    // s0: training lookup (pc + vaddr/paddr of the triggering access)
    val s0_lookup = Flipped(new ValidIO(new Bundle() {
      val pc = UInt(STRIDE_PC_BITS.W)
      val vaddr = UInt(VAddrBits.W)
      val paddr = UInt(PAddrBits.W)
    }))
    // external qualifier for the s1 stage
    val s1_valid = Input(Bool())
    // s2: generated prefetch request
    val s2_gen_req = ValidIO(new PfGenReq())
  })

  val prev_valid = GatedValidRegNext(io.s0_lookup.valid, false.B)
  val prev_pc = RegEnable(io.s0_lookup.bits.pc, io.s0_lookup.valid)

  // drop back-to-back lookups carrying an identical pc
  val s0_valid = io.s0_lookup.valid && !(prev_valid && prev_pc === io.s0_lookup.bits.pc)

  def entry_map[T](fn: Int => T) = (0 until smsParams.stride_entries).map(fn)

  val replacement = ReplacementPolicy.fromString("plru", smsParams.stride_entries)
  val valids = entry_map(_ => RegInit(false.B))
  val entries_pc = entry_map(_ => Reg(UInt(STRIDE_PC_BITS.W)) )
  // 2-bit saturating confidence; the stride field is only rewritten while conf < 2
  val entries_conf = entry_map(_ => RegInit(1.U(2.W)))
  // last block address (low STRIDE_BLK_ADDR_BITS bits) seen for this pc
  val entries_last_addr = entry_map(_ => Reg(UInt(STRIDE_BLK_ADDR_BITS.W)) )
  // learned signed stride, in cache blocks
  val entries_stride = entry_map(_ => Reg(SInt((STRIDE_BLK_ADDR_BITS+1).W)))


  // s0: fully-associative match on the pc
  val s0_match_vec = valids.zip(entries_pc).map({
    case (v, pc) => v && pc === io.s0_lookup.bits.pc
  })

  val s0_hit = s0_valid && Cat(s0_match_vec).orR
  val s0_miss = s0_valid && !s0_hit
  val s0_matched_conf = Mux1H(s0_match_vec, entries_conf)
  val s0_matched_last_addr = Mux1H(s0_match_vec, entries_last_addr)
  val s0_matched_last_stride = Mux1H(s0_match_vec, entries_stride)

  // s1: train the matched entry (hit) or allocate the PLRU victim (miss)
  val s1_hit = GatedValidRegNext(s0_hit) && io.s1_valid
  val s1_alloc = GatedValidRegNext(s0_miss) && io.s1_valid
  val s1_vaddr = RegEnable(io.s0_lookup.bits.vaddr, s0_valid)
  val s1_paddr = RegEnable(io.s0_lookup.bits.paddr, s0_valid)
  val s1_conf = RegEnable(s0_matched_conf, s0_valid)
  val s1_last_addr = RegEnable(s0_matched_last_addr, s0_valid)
  val s1_last_stride = RegEnable(s0_matched_last_stride, s0_valid)
  val s1_match_vec = RegEnable(VecInit(s0_match_vec), s0_valid)

  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  val s1_new_stride_vaddr = s1_vaddr(BLOCK_OFFSET + STRIDE_BLK_ADDR_BITS - 1, BLOCK_OFFSET)
  // signed delta between the current and previous block address for this pc
  val s1_new_stride = (0.U(1.W) ## s1_new_stride_vaddr).asSInt - (0.U(1.W) ## s1_last_addr).asSInt
  val s1_stride_non_zero = s1_last_stride =/= 0.S
  val s1_stride_match = s1_new_stride === s1_last_stride && s1_stride_non_zero
  val s1_replace_idx = replacement.way

  for(i <- 0 until smsParams.stride_entries){
    val alloc = s1_alloc && i.U === s1_replace_idx
    val update = s1_hit && s1_match_vec(i)
    when(update){
      assert(valids(i))
      // saturating increment on a stride match, saturating decrement otherwise
      entries_conf(i) := Mux(s1_stride_match,
        Mux(s1_conf === 3.U, 3.U, s1_conf + 1.U),
        Mux(s1_conf === 0.U, 0.U, s1_conf - 1.U)
      )
      entries_last_addr(i) := s1_new_stride_vaddr
      // only re-learn the stride while confidence is still low (conf < 2)
      when(!s1_conf(1)){
        entries_stride(i) := s1_new_stride
      }
    }
    when(alloc){
      // fresh entry: confidence 0, stride unknown (0 disables matching)
      valids(i) := true.B
      entries_pc(i) := prev_pc
      entries_conf(i) := 0.U
      entries_last_addr(i) := s1_new_stride_vaddr
      entries_stride(i) := 0.S
    }
    assert(!(update && alloc))
  }
  when(s1_hit){
    replacement.access(OHToUInt(s1_match_vec.asUInt))
  }.elsewhen(s1_alloc){
    replacement.access(s1_replace_idx)
  }

  val s1_block_vaddr = block_addr(s1_vaddr)
  // prefetch target: current block address plus the learned stride
  val s1_pf_block_vaddr = (s1_block_vaddr.asSInt + s1_last_stride).asUInt
  val s1_pf_cross_page = s1_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT) =/= s1_block_vaddr(BLOCK_ADDR_PAGE_BIT)

  // s2: emit the prefetch request if the stride matched at s1
  val s2_pf_gen_valid = GatedValidRegNext(s1_hit && s1_stride_match, false.B)
  // the physical frame can only be reused when the target stays in the same page
  val s2_pf_gen_paddr_valid = RegEnable(!s1_pf_cross_page, s1_hit && s1_stride_match)
  val s2_pf_block_vaddr = RegEnable(s1_pf_block_vaddr, s1_hit && s1_stride_match)
  val s2_block_paddr = RegEnable(block_addr(s1_paddr), s1_hit && s1_stride_match)

  // splice physical page frame bits with the virtual in-page offset
  val s2_pf_block_addr = Mux(s2_pf_gen_paddr_valid,
    Cat(
      s2_block_paddr(PAddrBits - BLOCK_OFFSET - 1, BLOCK_ADDR_PAGE_BIT),
      s2_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT - 1, 0)
    ),
    s2_pf_block_vaddr
  )
  val s2_pf_full_addr = Wire(UInt(VAddrBits.W))
  s2_pf_full_addr := s2_pf_block_addr ## 0.U(BLOCK_OFFSET.W)

  val s2_pf_region_addr = region_addr(s2_pf_full_addr)
  val s2_pf_region_offset = s2_pf_block_addr(REGION_OFFSET - 1, 0)

  val s2_full_vaddr = Wire(UInt(VAddrBits.W))
  s2_full_vaddr := s2_pf_block_vaddr ## 0.U(BLOCK_OFFSET.W)

  // region tag is always derived from the virtual target address
  val s2_region_tag = region_hash_tag(region_addr(s2_full_vaddr))

  io.s2_gen_req.valid := s2_pf_gen_valid && io.stride_en
  io.s2_gen_req.bits.region_tag := s2_region_tag
  io.s2_gen_req.bits.region_addr := s2_pf_region_addr
  io.s2_gen_req.bits.alias_bits := get_alias_bits(region_addr(s2_full_vaddr))
  io.s2_gen_req.bits.region_bits := region_offset_to_bits(s2_pf_region_offset)
  io.s2_gen_req.bits.paddr_valid := s2_pf_gen_paddr_valid
  io.s2_gen_req.bits.decr_mode := false.B
  io.s2_gen_req.bits.debug_source_type := HW_PREFETCH_STRIDE.U

}
231
// One Active Generation Table entry: tracks the footprint of a region while
// it is being actively accessed. Field order defines the bundle's bit layout.
class AGTEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)    // PHT set this entry trains when evicted
  val pht_tag = UInt(PHT_TAG_BITS.W)
  val region_bits = UInt(REGION_BLKS.W)     // bitmap of blocks accessed in this region
  val region_tag = UInt(REGION_TAG_WIDTH.W) // hashed region tag (see region_hash_tag)
  val region_offset = UInt(REGION_OFFSET.W) // block offset of the allocating access
  val access_cnt = UInt((REGION_BLKS-1).U.getWidth.W) // saturating count of distinct blocks touched
  val decr_mode = Bool()                    // accesses move toward lower addresses
}
241
// Region-granular prefetch-generation request passed from the generators
// (stride / AGT / PHT) downstream.
class PfGenReq()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val region_tag = UInt(REGION_TAG_WIDTH.W)   // hashed tag of the target region
  val region_addr = UInt(REGION_ADDR_BITS.W)  // region address (physical iff paddr_valid)
  val region_bits = UInt(REGION_BLKS.W)       // bitmap of blocks to prefetch
  val paddr_valid = Bool()                    // region_addr carries physical bits
  val decr_mode = Bool()                      // pattern runs toward lower addresses
  val alias_bits = UInt(2.W)                  // dcache alias bits of the region base
  val debug_source_type = UInt(log2Up(nSourceType).W) // which generator produced this req
}
251
// Request from the dcache asking the AGT to drop the entry covering `vaddr`
// (the dcache has released the corresponding block).
class AGTEvictReq()(implicit p: Parameters) extends XSBundle {
  val vaddr = UInt(VAddrBits.W)
}
255
// Active Generation Table (AGT): fully-associative table of regions currently
// being trained. A lookup matches the region tag and its +/-1 neighbors; hits
// accumulate the footprint, cross-region hits can trigger a next/previous
// region prefetch, and evicted entries are handed to the PHT for training.
// 3-stage pipeline: s0 CAM lookup, s1 update/alloc + pf computation, s2 outputs.
class ActiveGenerationTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val agt_en = Input(Bool())
    val s0_lookup = Flipped(ValidIO(new Bundle() {
      val region_tag = UInt(REGION_TAG_WIDTH.W)
      val region_p1_tag = UInt(REGION_TAG_WIDTH.W)   // tag of region + 1
      val region_m1_tag = UInt(REGION_TAG_WIDTH.W)   // tag of region - 1
      val region_offset = UInt(REGION_OFFSET.W)
      val pht_index = UInt(PHT_INDEX_BITS.W)
      val pht_tag = UInt(PHT_TAG_BITS.W)
      val allow_cross_region_p1 = Bool()
      val allow_cross_region_m1 = Bool()
      val region_p1_cross_page = Bool()
      val region_m1_cross_page = Bool()
      val region_paddr = UInt(REGION_ADDR_BITS.W)
      val region_vaddr = UInt(REGION_ADDR_BITS.W)
    }))
    // dcache has released a block, evict it from agt
    val s0_dcache_evict = Flipped(DecoupledIO(new AGTEvictReq))
    // tell the arbiter the stride pf should be selected for this op
    val s1_sel_stride = Output(Bool())
    val s2_stride_hit = Input(Bool())
    // if agt/stride missed, try lookup pht
    val s2_pht_lookup = ValidIO(new PhtLookup())
    // evict entry to pht
    val s2_evict = ValidIO(new AGTEntry())
    val s2_pf_gen_req = ValidIO(new PfGenReq())
    // minimum access_cnt for a region to count as "active"
    val act_threshold = Input(UInt(REGION_OFFSET.W))
    // block distance to prefetch ahead/behind
    val act_stride = Input(UInt(6.W))
  })

  val entries = Seq.fill(smsParams.active_gen_table_size){ Reg(new AGTEntry()) }
  val valids = Seq.fill(smsParams.active_gen_table_size){ RegInit(false.B) }
  val replacement = ReplacementPolicy.fromString("plru", smsParams.active_gen_table_size)

  // mask of the entry being replaced in s1 this cycle; fed back to s0 so a
  // concurrent s0 hit/evict on that entry can be detected as a conflict
  val s1_replace_mask_w = Wire(UInt(smsParams.active_gen_table_size.W))

  val s0_lookup = io.s0_lookup.bits
  val s0_lookup_valid = io.s0_lookup.valid

  val s0_dcache_evict = io.s0_dcache_evict.bits
  val s0_dcache_evict_valid = io.s0_dcache_evict.valid
  val s0_dcache_evict_tag = block_hash_tag(s0_dcache_evict.vaddr).head(REGION_TAG_WIDTH)

  val prev_lookup = RegEnable(s0_lookup, s0_lookup_valid)
  val prev_lookup_valid = GatedValidRegNext(s0_lookup_valid, false.B)

  // back-to-back lookups to the same region are filtered from allocation
  val s0_match_prev = prev_lookup_valid && s0_lookup.region_tag === prev_lookup.region_tag

  // CAM match of a region tag against all valid entries
  def gen_match_vec(region_tag: UInt): Seq[Bool] = {
    entries.zip(valids).map({
      case (ent, v) => v && ent.region_tag === region_tag
    })
  }

  val region_match_vec_s0 = gen_match_vec(s0_lookup.region_tag)
  val region_p1_match_vec_s0 = gen_match_vec(s0_lookup.region_p1_tag)
  val region_m1_match_vec_s0 = gen_match_vec(s0_lookup.region_m1_tag)

  val any_region_match = Cat(region_match_vec_s0).orR
  val any_region_p1_match = Cat(region_p1_match_vec_s0).orR && s0_lookup.allow_cross_region_p1
  val any_region_m1_match = Cat(region_m1_match_vec_s0).orR && s0_lookup.allow_cross_region_m1

  val region_match_vec_dcache_evict_s0 = gen_match_vec(s0_dcache_evict_tag)
  val any_region_dcache_evict_match = Cat(region_match_vec_dcache_evict_s0).orR
  // s0 dcache evict a entry that may be replaced in s1
  val s0_dcache_evict_conflict = Cat(VecInit(region_match_vec_dcache_evict_s0).asUInt & s1_replace_mask_w).orR
  val s0_do_dcache_evict = io.s0_dcache_evict.fire && any_region_dcache_evict_match

  // lookups have priority over dcache-evict requests for the s0 slot
  io.s0_dcache_evict.ready := !s0_lookup_valid && !s0_dcache_evict_conflict

  val s0_region_hit = any_region_match
  val s0_cross_region_hit = any_region_m1_match || any_region_p1_match
  val s0_alloc = s0_lookup_valid && !s0_region_hit && !s0_match_prev
  // pf source priority: exact region match > region-1 match > region+1 match
  val s0_pf_gen_match_vec = valids.indices.map(i => {
    Mux(any_region_match,
      region_match_vec_s0(i),
      Mux(any_region_m1_match,
        region_m1_match_vec_s0(i), region_p1_match_vec_s0(i)
      )
    )
  })
  val s0_agt_entry = Wire(new AGTEntry())

  s0_agt_entry.pht_index := s0_lookup.pht_index
  s0_agt_entry.pht_tag := s0_lookup.pht_tag
  s0_agt_entry.region_bits := region_offset_to_bits(s0_lookup.region_offset)
  s0_agt_entry.region_tag := s0_lookup.region_tag
  s0_agt_entry.region_offset := s0_lookup.region_offset
  s0_agt_entry.access_cnt := 1.U
  // lookup_region + 1 == entry_region
  // lookup_region = entry_region - 1 => decr mode
  s0_agt_entry.decr_mode := !s0_region_hit && !any_region_m1_match && any_region_p1_match
  val s0_replace_way = replacement.way
  val s0_replace_mask = UIntToOH(s0_replace_way)
  // s0 hit a entry that may be replaced in s1
  val s0_update_conflict = Cat(VecInit(region_match_vec_s0).asUInt & s1_replace_mask_w).orR
  val s0_update = s0_lookup_valid && s0_region_hit && !s0_update_conflict

  val s0_access_way = Mux1H(
    Seq(s0_update, s0_alloc),
    Seq(OHToUInt(region_match_vec_s0), s0_replace_way)
  )
  when(s0_update || s0_alloc) {
    replacement.access(s0_access_way)
  }

  // stage1: update/alloc
  // region hit, update entry
  val s1_update = GatedValidRegNext(s0_update, false.B)
  val s1_update_mask = RegEnable(VecInit(region_match_vec_s0), s0_lookup_valid)
  val s1_agt_entry = RegEnable(s0_agt_entry, s0_lookup_valid)
  val s1_cross_region_match = RegEnable(s0_cross_region_hit, s0_lookup_valid)
  val s1_alloc = GatedValidRegNext(s0_alloc, false.B)
  val s1_alloc_entry = s1_agt_entry
  val s1_do_dcache_evict = GatedValidRegNext(s0_do_dcache_evict, false.B)
  // victim: the dcache-matched entry on a dcache evict, else the PLRU way
  val s1_replace_mask = Mux(
    s1_do_dcache_evict,
    RegEnable(VecInit(region_match_vec_dcache_evict_s0).asUInt, s0_do_dcache_evict),
    RegEnable(s0_replace_mask, s0_lookup_valid)
  )
  s1_replace_mask_w := s1_replace_mask & Fill(smsParams.active_gen_table_size, s1_alloc || s1_do_dcache_evict)
  val s1_evict_entry = Mux1H(s1_replace_mask, entries)
  val s1_evict_valid = Mux1H(s1_replace_mask, valids)
  // pf gen
  val s1_pf_gen_match_vec = RegEnable(VecInit(s0_pf_gen_match_vec), s0_lookup_valid)
  val s1_region_paddr = RegEnable(s0_lookup.region_paddr, s0_lookup_valid)
  val s1_region_vaddr = RegEnable(s0_lookup.region_vaddr, s0_lookup_valid)
  val s1_region_offset = RegEnable(s0_lookup.region_offset, s0_lookup_valid)
  for(i <- entries.indices){
    val alloc = s1_replace_mask(i) && s1_alloc
    val update = s1_update_mask(i) && s1_update
    val update_entry = WireInit(entries(i))
    // merge the new access into the footprint bitmap
    update_entry.region_bits := entries(i).region_bits | s1_agt_entry.region_bits
    // saturating count; only bumps when a previously-unseen block is touched
    update_entry.access_cnt := Mux(entries(i).access_cnt === (REGION_BLKS - 1).U,
      entries(i).access_cnt,
      entries(i).access_cnt + (s1_agt_entry.region_bits & (~entries(i).region_bits).asUInt).orR
    )
    valids(i) := valids(i) || alloc
    entries(i) := Mux(alloc, s1_alloc_entry, Mux(update, update_entry, entries(i)))
  }

  when(s1_update){
    assert(PopCount(s1_update_mask) === 1.U, "multi-agt-update")
  }
  when(s1_alloc){
    assert(PopCount(s1_replace_mask) === 1.U, "multi-agt-alloc")
  }

  // pf_addr
  // 1.hit => pf_addr = lookup_addr + (decr ? -1 : 1)
  // 2.lookup region - 1 hit => lookup_addr + 1 (incr mode)
  // 3.lookup region + 1 hit => lookup_addr - 1 (decr mode)
  val s1_hited_entry_decr = Mux1H(s1_update_mask, entries.map(_.decr_mode))
  val s1_pf_gen_decr_mode = Mux(s1_update,
    s1_hited_entry_decr,
    s1_agt_entry.decr_mode
  )

  // block-granular target = {region_vaddr_tag_bits, region_offset} +/- act_stride
  val s1_pf_gen_vaddr_inc = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) + io.act_stride
  val s1_pf_gen_vaddr_dec = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) - io.act_stride
  val s1_vaddr_inc_cross_page = s1_pf_gen_vaddr_inc(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s1_vaddr_dec_cross_page = s1_pf_gen_vaddr_dec(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
  // top (carry/borrow) bit set => wrapped past the representable range
  val s1_vaddr_inc_cross_max_lim = s1_pf_gen_vaddr_inc.head(1).asBool
  val s1_vaddr_dec_cross_max_lim = s1_pf_gen_vaddr_dec.head(1).asBool

  //val s1_pf_gen_vaddr_p1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) + 1.U
  //val s1_pf_gen_vaddr_m1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) - 1.U
  val s1_pf_gen_vaddr = Cat(
    s1_region_vaddr(REGION_ADDR_BITS - 1, REGION_TAG_WIDTH),
    Mux(s1_pf_gen_decr_mode,
      s1_pf_gen_vaddr_dec.tail(1).head(REGION_TAG_WIDTH),
      s1_pf_gen_vaddr_inc.tail(1).head(REGION_TAG_WIDTH)
    )
  )
  val s1_pf_gen_offset = Mux(s1_pf_gen_decr_mode,
    s1_pf_gen_vaddr_dec(REGION_OFFSET - 1, 0),
    s1_pf_gen_vaddr_inc(REGION_OFFSET - 1, 0)
  )
  val s1_pf_gen_offset_mask = UIntToOH(s1_pf_gen_offset)
  val s1_pf_gen_access_cnt = Mux1H(s1_pf_gen_match_vec, entries.map(_.access_cnt))
  // a region only generates AGT prefetches once its footprint is large enough
  val s1_in_active_page = s1_pf_gen_access_cnt > io.act_threshold
  val s1_pf_gen_valid = prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && Mux(s1_pf_gen_decr_mode,
    !s1_vaddr_dec_cross_max_lim,
    !s1_vaddr_inc_cross_max_lim
  ) && s1_in_active_page && io.agt_en
  val s1_pf_gen_paddr_valid = Mux(s1_pf_gen_decr_mode, !s1_vaddr_dec_cross_page, !s1_vaddr_inc_cross_page)
  // splice physical frame bits with virtual in-page bits when same-page
  val s1_pf_gen_region_addr = Mux(s1_pf_gen_paddr_valid,
    Cat(s1_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT), s1_pf_gen_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)),
    s1_pf_gen_vaddr
  )
  val s1_pf_gen_region_tag = region_hash_tag(s1_pf_gen_vaddr)
  // incr mode: prefetch the target block and everything above it
  val s1_pf_gen_incr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == 0) true.B else !s1_pf_gen_offset_mask(i - 1, 0).orR
  })).asUInt
  // decr mode: prefetch the target block and everything below it
  val s1_pf_gen_decr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == REGION_BLKS - 1) true.B
    else !s1_pf_gen_offset_mask(REGION_BLKS - 1, i + 1).orR
  })).asUInt
  val s1_pf_gen_region_bits = Mux(s1_pf_gen_decr_mode,
    s1_pf_gen_decr_region_bits,
    s1_pf_gen_incr_region_bits
  )
  val s1_pht_lookup_valid = Wire(Bool())
  val s1_pht_lookup = Wire(new PhtLookup())

  // fall back to a PHT lookup whenever the AGT did not generate a prefetch
  s1_pht_lookup_valid := !s1_pf_gen_valid && prev_lookup_valid
  s1_pht_lookup.pht_index := s1_agt_entry.pht_index
  s1_pht_lookup.pht_tag := s1_agt_entry.pht_tag
  s1_pht_lookup.region_vaddr := s1_region_vaddr
  s1_pht_lookup.region_paddr := s1_region_paddr
  s1_pht_lookup.region_offset := s1_region_offset

  // region matched but is not yet active: let the stride pf handle it
  io.s1_sel_stride := prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && !s1_in_active_page

  // stage2: gen pf reg / evict entry to pht
  val s2_do_dcache_evict = GatedValidRegNext(s1_do_dcache_evict, false.B)
  val s2_evict_entry = RegEnable(s1_evict_entry, (s1_alloc || s1_do_dcache_evict) && s1_evict_valid)
  val s2_evict_valid = GatedValidRegNext((s1_alloc || s1_do_dcache_evict) && s1_evict_valid, false.B)
  val s2_paddr_valid = RegEnable(s1_pf_gen_paddr_valid, s1_pf_gen_valid)
  val s2_pf_gen_region_tag = RegEnable(s1_pf_gen_region_tag, s1_pf_gen_valid)
  val s2_pf_gen_decr_mode = RegEnable(s1_pf_gen_decr_mode, s1_pf_gen_valid)
  val s2_pf_gen_region_paddr = RegEnable(s1_pf_gen_region_addr, s1_pf_gen_valid)
  val s2_pf_gen_alias_bits = RegEnable(get_alias_bits(s1_pf_gen_vaddr), s1_pf_gen_valid)
  val s2_pf_gen_region_bits = RegEnable(s1_pf_gen_region_bits, s1_pf_gen_valid)
  val s2_pf_gen_valid = GatedValidRegNext(s1_pf_gen_valid, false.B)
  val s2_pht_lookup_valid = GatedValidRegNext(s1_pht_lookup_valid, false.B) && !io.s2_stride_hit
  val s2_pht_lookup = RegEnable(s1_pht_lookup, s1_pht_lookup_valid)

  // only train the PHT with entries that saw more than one access
  io.s2_evict.valid := s2_evict_valid && (s2_evict_entry.access_cnt > 1.U)
  io.s2_evict.bits := s2_evict_entry

  io.s2_pf_gen_req.bits.region_tag := s2_pf_gen_region_tag
  io.s2_pf_gen_req.bits.region_addr := s2_pf_gen_region_paddr
  io.s2_pf_gen_req.bits.alias_bits := s2_pf_gen_alias_bits
  io.s2_pf_gen_req.bits.region_bits := s2_pf_gen_region_bits
  io.s2_pf_gen_req.bits.paddr_valid := s2_paddr_valid
  io.s2_pf_gen_req.bits.decr_mode := s2_pf_gen_decr_mode
  // NOTE(review): the AGT pf request is force-disabled (valid tied to false.B),
  // so the "sms_agt_pf_gen*" perf counters below always read 0 — confirm this
  // is intentional and not a leftover debug change.
  io.s2_pf_gen_req.valid := false.B
  io.s2_pf_gen_req.bits.debug_source_type := HW_PREFETCH_AGT.U

  io.s2_pht_lookup.valid := s2_pht_lookup_valid
  io.s2_pht_lookup.bits := s2_pht_lookup

  XSPerfAccumulate("sms_agt_in", io.s0_lookup.valid)
  XSPerfAccumulate("sms_agt_alloc", s1_alloc) // cross region match or filter evict
  XSPerfAccumulate("sms_agt_update", s1_update) // entry hit
  XSPerfAccumulate("sms_agt_pf_gen", io.s2_pf_gen_req.valid)
  XSPerfAccumulate("sms_agt_pf_gen_paddr_valid",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.paddr_valid
  )
  XSPerfAccumulate("sms_agt_pf_gen_decr_mode",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.decr_mode
  )
  for(i <- 0 until smsParams.active_gen_table_size){
    XSPerfAccumulate(s"sms_agt_access_entry_$i",
      s1_alloc && s1_replace_mask(i) || s1_update && s1_update_mask(i)
    )
  }
  XSPerfAccumulate("sms_agt_evict", s2_evict_valid)
  XSPerfAccumulate("sms_agt_evict_by_plru", s2_evict_valid && !s2_do_dcache_evict)
  XSPerfAccumulate("sms_agt_evict_by_dcache", s2_evict_valid && s2_do_dcache_evict)
  XSPerfAccumulate("sms_agt_evict_one_hot_pattern", s2_evict_valid && (s2_evict_entry.access_cnt === 1.U))
}
519
// Lookup request sent from the AGT to the Pattern History Table when neither
// the AGT nor the stride prefetcher produced a prefetch.
class PhtLookup()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)      // PHT set index (derived from pc)
  val pht_tag = UInt(PHT_TAG_BITS.W)          // PHT tag (derived from pc)
  val region_paddr = UInt(REGION_ADDR_BITS.W) // physical region addr of the trigger
  val region_vaddr = UInt(REGION_ADDR_BITS.W) // virtual region addr of the trigger
  val region_offset = UInt(REGION_OFFSET.W)   // trigger block offset inside the region
}
527
// One PHT way. `hist` holds 2*(REGION_BLKS-1) saturating counters recording
// which blocks around the trigger access were historically touched
// (REGION_BLKS-1 positions on each side of the trigger offset).
class PhtEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val hist = Vec(2 * (REGION_BLKS - 1), UInt(PHT_HIST_BITS.W))
  val tag = UInt(PHT_TAG_BITS.W)
  val decr_mode = Bool()
}
533
534class PatternHistoryTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
535  val io = IO(new Bundle() {
536    // receive agt evicted entry
537    val agt_update = Flipped(ValidIO(new AGTEntry()))
538    // at stage2, if we know agt missed, lookup pht
539    val s2_agt_lookup = Flipped(ValidIO(new PhtLookup()))
540    // pht-generated prefetch req
541    val pf_gen_req = ValidIO(new PfGenReq())
542  })
543
544  val pht_ram = Module(new SRAMTemplate[PhtEntry](new PhtEntry,
545    set = smsParams.pht_size / smsParams.pht_ways,
546    way =smsParams.pht_ways,
547    singlePort = true
548  ))
549  def PHT_SETS = smsParams.pht_size / smsParams.pht_ways
550  // clockgated on pht_valids
551  val pht_valids_reg = RegInit(VecInit(Seq.fill(smsParams.pht_ways){
552    VecInit(Seq.fill(PHT_SETS){false.B})
553  }))
554  val pht_valids_enable = WireInit(VecInit(Seq.fill(PHT_SETS) {false.B}))
555  val pht_valids_next = WireInit(pht_valids_reg)
556  for(j <- 0 until PHT_SETS){
557    when(pht_valids_enable(j)){
558      (0 until smsParams.pht_ways).foreach(i => pht_valids_reg(i)(j) := pht_valids_next(i)(j))
559    }
560  }
561
562  val replacement = Seq.fill(PHT_SETS) { ReplacementPolicy.fromString("plru", smsParams.pht_ways) }
563
564  val lookup_queue = Module(new OverrideableQueue(new PhtLookup, smsParams.pht_lookup_queue_size))
565  lookup_queue.io.in := io.s2_agt_lookup
566  val lookup = lookup_queue.io.out
567
568  val evict_queue = Module(new OverrideableQueue(new AGTEntry, smsParams.pht_lookup_queue_size))
569  evict_queue.io.in := io.agt_update
570  val evict = evict_queue.io.out
571
572  XSPerfAccumulate("sms_pht_lookup_in", lookup_queue.io.in.fire)
573  XSPerfAccumulate("sms_pht_lookup_out", lookup_queue.io.out.fire)
574  XSPerfAccumulate("sms_pht_evict_in", evict_queue.io.in.fire)
575  XSPerfAccumulate("sms_pht_evict_out", evict_queue.io.out.fire)
576
577  val s3_ram_en = Wire(Bool())
578  val s1_valid = Wire(Bool())
579  // if s1.raddr == s2.waddr or s3 is using ram port, block s1
580  val s1_wait = Wire(Bool())
581  // pipe s0: select an op from [lookup, update], generate ram read addr
582  val s0_valid = lookup.valid || evict.valid
583
584  evict.ready := !s1_valid || !s1_wait
585  lookup.ready := evict.ready && !evict.valid
586
587  val s0_ram_raddr = Mux(evict.valid,
588    evict.bits.pht_index,
589    lookup.bits.pht_index
590  )
591  val s0_tag = Mux(evict.valid, evict.bits.pht_tag, lookup.bits.pht_tag)
592  val s0_region_offset = Mux(evict.valid, evict.bits.region_offset, lookup.bits.region_offset)
593  val s0_region_paddr = lookup.bits.region_paddr
594  val s0_region_vaddr = lookup.bits.region_vaddr
595  val s0_region_bits = evict.bits.region_bits
596  val s0_decr_mode = evict.bits.decr_mode
597  val s0_evict = evict.valid
598
599  // pipe s1: send addr to ram
600  val s1_valid_r = RegInit(false.B)
601  s1_valid_r := Mux(s1_valid && s1_wait, true.B, s0_valid)
602  s1_valid := s1_valid_r
603  val s1_reg_en = s0_valid && (!s1_wait || !s1_valid)
604  val s1_ram_raddr = RegEnable(s0_ram_raddr, s1_reg_en)
605  val s1_tag = RegEnable(s0_tag, s1_reg_en)
606  val s1_region_bits = RegEnable(s0_region_bits, s1_reg_en)
607  val s1_decr_mode = RegEnable(s0_decr_mode, s1_reg_en)
608  val s1_region_paddr = RegEnable(s0_region_paddr, s1_reg_en)
609  val s1_region_vaddr = RegEnable(s0_region_vaddr, s1_reg_en)
610  val s1_region_offset = RegEnable(s0_region_offset, s1_reg_en)
611  val s1_pht_valids = pht_valids_reg.map(way => Mux1H(
612    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
613    way
614  ))
615  val s1_evict = RegEnable(s0_evict, s1_reg_en)
616  val s1_replace_way = Mux1H(
617    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
618    replacement.map(_.way)
619  )
620  val s1_hist_update_mask = Cat(
621    Fill(REGION_BLKS - 1, true.B), 0.U((REGION_BLKS - 1).W)
622  ) >> s1_region_offset
623  val s1_hist_bits = Cat(
624    s1_region_bits.head(REGION_BLKS - 1) >> s1_region_offset,
625    (Cat(
626      s1_region_bits.tail(1), 0.U((REGION_BLKS - 1).W)
627    ) >> s1_region_offset)(REGION_BLKS - 2, 0)
628  )
629
630  // pipe s2: generate ram write addr/data
631  val s2_valid = GatedValidRegNext(s1_valid && !s1_wait, false.B)
632  val s2_reg_en = s1_valid && !s1_wait
633  val s2_hist_update_mask = RegEnable(s1_hist_update_mask, s2_reg_en)
634  val s2_hist_bits = RegEnable(s1_hist_bits, s2_reg_en)
635  val s2_tag = RegEnable(s1_tag, s2_reg_en)
636  val s2_region_bits = RegEnable(s1_region_bits, s2_reg_en)
637  val s2_decr_mode = RegEnable(s1_decr_mode, s2_reg_en)
638  val s2_region_paddr = RegEnable(s1_region_paddr, s2_reg_en)
639  val s2_region_vaddr = RegEnable(s1_region_vaddr, s2_reg_en)
640  val s2_region_offset = RegEnable(s1_region_offset, s2_reg_en)
641  val s2_region_offset_mask = region_offset_to_bits(s2_region_offset)
642  val s2_evict = RegEnable(s1_evict, s2_reg_en)
643  val s2_pht_valids = s1_pht_valids.map(v => RegEnable(v, s2_reg_en))
644  val s2_replace_way = RegEnable(s1_replace_way, s2_reg_en)
645  val s2_ram_waddr = RegEnable(s1_ram_raddr, s2_reg_en)
646  val s2_ram_rdata = pht_ram.io.r.resp.data
647  val s2_ram_rtags = s2_ram_rdata.map(_.tag)
648  val s2_tag_match_vec = s2_ram_rtags.map(t => t === s2_tag)
649  val s2_hit_vec = s2_tag_match_vec.zip(s2_pht_valids).map({
650    case (tag_match, v) => v && tag_match
651  })
652  val s2_hist_update = s2_ram_rdata.map(way => VecInit(way.hist.zipWithIndex.map({
653    case (h, i) =>
654      val do_update = s2_hist_update_mask(i)
655      val hist_updated = Mux(s2_hist_bits(i),
656        Mux(h.andR, h, h + 1.U),
657        Mux(h === 0.U, 0.U, h - 1.U)
658      )
659      Mux(do_update, hist_updated, h)
660  })))
661  val s2_hist_pf_gen = Mux1H(s2_hit_vec, s2_ram_rdata.map(way => VecInit(way.hist.map(_.head(1))).asUInt))
662  val s2_new_hist = VecInit(s2_hist_bits.asBools.map(b => Cat(0.U((PHT_HIST_BITS - 1).W), b)))
663  val s2_pht_hit = Cat(s2_hit_vec).orR
  // Final history written back for this entry: on a PHT hit merge the incoming
  // pattern into the stored history, otherwise start a fresh history.
  val s2_hist = Mux(s2_pht_hit, Mux1H(s2_hit_vec, s2_hist_update), s2_new_hist)
  val s2_repl_way_mask = UIntToOH(s2_replace_way)
  // Virtual addresses of the two neighboring regions (used for cross-region pf-gen in s3).
  val s2_incr_region_vaddr = s2_region_vaddr + 1.U
  val s2_decr_region_vaddr = s2_region_vaddr - 1.U

  // pipe s3: send addr/data to ram, gen pf_req
  val s3_valid = GatedValidRegNext(s2_valid, false.B)
  val s3_evict = RegEnable(s2_evict, s2_valid)
  val s3_hist = RegEnable(s2_hist, s2_valid)
  val s3_hist_pf_gen = RegEnable(s2_hist_pf_gen, s2_valid)
  val s3_hist_update_mask = RegEnable(s2_hist_update_mask.asUInt, s2_valid)
  val s3_region_offset = RegEnable(s2_region_offset, s2_valid)
  val s3_region_offset_mask = RegEnable(s2_region_offset_mask, s2_valid)
  val s3_decr_mode = RegEnable(s2_decr_mode, s2_valid)
  val s3_region_paddr = RegEnable(s2_region_paddr, s2_valid)
  val s3_region_vaddr = RegEnable(s2_region_vaddr, s2_valid)
  val s3_pht_tag = RegEnable(s2_tag, s2_valid)
  val s3_hit_vec = s2_hit_vec.map(h => RegEnable(h, s2_valid))
  val s3_hit = Cat(s3_hit_vec).orR
  val s3_hit_way = OHToUInt(s3_hit_vec)
  val s3_repl_way = RegEnable(s2_replace_way, s2_valid)
  val s3_repl_way_mask = RegEnable(s2_repl_way_mask, s2_valid)
  // One-hot mask of the PHT set being written; reused below to select which
  // set's valid bits / replacement state to update.
  val s3_repl_update_mask = RegEnable(VecInit((0 until PHT_SETS).map(i => i.U === s2_ram_waddr)), s2_valid)
  val s3_ram_waddr = RegEnable(s2_ram_waddr, s2_valid)
  val s3_incr_region_vaddr = RegEnable(s2_incr_region_vaddr, s2_valid)
  val s3_decr_region_vaddr = RegEnable(s2_decr_region_vaddr, s2_valid)
  // PHT ram is only written on an AGT-eviction update, never on a pure lookup.
  s3_ram_en := s3_valid && s3_evict
  val s3_ram_wdata = Wire(new PhtEntry())
  s3_ram_wdata.hist := s3_hist
  s3_ram_wdata.tag := s3_pht_tag
  s3_ram_wdata.decr_mode := s3_decr_mode

  // Stall s1 when s2 is about to write the same set s1 wants to read, or while
  // s3 holds the single ram write port (single-port read/write conflict).
  s1_wait := (s2_valid && s2_evict && s2_ram_waddr === s1_ram_raddr) || s3_ram_en

  // On an evicting update that missed in the PHT, allocate the entry by marking
  // the victim (set, way) valid.
  for((valids, way_idx) <- pht_valids_next.zipWithIndex){
    val update_way = s3_repl_way_mask(way_idx)
    for((v, set_idx) <- valids.zipWithIndex){
      val update_set = s3_repl_update_mask(set_idx)
      when(s3_valid && s3_evict && !s3_hit && update_set && update_way){
        pht_valids_enable(set_idx) := true.B
        v := true.B
      }
    }
  }
  // Touch per-set replacement state: the hit way on a hit, otherwise (on an
  // evicting allocation) the chosen victim way.
  for((r, i) <- replacement.zipWithIndex){
    when(s3_valid && s3_repl_update_mask(i)){
      when(s3_hit){
        r.access(s3_hit_way)
      }.elsewhen(s3_evict){
        r.access(s3_repl_way)
      }
    }
  }

  // Way-mask for the ram write: update the hit way in place, or write the
  // replacement victim on allocation.
  val s3_way_mask = Mux(s3_hit,
    VecInit(s3_hit_vec).asUInt,
    s3_repl_way_mask,
  ).asUInt

  pht_ram.io.r(
    s1_valid, s1_ram_raddr
  )
  pht_ram.io.w(
    s3_ram_en, s3_ram_wdata, s3_ram_waddr, s3_way_mask
  )

  // Sanity check: a lookup must not match in every way at once.
  when(s3_valid && s3_hit){
    assert(!Cat(s3_hit_vec).andR, "sms_pht: multi-hit!")
  }
733
  // generate pf req if hit
  // The stored pattern is 2*(REGION_BLKS-1) bits around the trigger offset:
  // the "hi" half covers blocks after the trigger, the "lo" half blocks before it.
  val s3_hist_hi = s3_hist_pf_gen.head(REGION_BLKS - 1)
  val s3_hist_lo = s3_hist_pf_gen.tail(REGION_BLKS - 1)
  // Re-align both halves to the current access's offset within its region.
  val s3_hist_hi_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_hi) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  val s3_hist_lo_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_lo) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  // Split the shifted pattern into bits landing in the current region and bits
  // spilling into the next (+1) / previous (-1) region.
  val s3_cur_region_bits = Cat(s3_hist_hi_shifted.tail(REGION_BLKS - 1), 0.U(1.W)) |
    Cat(0.U(1.W), s3_hist_lo_shifted.head(REGION_BLKS - 1))
  val s3_incr_region_bits = Cat(0.U(1.W), s3_hist_hi_shifted.head(REGION_BLKS - 1))
  val s3_decr_region_bits = Cat(s3_hist_lo_shifted.tail(REGION_BLKS - 1), 0.U(1.W))
  // Prefetches are only generated on a lookup hit, not on an eviction update.
  val s3_pf_gen_valid = s3_valid && s3_hit && !s3_evict
  val s3_cur_region_valid =  s3_pf_gen_valid && (s3_hist_pf_gen & s3_hist_update_mask).orR
  val s3_incr_region_valid = s3_pf_gen_valid && (s3_hist_hi & (~s3_hist_update_mask.head(REGION_BLKS - 1)).asUInt).orR
  val s3_decr_region_valid = s3_pf_gen_valid && (s3_hist_lo & (~s3_hist_update_mask.tail(REGION_BLKS - 1)).asUInt).orR
  val s3_incr_alias_bits = get_alias_bits(s3_incr_region_vaddr)
  val s3_decr_alias_bits = get_alias_bits(s3_decr_region_vaddr)
  // Neighbor-region paddr reuses the current region's physical page number with
  // the neighbor's in-page offset; only usable when the neighbor stays on the
  // same page (see *_crosspage below).
  val s3_incr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_decr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_incr_crosspage = s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_decr_crosspage = s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_cur_region_tag = region_hash_tag(s3_region_vaddr)
  val s3_incr_region_tag = region_hash_tag(s3_incr_region_vaddr)
  val s3_decr_region_tag = region_hash_tag(s3_decr_region_vaddr)

  // s4: one holding register per direction (cur / incr / decr) feeding a
  // 3-way priority arbiter whose output is always drained (out.ready is tied high).
  val pf_gen_req_arb = Module(new Arbiter(new PfGenReq, 3))
  val s4_pf_gen_cur_region_valid = RegInit(false.B)
  val s4_pf_gen_cur_region = Reg(new PfGenReq)
  val s4_pf_gen_incr_region_valid = RegInit(false.B)
  val s4_pf_gen_incr_region = Reg(new PfGenReq)
  val s4_pf_gen_decr_region_valid = RegInit(false.B)
  val s4_pf_gen_decr_region = Reg(new PfGenReq)

  // Cur-region request is on arbiter port 0 (highest priority) and is therefore
  // not held across cycles on backpressure.
  s4_pf_gen_cur_region_valid := s3_cur_region_valid
  when(s3_cur_region_valid){
    s4_pf_gen_cur_region.region_addr := s3_region_paddr
    s4_pf_gen_cur_region.alias_bits := get_alias_bits(s3_region_vaddr)
    s4_pf_gen_cur_region.region_tag := s3_cur_region_tag
    s4_pf_gen_cur_region.region_bits := s3_cur_region_bits
    s4_pf_gen_cur_region.paddr_valid := true.B
    s4_pf_gen_cur_region.decr_mode := false.B
  }
  // Incr/decr requests are held while their arbiter port is busy; a new s3
  // request overwrites a still-pending one.
  s4_pf_gen_incr_region_valid := s3_incr_region_valid ||
    (!pf_gen_req_arb.io.in(1).ready && s4_pf_gen_incr_region_valid)
  when(s3_incr_region_valid){
    // When the +1 region crosses a page, send its vaddr instead (paddr_valid
    // cleared) so the downstream filter can translate it via the TLB.
    s4_pf_gen_incr_region.region_addr := Mux(s3_incr_crosspage, s3_incr_region_vaddr, s3_incr_region_paddr)
    s4_pf_gen_incr_region.alias_bits := s3_incr_alias_bits
    s4_pf_gen_incr_region.region_tag := s3_incr_region_tag
    s4_pf_gen_incr_region.region_bits := s3_incr_region_bits
    s4_pf_gen_incr_region.paddr_valid := !s3_incr_crosspage
    s4_pf_gen_incr_region.decr_mode := false.B
  }
  s4_pf_gen_decr_region_valid := s3_decr_region_valid ||
    (!pf_gen_req_arb.io.in(2).ready && s4_pf_gen_decr_region_valid)
  when(s3_decr_region_valid){
    s4_pf_gen_decr_region.region_addr := Mux(s3_decr_crosspage, s3_decr_region_vaddr, s3_decr_region_paddr)
    s4_pf_gen_decr_region.alias_bits := s3_decr_alias_bits
    s4_pf_gen_decr_region.region_tag := s3_decr_region_tag
    s4_pf_gen_decr_region.region_bits := s3_decr_region_bits
    s4_pf_gen_decr_region.paddr_valid := !s3_decr_crosspage
    s4_pf_gen_decr_region.decr_mode := true.B
  }

  pf_gen_req_arb.io.in.head.valid := s4_pf_gen_cur_region_valid
  pf_gen_req_arb.io.in.head.bits := s4_pf_gen_cur_region
  pf_gen_req_arb.io.in.head.bits.debug_source_type := HW_PREFETCH_PHT_CUR.U
  pf_gen_req_arb.io.in(1).valid := s4_pf_gen_incr_region_valid
  pf_gen_req_arb.io.in(1).bits := s4_pf_gen_incr_region
  pf_gen_req_arb.io.in(1).bits.debug_source_type := HW_PREFETCH_PHT_INC.U
  pf_gen_req_arb.io.in(2).valid := s4_pf_gen_decr_region_valid
  pf_gen_req_arb.io.in(2).bits := s4_pf_gen_decr_region
  pf_gen_req_arb.io.in(2).bits.debug_source_type := HW_PREFETCH_PHT_DEC.U
  pf_gen_req_arb.io.out.ready := true.B

  io.pf_gen_req.valid := pf_gen_req_arb.io.out.valid
  io.pf_gen_req.bits := pf_gen_req_arb.io.out.bits

  XSPerfAccumulate("sms_pht_update", io.agt_update.valid)
  XSPerfAccumulate("sms_pht_update_hit", s2_valid && s2_evict && s2_pht_hit)
  XSPerfAccumulate("sms_pht_lookup", io.s2_agt_lookup.valid)
  XSPerfAccumulate("sms_pht_lookup_hit", s2_valid && !s2_evict && s2_pht_hit)
  for(i <- 0 until smsParams.pht_ways){
    XSPerfAccumulate(s"sms_pht_write_way_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.waymask.get(i))
  }
  for(i <- 0 until PHT_SETS){
    XSPerfAccumulate(s"sms_pht_write_set_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.setIdx === i.U)
  }
  XSPerfAccumulate(s"sms_pht_pf_gen", io.pf_gen_req.valid)
826}
827
/**
  * One prefetch-filter entry: a pending region-granularity prefetch request.
  * Field order defines the bundle's bit layout — do not reorder.
  */
class PrefetchFilterEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  // Hashed region tag, used to merge duplicate requests for the same region.
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  // Region address: a paddr once paddr_valid is set, otherwise a vaddr awaiting TLB translation.
  val region_addr = UInt(REGION_ADDR_BITS.W)
  // Bitmap of blocks in the region to prefetch.
  val region_bits = UInt(REGION_BLKS.W)
  // Bitmap of blocks already issued, so each block is sent at most once.
  val filter_bits = UInt(REGION_BLKS.W)
  // Cache alias bits of the region's vaddr (for VIPT alias handling downstream).
  val alias_bits = UInt(2.W)
  // Set when region_addr holds a translated physical address.
  val paddr_valid = Bool()
  // Issue order hint: pick the highest pending block first instead of the lowest.
  val decr_mode = Bool()
  // Which generator produced this request (debug/trace only).
  val debug_source_type = UInt(log2Up(nSourceType).W)
}
838
/**
  * Deduplicating prefetch-request filter.
  *
  * Accepts region-granularity generate requests, merges requests for the same
  * region, translates vaddr-only entries through the TLB, and issues one
  * block-granularity L2 prefetch address per cycle via a round-robin arbiter.
  * Lookup/allocation is a 2-stage (s0/s1) pipeline; TLB responses land two
  * cycles after the TLB arbiter fires.
  */
class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val gen_req = Flipped(ValidIO(new PfGenReq()))
    val tlb_req = new TlbRequestIO(2)
    val l2_pf_addr = ValidIO(UInt(PAddrBits.W))
    val pf_alias_bits = Output(UInt(2.W))
    val debug_source_type = Output(UInt(log2Up(nSourceType).W))
  })
  val entries = Seq.fill(smsParams.pf_filter_size){ Reg(new PrefetchFilterEntry()) }
  val valids = Seq.fill(smsParams.pf_filter_size){ RegInit(false.B) }
  val replacement = ReplacementPolicy.fromString("plru", smsParams.pf_filter_size)

  // Remember last cycle's request so an identical back-to-back region is dropped early.
  val prev_valid = GatedValidRegNext(io.gen_req.valid, false.B)
  val prev_gen_req = RegEnable(io.gen_req.bits, io.gen_req.valid)

  val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, smsParams.pf_filter_size))
  val pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), smsParams.pf_filter_size))

  io.l2_pf_addr.valid := pf_req_arb.io.out.valid
  io.l2_pf_addr.bits := pf_req_arb.io.out.bits
  // Alias bits of whichever entry won the pf arbiter this cycle.
  io.pf_alias_bits := Mux1H(entries.zipWithIndex.map({
    case (entry, i) => (i.U === pf_req_arb.io.chosen) -> entry.alias_bits
  }))
  pf_req_arb.io.out.ready := true.B

  io.debug_source_type := VecInit(entries.map(_.debug_source_type))(pf_req_arb.io.chosen)

  val s1_valid = Wire(Bool())
  val s1_hit = Wire(Bool())
  val s1_replace_vec = Wire(UInt(smsParams.pf_filter_size.W))
  val s1_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))
  val s2_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))

  // s0: entries lookup
  val s0_gen_req = io.gen_req.bits
  val s0_match_prev = prev_valid && (s0_gen_req.region_tag === prev_gen_req.region_tag)
  val s0_gen_req_valid = io.gen_req.valid && !s0_match_prev
  // An entry about to be evicted by s1 (miss-allocation) must not count as a match.
  val s0_match_vec = valids.indices.map(i => {
    valids(i) && entries(i).region_tag === s0_gen_req.region_tag && !(s1_valid && !s1_hit && s1_replace_vec(i))
  })
  val s0_any_matched = Cat(s0_match_vec).orR
  val s0_replace_vec = UIntToOH(replacement.way)
  val s0_hit = s0_gen_req_valid && s0_any_matched

  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val is_evicted = s1_valid && s1_replace_vec(i)
    // Request translation for valid entries still lacking a paddr; suppress while a
    // TLB request for this entry is already in flight (s1/s2) or the entry is being evicted.
    tlb_req_arb.io.in(i).valid := v && !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !ent.paddr_valid && !is_evicted
    tlb_req_arb.io.in(i).bits.vaddr := Cat(ent.region_addr, 0.U(log2Up(REGION_SIZE).W))
    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
    tlb_req_arb.io.in(i).bits.size := 3.U
    tlb_req_arb.io.in(i).bits.kill := false.B
    tlb_req_arb.io.in(i).bits.no_translate := false.B
    tlb_req_arb.io.in(i).bits.memidx := DontCare
    tlb_req_arb.io.in(i).bits.debug := DontCare
    tlb_req_arb.io.in(i).bits.hlvx := DontCare
    tlb_req_arb.io.in(i).bits.hyperinst := DontCare

    // Blocks requested but not yet issued.
    val pending_req_vec = ent.region_bits & (~ent.filter_bits).asUInt
    // NOTE(review): these PriorityMux index ranges use pf_filter_size where
    // REGION_BLKS looks intended (pending_req_vec has REGION_BLKS bits); the two
    // coincide (16) under the default SMSParams — confirm before changing either.
    val first_one_offset = PriorityMux(
      pending_req_vec.asBools,
      (0 until smsParams.pf_filter_size).map(_.U(REGION_OFFSET.W))
    )
    val last_one_offset = PriorityMux(
      pending_req_vec.asBools.reverse,
      (0 until smsParams.pf_filter_size).reverse.map(_.U(REGION_OFFSET.W))
    )
    // Issue lowest pending block first, or highest first in decr_mode.
    val pf_addr = Cat(
      ent.region_addr,
      Mux(ent.decr_mode, last_one_offset, first_one_offset),
      0.U(log2Up(dcacheParameters.blockBytes).W)
    )
    pf_req_arb.io.in(i).valid := v && Cat(pending_req_vec).orR && ent.paddr_valid && !is_evicted
    pf_req_arb.io.in(i).bits := pf_addr
  }

  val s0_tlb_fire_vec = VecInit(tlb_req_arb.io.in.map(_.fire))
  val s0_pf_fire_vec = VecInit(pf_req_arb.io.in.map(_.fire))

  val s0_update_way = OHToUInt(s0_match_vec)
  val s0_replace_way = replacement.way
  val s0_access_way = Mux(s0_any_matched, s0_update_way, s0_replace_way)
  when(s0_gen_req_valid){
    replacement.access(s0_access_way)
  }

  // s1: update or alloc
  val s1_valid_r = GatedValidRegNext(s0_gen_req_valid, false.B)
  val s1_hit_r = RegEnable(s0_hit, false.B, s0_gen_req_valid)
  val s1_gen_req = RegEnable(s0_gen_req, s0_gen_req_valid)
  val s1_replace_vec_r = RegEnable(s0_replace_vec, s0_gen_req_valid && !s0_hit)
  val s1_update_vec = RegEnable(VecInit(s0_match_vec).asUInt, s0_gen_req_valid && s0_hit)
  val s1_tlb_fire_vec_r = GatedValidRegNext(s0_tlb_fire_vec)
  // tlb req will latch one cycle after tlb_arb
  val s1_tlb_req_valid = GatedValidRegNext(tlb_req_arb.io.out.fire)
  val s1_tlb_req_bits  = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.fire)
  val s1_alloc_entry = Wire(new PrefetchFilterEntry())
  s1_valid := s1_valid_r
  s1_hit := s1_hit_r
  s1_replace_vec := s1_replace_vec_r
  s1_tlb_fire_vec := s1_tlb_fire_vec_r.asUInt
  s1_alloc_entry.region_tag := s1_gen_req.region_tag
  s1_alloc_entry.region_addr := s1_gen_req.region_addr
  s1_alloc_entry.region_bits := s1_gen_req.region_bits
  s1_alloc_entry.paddr_valid := s1_gen_req.paddr_valid
  s1_alloc_entry.decr_mode := s1_gen_req.decr_mode
  s1_alloc_entry.filter_bits := 0.U
  s1_alloc_entry.alias_bits := s1_gen_req.alias_bits
  s1_alloc_entry.debug_source_type := s1_gen_req.debug_source_type
  // Kill the outgoing TLB request if its entry is being replaced this cycle.
  io.tlb_req.req.valid := s1_tlb_req_valid && !((s1_tlb_fire_vec & s1_replace_vec).orR && s1_valid && !s1_hit)
  io.tlb_req.req.bits := s1_tlb_req_bits
  io.tlb_req.resp.ready := true.B
  io.tlb_req.req_kill := false.B
  tlb_req_arb.io.out.ready := true.B

  // TLB response arrives two cycles after the per-entry arbiter fired.
  val s2_tlb_fire_vec_r = GatedValidRegNext(s1_tlb_fire_vec_r)
  s2_tlb_fire_vec := s2_tlb_fire_vec_r.asUInt

  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val alloc = s1_valid && !s1_hit && s1_replace_vec(i)
    val update = s1_valid && s1_hit && s1_update_vec(i)
    // for pf: use s0 data
    val pf_fired = s0_pf_fire_vec(i)
    val tlb_fired = s2_tlb_fire_vec(i) && !io.tlb_req.resp.bits.miss && io.tlb_req.resp.fire
    when(tlb_fired){
      // tlb_fired already implies !miss, so paddr_valid is effectively set to true here.
      ent.paddr_valid := !io.tlb_req.resp.bits.miss
      ent.region_addr := region_addr(io.tlb_req.resp.bits.paddr.head)
    }
    when(update){
      // Merge new blocks into the existing entry for the same region.
      ent.region_bits := ent.region_bits | s1_gen_req.region_bits
    }
    when(pf_fired){
      // Mark the just-issued block so it is not sent again.
      val curr_bit = UIntToOH(block_addr(pf_req_arb.io.in(i).bits)(REGION_OFFSET - 1, 0))
      ent.filter_bits := ent.filter_bits | curr_bit
    }
    when(alloc){
      // Allocation wins over the updates above (Chisel last-connect semantics).
      ent := s1_alloc_entry
      v := true.B
    }
  }
  when(s1_valid && s1_hit){
    assert(PopCount(s1_update_vec) === 1.U, "sms_pf_filter: multi-hit")
  }
  assert(!io.tlb_req.resp.fire || Cat(s2_tlb_fire_vec).orR, "sms_pf_filter: tlb resp fires, but no tlb req from tlb_req_arb 2 cycles ago")

  XSPerfAccumulate("sms_pf_filter_recv_req", io.gen_req.valid)
  XSPerfAccumulate("sms_pf_filter_hit", s1_valid && s1_hit)
  XSPerfAccumulate("sms_pf_filter_tlb_req", io.tlb_req.req.fire)
  XSPerfAccumulate("sms_pf_filter_tlb_resp_miss", io.tlb_req.resp.fire && io.tlb_req.resp.bits.miss)
  for(i <- 0 until smsParams.pf_filter_size){
    XSPerfAccumulate(s"sms_pf_filter_access_way_$i", s0_gen_req_valid && s0_access_way === i.U)
  }
  XSPerfAccumulate("sms_pf_filter_l2_req", io.l2_pf_addr.valid)
}
992
/**
  * Train-request filter in front of the SMS prefetcher.
  *
  * Collects load/store training inputs into a small circular queue,
  * deduplicates by block hash tag (against both resident entries and
  * same-cycle earlier enqueues), and dequeues one request per cycle.
  */
class SMSTrainFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper with HasTrainFilterHelper {
  val io = IO(new Bundle() {
    // train input
    // hybrid load store
    val ld_in = Flipped(Vec(backendParams.LdExuCnt, ValidIO(new LdPrefetchTrainBundle())))
    val st_in = Flipped(Vec(backendParams.StaExuCnt, ValidIO(new StPrefetchTrainBundle())))
    // filter out
    val train_req = ValidIO(new PrefetchReqBundle())
  })

  // Circular-queue pointer sized to the train filter depth.
  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr](
    p => smsParams.train_filter_size
  ){
  }

  object Ptr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val ptr = Wire(new Ptr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  val entries = RegInit(VecInit(Seq.fill(smsParams.train_filter_size){ (0.U.asTypeOf(new PrefetchReqBundle())) }))
  val valids = RegInit(VecInit(Seq.fill(smsParams.train_filter_size){ (false.B) }))

  // NOTE(review): enqLen uses LduCnt + StaCnt while io.ld_in/io.st_in are sized by
  // LdExuCnt/StaExuCnt — confirm these match when hybrid units are configured.
  val enqLen = backendParams.LduCnt + backendParams.StaCnt
  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))

  val deqPtr = WireInit(deqPtrExt.value)

  require(smsParams.train_filter_size >= enqLen)

  // Reorder inputs (helper from HasTrainFilterHelper), then flatten loads ++ stores.
  val ld_reorder = reorder(io.ld_in)
  val st_reorder = reorder(io.st_in)
  val reqs_ls = ld_reorder.map(_.bits.asPrefetchReqBundle()) ++ st_reorder.map(_.bits.asPrefetchReqBundle())
  val reqs_vls = ld_reorder.map(_.valid) ++ st_reorder.map(_.valid)
  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))

  for(i <- (0 until enqLen)) {
    val req = reqs_ls(i)
    val req_v = reqs_vls(i)
    // Compact enqueue: each accepted request takes the next free enq pointer.
    val index = PopCount(needAlloc.take(i))
    val allocPtr = enqPtrExt(index)
    // Duplicate against a resident entry (by block hash tag)?
    val entry_match = Cat(entries.zip(valids).map {
      case(e, v) => v && block_hash_tag(e.vaddr) === block_hash_tag(req.vaddr)
    }).orR
    // Duplicate against an earlier same-cycle enqueue?
    val prev_enq_match = if(i == 0) false.B else Cat(reqs_ls.zip(reqs_vls).take(i).map {
      case(pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === block_hash_tag(req.vaddr)
    }).orR

    needAlloc(i) := req_v && !entry_match && !prev_enq_match
    // Pointer comparison guards against queue overflow — relies on
    // CircularQueuePtr ordering semantics (assumed: false when full).
    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt

    when(canAlloc(i)) {
      valids(allocPtr.value) := true.B
      entries(allocPtr.value) := req
    }
  }
  val allocNum = PopCount(canAlloc)

  // Advance all enqueue pointers together by the number actually enqueued.
  enqPtrExt.foreach{case x => when(canAlloc.asUInt.orR) {x := x + allocNum} }

  // Dequeue: present the entry at deqPtr (if valid) for one cycle.
  io.train_req.valid := false.B
  io.train_req.bits := DontCare
  valids.zip(entries).zipWithIndex.foreach {
    case((valid, entry), i) => {
      when(deqPtr === i.U) {
        io.train_req.valid := valid
        io.train_req.bits := entry
      }
    }
  }

  when(io.train_req.valid) {
    valids(deqPtr) := false.B
    deqPtrExt := deqPtrExt + 1.U
  }

  XSPerfAccumulate("sms_train_filter_full", PopCount(valids) === (smsParams.train_filter_size).U)
  XSPerfAccumulate("sms_train_filter_half", PopCount(valids) >= (smsParams.train_filter_size / 2).U)
  XSPerfAccumulate("sms_train_filter_empty", PopCount(valids) === 0.U)

  val raw_enq_pattern = Cat(reqs_vls)
  val filtered_enq_pattern = Cat(needAlloc)
  val actual_enq_pattern = Cat(canAlloc)
  XSPerfAccumulate("sms_train_filter_enq", allocNum > 0.U)
  XSPerfAccumulate("sms_train_filter_deq", io.train_req.fire)
  // Render the enqueue pattern as a binary string for the perf-counter names.
  def toBinary(n: Int): String = n match {
    case 0|1 => s"$n"
    case _   => s"${toBinary(n/2)}${n%2}"
  }
  for(i <- 0 until (1 << enqLen)) {
    XSPerfAccumulate(s"sms_train_filter_raw_enq_pattern_${toBinary(i)}", raw_enq_pattern === i.U)
    XSPerfAccumulate(s"sms_train_filter_filtered_enq_pattern_${toBinary(i)}", filtered_enq_pattern === i.U)
    XSPerfAccumulate(s"sms_train_filter_actual_enq_pattern_${toBinary(i)}", actual_enq_pattern === i.U)
  }
}
1093}
1094
/**
  * Top level of the SMS (Spatial Memory Streaming) prefetcher.
  *
  * Wires the train filter, active generation table (AGT), stride prefetcher,
  * pattern history table (PHT) and prefetch filter together, and emits
  * L2 prefetch requests (L1 requests are tied off below).
  */
class SMSPrefetcher()(implicit p: Parameters) extends BasePrefecher with HasSMSModuleHelper with HasL1PrefetchSourceParameter {


  // Runtime enables and tuning knobs for the sub-prefetchers.
  val io_agt_en = IO(Input(Bool()))
  val io_stride_en = IO(Input(Bool()))
  val io_pht_en = IO(Input(Bool()))
  val io_act_threshold = IO(Input(UInt(REGION_OFFSET.W)))
  val io_act_stride = IO(Input(UInt(6.W)))
  val io_dcache_evict = IO(Flipped(DecoupledIO(new AGTEvictReq)))

  val train_filter = Module(new SMSTrainFilter)

  train_filter.io.ld_in <> io.ld_in
  train_filter.io.st_in <> io.st_in

  val train_ld = train_filter.io.train_req.bits

  // Derive region-level tags/addresses from the filtered training access.
  val train_block_tag = block_hash_tag(train_ld.vaddr)
  val train_region_tag = train_block_tag.head(REGION_TAG_WIDTH)

  val train_region_addr_raw = region_addr(train_ld.vaddr)(REGION_TAG_WIDTH + 2 * VADDR_HASH_WIDTH - 1, 0)
  // Extra top bit catches wrap-around in the +1/-1 neighbor computation.
  val train_region_addr_p1 = Cat(0.U(1.W), train_region_addr_raw) + 1.U
  val train_region_addr_m1 = Cat(0.U(1.W), train_region_addr_raw) - 1.U
  // addr_p1 or addr_m1 is valid?
  val train_allow_cross_region_p1 = !train_region_addr_p1.head(1).asBool
  val train_allow_cross_region_m1 = !train_region_addr_m1.head(1).asBool

  val train_region_p1_tag = region_hash_tag(train_region_addr_p1.tail(1))
  val train_region_m1_tag = region_hash_tag(train_region_addr_m1.tail(1))

  val train_region_p1_cross_page = page_bit(train_region_addr_p1) ^ page_bit(train_region_addr_raw)
  val train_region_m1_cross_page = page_bit(train_region_addr_m1) ^ page_bit(train_region_addr_raw)

  val train_region_paddr = region_addr(train_ld.paddr)
  val train_region_vaddr = region_addr(train_ld.vaddr)
  val train_region_offset = train_block_tag(REGION_OFFSET - 1, 0)
  val train_vld = train_filter.io.train_req.valid


  // prefetch stage0
  val active_gen_table = Module(new ActiveGenerationTable())
  val stride = Module(new StridePF())
  val pht = Module(new PatternHistoryTable())
  val pf_filter = Module(new PrefetchFilter())

  // Register the training info one cycle before feeding AGT/stride lookup.
  val train_vld_s0 = GatedValidRegNext(train_vld, false.B)
  val train_s0 = RegEnable(train_ld, train_vld)
  val train_region_tag_s0 = RegEnable(train_region_tag, train_vld)
  val train_region_p1_tag_s0 = RegEnable(train_region_p1_tag, train_vld)
  val train_region_m1_tag_s0 = RegEnable(train_region_m1_tag, train_vld)
  val train_allow_cross_region_p1_s0 = RegEnable(train_allow_cross_region_p1, train_vld)
  val train_allow_cross_region_m1_s0 = RegEnable(train_allow_cross_region_m1, train_vld)
  val train_pht_tag_s0 = RegEnable(pht_tag(train_ld.pc), train_vld)
  val train_pht_index_s0 = RegEnable(pht_index(train_ld.pc), train_vld)
  val train_region_offset_s0 = RegEnable(train_region_offset, train_vld)
  val train_region_p1_cross_page_s0 = RegEnable(train_region_p1_cross_page, train_vld)
  val train_region_m1_cross_page_s0 = RegEnable(train_region_m1_cross_page, train_vld)
  val train_region_paddr_s0 = RegEnable(train_region_paddr, train_vld)
  val train_region_vaddr_s0 = RegEnable(train_region_vaddr, train_vld)

  active_gen_table.io.agt_en := io_agt_en
  active_gen_table.io.act_threshold := io_act_threshold
  active_gen_table.io.act_stride := io_act_stride
  active_gen_table.io.s0_lookup.valid := train_vld_s0
  active_gen_table.io.s0_lookup.bits.region_tag := train_region_tag_s0
  active_gen_table.io.s0_lookup.bits.region_p1_tag := train_region_p1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_m1_tag := train_region_m1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_offset := train_region_offset_s0
  active_gen_table.io.s0_lookup.bits.pht_index := train_pht_index_s0
  active_gen_table.io.s0_lookup.bits.pht_tag := train_pht_tag_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_p1 := train_allow_cross_region_p1_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_m1 := train_allow_cross_region_m1_s0
  active_gen_table.io.s0_lookup.bits.region_p1_cross_page := train_region_p1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_m1_cross_page := train_region_m1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_paddr := train_region_paddr_s0
  active_gen_table.io.s0_lookup.bits.region_vaddr := train_region_vaddr_s0
  active_gen_table.io.s2_stride_hit := stride.io.s2_gen_req.valid
  active_gen_table.io.s0_dcache_evict <> io_dcache_evict

  stride.io.stride_en := io_stride_en
  stride.io.s0_lookup.valid := train_vld_s0
  stride.io.s0_lookup.bits.pc := train_s0.pc(STRIDE_PC_BITS - 1, 0)
  stride.io.s0_lookup.bits.vaddr := Cat(
    train_region_vaddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s0_lookup.bits.paddr := Cat(
    train_region_paddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s1_valid := active_gen_table.io.s1_sel_stride

  // PHT is looked up / updated by AGT activity (lookups and evictions).
  pht.io.s2_agt_lookup := active_gen_table.io.s2_pht_lookup
  pht.io.agt_update := active_gen_table.io.s2_evict

  // Select the generate request: AGT/stride (mutually exclusive, asserted below)
  // take priority over PHT.
  val pht_gen_valid = pht.io.pf_gen_req.valid && io_pht_en
  val agt_gen_valid = active_gen_table.io.s2_pf_gen_req.valid
  val stride_gen_valid = stride.io.s2_gen_req.valid
  val pf_gen_req = Mux(agt_gen_valid || stride_gen_valid,
    Mux1H(Seq(
      agt_gen_valid -> active_gen_table.io.s2_pf_gen_req.bits,
      stride_gen_valid -> stride.io.s2_gen_req.bits
    )),
    pht.io.pf_gen_req.bits
  )
  assert(!(agt_gen_valid && stride_gen_valid))
  pf_filter.io.gen_req.valid := pht_gen_valid || agt_gen_valid || stride_gen_valid
  pf_filter.io.gen_req.bits := pf_gen_req
  io.tlb_req <> pf_filter.io.tlb_req
  // NOTE(review): hard-coded lower bound — presumably the platform's DRAM base
  // (drops prefetches to MMIO/low address space); confirm against the SoC memory map.
  val is_valid_address = pf_filter.io.l2_pf_addr.bits > 0x80000000L.U

  io.l2_req.valid := pf_filter.io.l2_pf_addr.valid && io.enable && is_valid_address
  io.l2_req.bits.addr := pf_filter.io.l2_pf_addr.bits
  io.l2_req.bits.source := MemReqSource.Prefetch2L2SMS.id.U

  // for now, sms will not send l1 prefetch requests
  io.l1_req.bits.paddr := pf_filter.io.l2_pf_addr.bits
  io.l1_req.bits.alias := pf_filter.io.pf_alias_bits
  io.l1_req.bits.is_store := true.B
  io.l1_req.bits.confidence := 1.U
  io.l1_req.bits.pf_source.value := L1_HW_PREFETCH_NULL
  io.l1_req.valid := false.B

  for((train, i) <- io.ld_in.zipWithIndex){
    XSPerfAccumulate(s"pf_train_miss_${i}", train.valid && train.bits.miss)
    XSPerfAccumulate(s"pf_train_prefetched_${i}", train.valid && isFromL1Prefetch(train.bits.meta_prefetch))
  }
  // Log issued L2 prefetches to the ChiselDB miss-trace table for offline analysis.
  val trace = Wire(new L1MissTrace)
  trace.vaddr := 0.U
  trace.pc := 0.U
  trace.paddr := io.l2_req.bits.addr
  trace.source := pf_filter.io.debug_source_type
  val table = ChiselDB.createTable("L1SMSMissTrace_hart"+ p(XSCoreParamsKey).HartId.toString, new L1MissTrace)
  table.log(trace, io.l2_req.fire, "SMSPrefetcher", clock, reset)

  XSPerfAccumulate("sms_pf_gen_conflict",
    pht_gen_valid && agt_gen_valid
  )
  XSPerfAccumulate("sms_pht_disabled", pht.io.pf_gen_req.valid && !io_pht_en)
  XSPerfAccumulate("sms_agt_disabled", active_gen_table.io.s2_pf_gen_req.valid && !io_agt_en)
  XSPerfAccumulate("sms_pf_real_issued", io.l2_req.valid)
  XSPerfAccumulate("sms_l1_req_valid", io.l1_req.valid)
  XSPerfAccumulate("sms_l1_req_fire", io.l1_req.fire)
}
1236}