xref: /XiangShan/src/main/scala/xiangshan/mem/prefetch/SMSPrefetcher.scala (revision 289fc2f9291f65a873c97dd5ad2876aea033ce6f)
1package xiangshan.mem.prefetch
2
3import chipsalliance.rocketchip.config.Parameters
4import chisel3._
5import chisel3.util._
6import treadle.utils.BitUtils
7import xiangshan._
8import utils._
9import xiangshan.cache.mmu._
10
/** Configuration parameters for the SMS prefetcher. */
case class SMSParams
(
  region_size: Int = 1024,           // bytes covered by one spatial region
  vaddr_hash_width: Int = 1,         // width of each of the 3 vaddr slices xor-folded by vaddr_hash
  block_addr_raw_width: Int = 30,    // low block-address bits kept un-hashed in tags
  filter_table_size: Int = 16,       // entries in the FilterTable
  active_gen_table_size: Int = 16,   // entries in the ActiveGenerationTable
  pht_size: Int = 64,                // total PHT entries (sets * ways)
  pht_ways: Int = 2,                 // PHT associativity
  pht_hist_bits: Int = 2,            // saturating-counter width per block in a PHT entry
  pht_tag_bits: Int = 13,            // PHT tag width
  pht_lookup_queue_size: Int = 4,    // depth of PHT lookup and evict queues
  pf_filter_size: Int = 16           // entries in the PrefetchFilter
) extends PrefetcherParams
25
/** Shared width constants and address-hashing helpers for the SMS prefetcher modules. */
trait HasSMSModuleHelper extends HasCircularQueuePtrHelper
{ this: HasXSParameter =>
  val smsParams = coreParams.prefetcher.get.asInstanceOf[SMSParams]
  val REGION_SIZE = smsParams.region_size
  // cache blocks per region
  val REGION_BLKS = smsParams.region_size / dcacheParameters.blockBytes
  val REGION_ADDR_BITS = VAddrBits - log2Up(REGION_SIZE)
  val REGION_OFFSET = log2Up(REGION_BLKS)
  val VADDR_HASH_WIDTH = smsParams.vaddr_hash_width
  val BLK_ADDR_RAW_WIDTH = smsParams.block_addr_raw_width
  val REGION_ADDR_RAW_WIDTH = BLK_ADDR_RAW_WIDTH - REGION_OFFSET
  val BLK_TAG_WIDTH = BLK_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val REGION_TAG_WIDTH = REGION_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val PHT_INDEX_BITS = log2Up(smsParams.pht_size / smsParams.pht_ways)
  val PHT_TAG_BITS = smsParams.pht_tag_bits
  val PHT_HIST_BITS = smsParams.pht_hist_bits

  /** Strip the cache-block byte offset from an address. */
  def block_addr(x: UInt): UInt = {
    val blkOffBits = log2Up(dcacheParameters.blockBytes)
    x(x.getWidth - 1, blkOffBits)
  }

  /** Strip the region byte offset from an address. */
  def region_addr(x: UInt): UInt = {
    val rgOffBits = log2Up(REGION_SIZE)
    x(x.getWidth - 1, rgOffBits)
  }

  /** One-hot mask selecting block `off` within a region. */
  def region_offset_to_bits(off: UInt): UInt = {
    (1.U << off).asUInt
  }

  /** Compress a region address into a tag: keep the raw low bits, xor-fold the upper bits. */
  def region_hash_tag(rg_addr: UInt): UInt = {
    val rawLow = rg_addr(REGION_ADDR_RAW_WIDTH - 1, 0)
    val upper = rg_addr(REGION_ADDR_RAW_WIDTH + 3 * VADDR_HASH_WIDTH - 1, REGION_ADDR_RAW_WIDTH)
    Cat(vaddr_hash(upper), rawLow)
  }

  /** Lowest region-address bit that changes when crossing a page boundary. */
  def page_bit(region_addr: UInt): UInt = {
    region_addr(log2Up(dcacheParameters.pageSize/REGION_SIZE))
  }

  /** Compress a full address into a block tag: raw low block-address bits plus hashed upper bits. */
  def block_hash_tag(x: UInt): UInt = {
    val blk = block_addr(x)
    val rawLow = blk(BLK_ADDR_RAW_WIDTH - 1, 0)
    val upper = blk(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    Cat(vaddr_hash(upper), rawLow)
  }

  /** Xor-fold three VADDR_HASH_WIDTH-wide slices of x into one slice. */
  def vaddr_hash(x: UInt): UInt = {
    val w = VADDR_HASH_WIDTH
    Seq(
      x(w - 1, 0),
      x(2 * w - 1, w),
      x(3 * w - 1, 2 * w)
    ).reduce(_ ^ _)
  }

  /** PHT set index from pc: pc[PHT_INDEX_BITS:2], with the top bit xor-mixed with pc[1]. */
  def pht_index(pc: UInt): UInt = {
    val lowBits = pc(PHT_INDEX_BITS, 2)
    val topBit = pc(1) ^ pc(PHT_INDEX_BITS + 1)
    Cat(topBit, lowBits)
  }

  /** PHT tag: the pc bits directly above the index field. */
  def pht_tag(pc: UInt): UInt = {
    pc(PHT_INDEX_BITS + 2 + PHT_TAG_BITS - 1, PHT_INDEX_BITS + 2)
  }
}
93
/** Records single-access regions; reports a hit when the same region is seen
  * again at a different block offset (hit entries are consumed/invalidated).
  */
class FilterTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    // stage0: incoming access to look up
    val s0_lookup = Flipped(ValidIO(new FilterEntry()))
    // stage1: forwarded entry when the lookup matched
    val s1_result = ValidIO(new FilterEntry())
    // stage1: external permission to allocate on a miss
    val s1_update = Input(Bool())
  })

  val s0_lookup_entry = io.s0_lookup.bits
  val s0_lookup_valid = io.s0_lookup.valid

  // fully-associative storage; w_ptr provides FIFO-order victims when no slot is free
  val entries = Seq.fill(smsParams.filter_table_size){ Reg(new FilterEntry()) }
  val valids = Seq.fill(smsParams.filter_table_size){ RegInit(false.B) }
  val w_ptr = RegInit(0.U(log2Up(smsParams.filter_table_size).W))

  // previous-cycle lookup: source for allocation and back-to-back same-region detection
  val prev_entry = RegEnable(s0_lookup_entry, s0_lookup_valid)
  val prev_lookup_valid = RegNext(s0_lookup_valid, false.B)

  // match = same region tag but a DIFFERENT block offset
  val s0_entry_match_vec = entries.zip(valids).map({
    case (ent, v) => v && ent.region_tag === s0_lookup_entry.region_tag && ent.offset =/= s0_lookup_entry.offset
  })
  val s0_any_entry_match = Cat(s0_entry_match_vec).orR
  val s0_matched_entry = Mux1H(s0_entry_match_vec, entries)
  // region was looked up last cycle (entry may not be written yet) — forward it
  val s0_match_s1 = prev_lookup_valid && prev_entry.region_tag === s0_lookup_entry.region_tag

  val s0_hit = s0_lookup_valid && (s0_any_entry_match || s0_match_s1)

  val s0_lookup_result = Wire(new FilterEntry())
  s0_lookup_result := Mux(s0_match_s1, prev_entry, s0_matched_entry)
  io.s1_result.valid := RegNext(s0_hit, false.B)
  io.s1_result.bits := RegEnable(s0_lookup_result, s0_hit)

  val s0_invalid_mask = valids.map(!_)
  val s0_has_invalid_entry = Cat(s0_invalid_mask).orR
  val s0_invalid_index = PriorityEncoder(s0_invalid_mask)
  // if match, invalidate entry (it has been consumed by the hit)
  for((v, i) <- valids.zipWithIndex){
    when(s0_lookup_valid && s0_entry_match_vec(i)){
      v := false.B
    }
  }

  // stage1
  val s1_has_invalid_entry = RegEnable(s0_has_invalid_entry, s0_lookup_valid)
  val s1_invalid_index = RegEnable(s0_invalid_index, s0_lookup_valid)
  // alloc entry if (s0 miss && s1_update)
  val s1_do_update = io.s1_update && prev_lookup_valid && !io.s1_result.valid
  // prefer a free slot; otherwise overwrite at the FIFO pointer
  val update_ptr = Mux(s1_has_invalid_entry, s1_invalid_index, w_ptr)
  when(s1_do_update && !s1_has_invalid_entry){ w_ptr := w_ptr + 1.U }
  for((ent, i) <- entries.zipWithIndex){
    val wen = s1_do_update && update_ptr === i.U
    when(wen){
      valids(i) := true.B
      ent := prev_entry
    }
  }

  XSPerfAccumulate("sms_filter_table_hit", io.s1_result.valid)
  XSPerfAccumulate("sms_filter_table_update", s1_do_update)
  for(i <- 0 until smsParams.filter_table_size){
    XSPerfAccumulate(s"sms_filter_table_access_$i",
      s1_do_update && update_ptr === i.U
    )
  }
}
158
/** One recorded access in the FilterTable. */
class FilterEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)    // PHT set index (see pht_index helper)
  val pht_tag = UInt(PHT_TAG_BITS.W)        // PHT tag (see pht_tag helper)
  val region_tag = UInt(REGION_TAG_WIDTH.W) // hashed region address
  val offset = UInt(REGION_OFFSET.W)        // block offset of the access within the region
}
165
/** One active region tracked by the ActiveGenerationTable. */
class AGTEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)
  val pht_tag = UInt(PHT_TAG_BITS.W)
  // one bit per cache block: which blocks of the region were accessed
  val region_bits = UInt(REGION_BLKS.W)
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  // count of distinct blocks touched, saturating at REGION_BLKS-1
  val access_cnt = UInt((REGION_BLKS-1).U.getWidth.W)
  // true when the region was entered from above (pattern moves toward lower addresses)
  val decr_mode = Bool()
}
174
/** Prefetch-generation request sent to the PrefetchFilter. */
class PfGenReq()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val region_tag = UInt(REGION_TAG_WIDTH.W)   // hashed region address (for dedup matching)
  val region_addr = UInt(REGION_ADDR_BITS.W)  // region address; physical iff paddr_valid
  val region_bits = UInt(REGION_BLKS.W)       // one bit per block to prefetch
  val paddr_valid = Bool()                    // region_addr already translated
  val decr_mode = Bool()                      // issue blocks from high offsets downward
}
182
/** Tracks actively-accessed regions; a lookup matches the accessed region or
  * its +1/-1 neighbors. Allocates on cross-region matches or on filter-table
  * second hits, generates prefetch requests for hot regions, and evicts
  * replaced entries to the PHT.
  */
class ActiveGenerationTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val s0_lookup = Flipped(ValidIO(new Bundle() {
      // tags of the accessed region and its +1/-1 neighbor regions
      val region_tag = UInt(REGION_TAG_WIDTH.W)
      val region_p1_tag = UInt(REGION_TAG_WIDTH.W)
      val region_m1_tag = UInt(REGION_TAG_WIDTH.W)
      // block offset of the access within the region
      val region_offset = UInt(REGION_OFFSET.W)
      val pht_index = UInt(PHT_INDEX_BITS.W)
      val pht_tag = UInt(PHT_TAG_BITS.W)
      // whether the +1/-1 neighbor addresses are usable at all
      val allow_cross_region_p1 = Bool()
      val allow_cross_region_m1 = Bool()
      // whether the +1/-1 neighbor falls in a different page (no valid paddr there)
      val region_p1_cross_page = Bool()
      val region_m1_cross_page = Bool()
      val region_paddr = UInt(REGION_ADDR_BITS.W)
      val region_vaddr = UInt(REGION_ADDR_BITS.W)
    }))
    // do not alloc entry in filter table if agt hit
    val s1_match_or_alloc = Output(Bool())
    // if agt missed, try lookup pht
    val s1_pht_lookup = ValidIO(new PhtLookup())
    // receive second hit from filter table
    val s1_recv_entry = Flipped(ValidIO(new AGTEntry()))
    // evict entry to pht
    val s2_evict = ValidIO(new AGTEntry())
    val s2_pf_gen_req = ValidIO(new PfGenReq())
  })

  // fully-associative table with PLRU replacement
  val entries = Seq.fill(smsParams.active_gen_table_size){ Reg(new AGTEntry()) }
  val valids = Seq.fill(smsParams.active_gen_table_size){ RegInit(false.B) }
  val replacement = ReplacementPolicy.fromString("plru", smsParams.active_gen_table_size)

  val s0_lookup = io.s0_lookup.bits
  val s0_lookup_valid = io.s0_lookup.valid

  val prev_lookup = RegEnable(s0_lookup, s0_lookup_valid)
  val prev_lookup_valid = RegNext(s0_lookup_valid, false.B)

  // back-to-back access to the same region as last cycle
  val s0_match_prev = prev_lookup_valid && s0_lookup.region_tag === prev_lookup.region_tag

  // per-entry match vector for a given region tag
  def gen_match_vec(region_tag: UInt): Seq[Bool] = {
    entries.zip(valids).map({
      case (ent, v) => v && ent.region_tag === region_tag
    })
  }

  val region_match_vec_s0 = gen_match_vec(s0_lookup.region_tag)
  val region_p1_match_vec_s0 = gen_match_vec(s0_lookup.region_p1_tag)
  val region_m1_match_vec_s0 = gen_match_vec(s0_lookup.region_m1_tag)

  val any_region_match = Cat(region_match_vec_s0).orR
  val any_region_p1_match = Cat(region_p1_match_vec_s0).orR && s0_lookup.allow_cross_region_p1
  val any_region_m1_match = Cat(region_m1_match_vec_s0).orR && s0_lookup.allow_cross_region_m1

  val s0_region_hit = any_region_match
  // region miss, but cross region match
  val s0_alloc = !s0_region_hit && (any_region_p1_match || any_region_m1_match) && !s0_match_prev
  val s0_match_or_alloc = any_region_match || any_region_p1_match || any_region_m1_match
  // entry used for pf generation: exact match wins, then -1 neighbor, then +1 neighbor
  val s0_pf_gen_match_vec = valids.indices.map(i => {
    Mux(any_region_match,
      region_match_vec_s0(i),
      Mux(any_region_m1_match,
        region_m1_match_vec_s0(i), region_p1_match_vec_s0(i)
      )
    )
  })
  val s0_agt_entry = Wire(new AGTEntry())

  s0_agt_entry.pht_index := s0_lookup.pht_index
  s0_agt_entry.pht_tag := s0_lookup.pht_tag
  s0_agt_entry.region_bits := region_offset_to_bits(s0_lookup.region_offset)
  s0_agt_entry.region_tag := s0_lookup.region_tag
  s0_agt_entry.access_cnt := 1.U
  // lookup_region + 1 == entry_region
  // lookup_region = entry_region - 1 => decr mode
  s0_agt_entry.decr_mode := !s0_region_hit && !any_region_m1_match && any_region_p1_match
  val s0_replace_mask = UIntToOH(replacement.way)
  // s0 hit a entry that may be replaced in s1
  val s0_update_conflict = Cat(VecInit(region_match_vec_s0).asUInt & s0_replace_mask).orR

  // stage1: update/alloc
  val s1_agt_entry = RegEnable(s0_agt_entry, s0_lookup_valid)
  val s1_recv_entry = io.s1_recv_entry
  // cross region match or filter table second hit
  val s1_cross_region_match = RegNext(s0_lookup_valid && s0_alloc, false.B)
  val s1_alloc = s1_cross_region_match || s1_recv_entry.valid
  // the filter-table entry takes precedence over the cross-region-built one
  val s1_alloc_entry = Mux(s1_recv_entry.valid, s1_recv_entry.bits, s1_agt_entry)
  val s1_replace_mask = RegEnable(s0_replace_mask, s0_lookup_valid)
  val s1_evict_entry = Mux1H(s1_replace_mask, entries)
  val s1_evict_valid = Mux1H(s1_replace_mask, valids)
  // region hit, update entry (suppressed when the hit entry is this cycle's victim)
  val s1_update_conflict = RegEnable(s0_update_conflict, s0_lookup_valid && s0_region_hit)
  val s1_update = RegNext(s0_lookup_valid && s0_region_hit, false.B) && !s1_update_conflict
  val s1_update_mask = RegEnable(
    VecInit(region_match_vec_s0),
    VecInit(Seq.fill(smsParams.active_gen_table_size){ false.B }),
    s0_lookup_valid
  )
  val s1_allow_cross_region_p1 = RegEnable(s0_lookup.allow_cross_region_p1, s0_lookup_valid)
  val s1_allow_cross_region_m1 = RegEnable(s0_lookup.allow_cross_region_m1, s0_lookup_valid)
  val s1_region_p1_cross_page = RegEnable(s0_lookup.region_p1_cross_page, s0_lookup_valid)
  val s1_region_m1_cross_page = RegEnable(s0_lookup.region_m1_cross_page, s0_lookup_valid)
  val s1_pf_gen_match_vec = RegEnable(VecInit(s0_pf_gen_match_vec), s0_lookup_valid)
  val s1_region_paddr = RegEnable(s0_lookup.region_paddr, s0_lookup_valid)
  val s1_region_vaddr = RegEnable(s0_lookup.region_vaddr, s0_lookup_valid)
  val s1_region_offset = RegEnable(s0_lookup.region_offset, s0_lookup_valid)
  for(i <- entries.indices){
    val alloc = s1_replace_mask(i) && s1_alloc
    val update = s1_update_mask(i) && s1_update
    val update_entry = WireInit(entries(i))
    update_entry.region_bits := entries(i).region_bits | s1_agt_entry.region_bits
    // access_cnt saturates at REGION_BLKS-1 and only counts the first touch of each block
    update_entry.access_cnt := Mux(entries(i).access_cnt === (REGION_BLKS - 1).U,
      entries(i).access_cnt,
      entries(i).access_cnt + (s1_agt_entry.region_bits & (~entries(i).region_bits).asUInt).orR
    )
    valids(i) := valids(i) || alloc
    entries(i) := Mux(alloc, s1_alloc_entry, Mux(update, update_entry, entries(i)))
  }
  when(s1_update) {
    replacement.access(OHToUInt(s1_update_mask))
  }.elsewhen(s1_alloc){
    replacement.access(OHToUInt(s1_replace_mask))
  }

  io.s1_match_or_alloc := s1_update || s1_alloc


  // pf_addr
  // 1.hit => pf_addr = lookup_addr + (decr ? -1 : 1)
  // 2.lookup region - 1 hit => lookup_addr + 1 (incr mode)
  // 3.lookup region + 1 hit => lookup_addr - 1 (decr mode)
  val s1_hited_entry_decr = Mux1H(s1_update_mask, entries.map(_.decr_mode))
  val s1_pf_gen_decr_mode = Mux(s1_update,
    s1_hited_entry_decr,
    s1_agt_entry.decr_mode
  )
  // the paddr is only usable when the prefetch target stays within the same page
  val s1_pf_gen_paddr_valid = Mux(s1_pf_gen_decr_mode,
    !s1_region_m1_cross_page,
    !s1_region_p1_cross_page
  )
  val s1_pf_gen_vaddr_p1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) + 1.U
  val s1_pf_gen_vaddr_m1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) - 1.U
  val s1_pf_gen_vaddr = Cat(
    s1_region_vaddr(REGION_ADDR_BITS - 1, REGION_TAG_WIDTH),
    Mux(s1_pf_gen_decr_mode,
      s1_pf_gen_vaddr_m1,
      s1_pf_gen_vaddr_p1
    )
  )
  val s1_pf_gen_access_cnt = Mux1H(s1_pf_gen_match_vec, entries.map(_.access_cnt))
  // generate only once more than half of the region's blocks have been touched
  val s1_pf_gen_valid = prev_lookup_valid && io.s1_match_or_alloc && Mux(s1_pf_gen_decr_mode,
    !s1_region_m1_cross_page || s1_allow_cross_region_m1,
    !s1_region_p1_cross_page || s1_allow_cross_region_p1
  ) && (s1_pf_gen_access_cnt > (REGION_BLKS / 2).U)
  val s1_pf_gen_region_tag = s1_agt_entry.region_tag
  // fall back to the vaddr when the target page's paddr is unknown
  val s1_pf_gen_region_addr = Mux(s1_pf_gen_paddr_valid,
    s1_region_paddr, s1_pf_gen_vaddr
  )
  // incr mode: bit i set iff no block below index i was accessed
  val s1_pf_gen_incr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == 0) true.B else !s1_agt_entry.region_bits(i - 1, 0).orR
  })).asUInt
  // decr mode: bit i set iff no block above index i was accessed
  val s1_pf_gen_decr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == REGION_BLKS - 1) true.B
    else !s1_agt_entry.region_bits(REGION_BLKS - 1, i + 1).orR
  })).asUInt
  val s1_pf_gen_region_bits = Mux(s1_pf_gen_decr_mode,
    s1_pf_gen_decr_region_bits,
    s1_pf_gen_incr_region_bits
  )

  io.s1_pht_lookup.valid := !s1_pf_gen_valid && prev_lookup_valid
  io.s1_pht_lookup.bits.pht_index := s1_agt_entry.pht_index
  io.s1_pht_lookup.bits.pht_tag := s1_agt_entry.pht_tag
  io.s1_pht_lookup.bits.region_tag := s1_agt_entry.region_tag
  io.s1_pht_lookup.bits.region_addr := s1_region_paddr
  io.s1_pht_lookup.bits.paddr_valid := true.B
  io.s1_pht_lookup.bits.region_offset := s1_region_offset

  // stage2: gen pf reg / evict entry to pht
  val s2_evict_entry = RegEnable(s1_evict_entry, s1_alloc)
  val s2_evict_valid = RegNext(s1_alloc && s1_evict_valid, false.B)
  val s2_paddr_valid = RegEnable(s1_pf_gen_paddr_valid, s1_pf_gen_valid)
  val s2_pf_gen_region_tag = RegEnable(s1_pf_gen_region_tag, s1_pf_gen_valid)
  val s2_pf_gen_decr_mode = RegEnable(s1_pf_gen_decr_mode, s1_pf_gen_valid)
  val s2_pf_gen_region_paddr = RegEnable(s1_pf_gen_region_addr, s1_pf_gen_valid)
  val s2_pf_gen_region_bits = RegEnable(s1_pf_gen_region_bits, s1_pf_gen_valid)
  val s2_pf_gen_valid = RegNext(s1_pf_gen_valid, false.B)

  io.s2_evict.valid := s2_evict_valid
  io.s2_evict.bits := s2_evict_entry

  io.s2_pf_gen_req.bits.region_tag := s2_pf_gen_region_tag
  io.s2_pf_gen_req.bits.region_addr := s2_pf_gen_region_paddr
  io.s2_pf_gen_req.bits.region_bits := s2_pf_gen_region_bits
  io.s2_pf_gen_req.bits.paddr_valid := s2_paddr_valid
  io.s2_pf_gen_req.bits.decr_mode := s2_pf_gen_decr_mode
  io.s2_pf_gen_req.valid := s2_pf_gen_valid

  XSPerfAccumulate("sms_agt_alloc", s1_alloc) // cross region match or filter evict
  XSPerfAccumulate("sms_agt_update", s1_update) // entry hit
  XSPerfAccumulate("sms_agt_pf_gen", io.s2_pf_gen_req.valid)
  XSPerfAccumulate("sms_agt_pf_gen_paddr_valid",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.paddr_valid
  )
  XSPerfAccumulate("sms_agt_pf_gen_decr_mode",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.decr_mode
  )
  for(i <- 0 until smsParams.active_gen_table_size){
    XSPerfAccumulate(s"sms_agt_access_entry_$i",
      s1_alloc && s1_replace_mask(i) || s1_update && s1_update_mask(i)
    )
  }

}
396
/** Lookup request sent from the AGT to the PatternHistoryTable. */
class PhtLookup()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)      // PHT set index
  val pht_tag = UInt(PHT_TAG_BITS.W)          // PHT tag to match
  val region_tag = UInt(REGION_TAG_WIDTH.W)   // hashed region address
  val region_addr = UInt(REGION_ADDR_BITS.W)  // region address; physical iff paddr_valid
  val paddr_valid = Bool()
  val region_offset = UInt(REGION_OFFSET.W)   // offset of the triggering access
}
405
/** One PHT way: a saturating counter per block of the region, plus tag and mode. */
class PhtEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val hist = Vec(REGION_BLKS, UInt(PHT_HIST_BITS.W)) // per-block access-history counters
  val tag = UInt(PHT_TAG_BITS.W)
  val decr_mode = Bool()
}
411
/** Set-associative pattern history table backed by a single-port SRAM.
  * Four-stage pipeline: s0 arbitrates evict (priority) vs lookup, s1 reads
  * the RAM, s2 computes updated history / hit vector, s3 writes the RAM
  * (evictions only) or emits a prefetch request (lookup hits only). A s3
  * write owns the single RAM port, so s1's read must stall that cycle.
  */
class PatternHistoryTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    // receive agt evicted entry
    val agt_update = Flipped(ValidIO(new AGTEntry()))
    // at stage1, if we know agt missed, lookup pht
    val s1_agt_lookup = Flipped(ValidIO(new PhtLookup()))
    // pht-generated prefetch req
    val pf_gen_req = ValidIO(new PfGenReq())
  })

  val pht_ram = Module(new SRAMTemplate[PhtEntry](new PhtEntry,
    set = smsParams.pht_size / smsParams.pht_ways,
    way =smsParams.pht_ways,
    singlePort = true
  ))
  def PHT_SETS = smsParams.pht_size / smsParams.pht_ways
  // valid bit per (way, set), kept in registers — the SRAM stores no valid bits
  val pht_valids = Seq.fill(smsParams.pht_ways){
    RegInit(VecInit(Seq.fill(PHT_SETS){false.B}))
  }
  // one PLRU tree per set
  val replacement = Seq.fill(PHT_SETS) { ReplacementPolicy.fromString("plru", smsParams.pht_ways) }

  val lookup_queue = Module(new OverrideableQueue(new PhtLookup, smsParams.pht_lookup_queue_size))
  lookup_queue.io.in := io.s1_agt_lookup
  val lookup = lookup_queue.io.out

  val evict_queue = Module(new OverrideableQueue(new AGTEntry, smsParams.pht_lookup_queue_size))
  evict_queue.io.in := io.agt_update
  val evict = evict_queue.io.out

  val s3_ram_en = Wire(Bool())
  val s1_valid = Wire(Bool())
  // pipe s0: select an op from [lookup, update], generate ram read addr
  val s0_valid = lookup.valid || evict.valid
  val s0_ready = !s1_valid || s1_valid && !s3_ram_en

  // stall when a s3 write is pending on the shared RAM port
  evict.ready := !s1_valid || !s3_ram_en
  lookup.ready := evict.ready && !evict.valid // evictions have priority over lookups

  val s0_ram_raddr = Mux(evict.valid,
    evict.bits.pht_index,
    lookup.bits.pht_index
  )
  val s0_tag = Mux(evict.valid, evict.bits.pht_tag, lookup.bits.pht_tag)
  val s0_region_addr = lookup.bits.region_addr
  val s0_region_tag = lookup.bits.region_tag
  val s0_region_offset = lookup.bits.region_offset
  val s0_paddr_valid = lookup.bits.paddr_valid
  val s0_region_bits = evict.bits.region_bits
  val s0_decr_mode = evict.bits.decr_mode
  val s0_evict = evict.valid

  // pipe s1: send addr to ram
  val s1_valid_r = RegInit(false.B)
  s1_valid_r := Mux(s3_ram_en, s1_valid, s0_valid)
  s1_valid := s1_valid_r
  val s1_reg_en = s0_valid && (!s3_ram_en || !s1_valid)
  val s1_ram_raddr = RegEnable(s0_ram_raddr, s1_reg_en)
  val s1_tag = RegEnable(s0_tag, s1_reg_en)
  val s1_region_bits = RegEnable(s0_region_bits, s1_reg_en)
  val s1_decr_mode = RegEnable(s0_decr_mode, s1_reg_en)
  val s1_region_addr = RegEnable(s0_region_addr, s1_reg_en)
  val s1_region_tag = RegEnable(s0_region_tag, s1_reg_en)
  val s1_region_offset = RegEnable(s0_region_offset, s1_reg_en)
  val s1_paddr_valid = RegEnable(s0_paddr_valid, s1_reg_en)
  val s1_pht_valids = pht_valids.map(way => way(s1_ram_raddr))
  val s1_evict = RegEnable(s0_evict, s1_reg_en)
  // PLRU victim way of the addressed set
  val s1_replace_way = Mux1H(
    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
    replacement.map(_.way)
  )

  // pipe s2: generate ram write addr/data
  val s2_valid = RegNext(s1_valid && !s3_ram_en, false.B)
  val s2_reg_en = s1_valid && !s3_ram_en
  val s2_tag = RegEnable(s1_tag, s2_reg_en)
  val s2_region_bits = RegEnable(s1_region_bits, s2_reg_en)
  val s2_decr_mode = RegEnable(s1_decr_mode, s2_reg_en)
  val s2_region_addr = RegEnable(s1_region_addr, s2_reg_en)
  val s2_region_tag = RegEnable(s1_region_tag, s2_reg_en)
  val s2_region_offset = RegEnable(s1_region_offset, s2_reg_en)
  val s2_region_offset_mask = region_offset_to_bits(s2_region_offset)
  val s2_paddr_valid = RegEnable(s1_paddr_valid, s2_reg_en)
  val s2_evict = RegEnable(s1_evict, s2_reg_en)
  val s2_pht_valids = s1_pht_valids.map(v => RegEnable(v, s2_reg_en))
  val s2_replace_way = RegEnable(s1_replace_way, s2_reg_en)
  val s2_ram_waddr = RegEnable(s1_ram_raddr, s2_reg_en)
  val s2_ram_rdata = pht_ram.io.r.resp.data
  val s2_ram_rtags = s2_ram_rdata.map(_.tag)
  val s2_tag_match_vec = s2_ram_rtags.map(t => t === s2_tag)
  val s2_hit_vec = s2_tag_match_vec.zip(s2_pht_valids).map({
    case (tag_match, v) => v && tag_match
  })
  // per-block saturating counters: +1 for blocks accessed this time, -1 otherwise
  val s2_hist_update = s2_ram_rdata.map(way => VecInit(way.hist.zipWithIndex.map({
    case (h, i) => Mux(s2_region_bits(i),
      Mux(h === ((1 << PHT_HIST_BITS) - 1).U, h, h + 1.U),
      Mux(h === 0.U, 0.U, h - 1.U)
    )
  })))
  // a block is prefetched when its counter's MSB is set
  val s2_hits_pf_gen = s2_ram_rdata.map(way => VecInit(way.hist.map(_.head(1))).asUInt)
  // fresh history on a miss: counters seeded with the current access bits
  val s2_new_hist = VecInit((0 until REGION_BLKS).map(i => {
    Cat(0.U((PHT_HIST_BITS - 1).W), s2_region_bits(i))
  }))
  val s2_pht_hit = Cat(s2_hit_vec).orR
  val s2_hist = Mux(s2_pht_hit, Mux1H(s2_hit_vec, s2_hist_update), s2_new_hist)
  val s2_repl_way_mask = UIntToOH(s2_replace_way)
  val s2_pf_gen_region_bits = Mux1H(s2_hit_vec, s2_hits_pf_gen)

  // pipe s3: send addr/data to ram, gen pf_req
  val s3_valid = RegNext(s2_valid, false.B)
  val s3_evict = RegEnable(s2_evict, s2_valid)
  val s3_hist = RegEnable(s2_hist, s2_valid)
  val s3_pf_gen_region_bits = RegEnable(s2_pf_gen_region_bits, s2_valid)
  val s3_decr_mode = RegEnable(s2_decr_mode, s2_valid)
  val s3_region_addr = RegEnable(s2_region_addr, s2_valid)
  val s3_region_tag = RegEnable(s2_region_tag, s2_valid)
  val s3_region_offset_mask = RegEnable(s2_region_offset_mask, s2_valid)
  val s3_paddr_valid = RegEnable(s2_paddr_valid, s2_valid)
  val s3_pht_tag = RegEnable(s2_tag, s2_valid)
  val s3_hit_vec = s2_hit_vec.map(h => RegEnable(h, s2_valid))
  val s3_hit = Cat(s3_hit_vec).orR
  val s3_hit_way = OHToUInt(s3_hit_vec)
  val s3_repl_way = RegEnable(s2_replace_way, s2_valid)
  val s3_repl_way_mask = RegEnable(s2_repl_way_mask, s2_valid)
  val s3_repl_update_mask = RegEnable(VecInit((0 until PHT_SETS).map(i => i.U === s2_ram_waddr)), s2_valid)
  val s3_ram_waddr = RegEnable(s2_ram_waddr, s2_valid)
  // only evictions write the RAM
  s3_ram_en := s3_valid && s3_evict
  val s3_ram_wdata = Wire(new PhtEntry())
  s3_ram_wdata.hist := s3_hist
  s3_ram_wdata.tag := s3_pht_tag
  s3_ram_wdata.decr_mode := s3_decr_mode

  // mark the victim (way, set) valid when an eviction allocates a new entry
  for((valids, way_idx) <- pht_valids.zipWithIndex){
    val update_way = s3_repl_way_mask(way_idx)
    for((v, set_idx) <- valids.zipWithIndex){
      val update_set = s3_repl_update_mask(set_idx)
      when(s3_valid && s3_evict && !s3_hit && update_set && update_way){
        v := true.B
      }
    }
  }
  // PLRU bookkeeping for the accessed set
  for((r, i) <- replacement.zipWithIndex){
    when(s3_valid && s3_repl_update_mask(i)){
      when(s3_hit){
        r.access(s3_hit_way)
      }.elsewhen(s3_evict){
        r.access(s3_repl_way)
      }
    }
  }

  val s3_way_mask = Mux(s3_hit,
    VecInit(s3_hit_vec).asUInt,
    s3_repl_way_mask,
  ).asUInt

  pht_ram.io.r(
    s1_valid, s1_ram_raddr
  )
  pht_ram.io.w(
    s3_ram_en, s3_ram_wdata, s3_ram_waddr, s3_way_mask
  )
  // generate pf req if hit
  io.pf_gen_req.valid := s3_valid && !s3_evict && Cat(s3_hit_vec).orR
  io.pf_gen_req.bits.region_addr := s3_region_addr
  io.pf_gen_req.bits.region_tag := s3_region_tag
  // mask out current req
  io.pf_gen_req.bits.region_bits := s3_pf_gen_region_bits & (~s3_region_offset_mask).asUInt
  io.pf_gen_req.bits.paddr_valid := s3_paddr_valid
  io.pf_gen_req.bits.decr_mode := s3_decr_mode

  XSPerfAccumulate("sms_pht_update", io.agt_update.valid)
  XSPerfAccumulate("sms_pht_lookup", io.s1_agt_lookup.valid)
  XSPerfAccumulate("sms_pht_lookup_hit", s2_valid && s2_pht_hit)
  for(i <- 0 until smsParams.pht_ways){
    XSPerfAccumulate(s"sms_pht_write_way_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.waymask.get(i))
  }
  for(i <- 0 until PHT_SETS){
    XSPerfAccumulate(s"sms_pht_write_set_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.setIdx === i.U)
  }
}
592
/** One in-flight prefetch region tracked by the PrefetchFilter. */
class PrefetchFilterEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val region_tag = UInt(REGION_TAG_WIDTH.W)   // hashed region address (dedup key)
  val region_addr = UInt(REGION_ADDR_BITS.W)  // region address; physical iff paddr_valid
  val region_bits = UInt(REGION_BLKS.W)       // blocks requested for prefetch
  val filter_bits = UInt(REGION_BLKS.W)       // blocks already sent to L2
  val paddr_valid = Bool()
  val decr_mode = Bool()                      // issue blocks high-to-low when set
}
601
/** Holds in-flight prefetch regions: translates region vaddrs via the TLB
  * when no paddr is known, and issues each block of a region to L2 at most
  * once (filter_bits records blocks already sent).
  */
class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    // prefetch region requests generated by AGT / PHT
    val gen_req = Flipped(ValidIO(new PfGenReq()))
    // TLB port used to translate untranslated region addresses
    val tlb_req = new TlbRequestIO(nRespDups = 2)
    // prefetch address issued to L2
    val l2_pf_addr = ValidIO(UInt(PAddrBits.W))
  })
  val entries = Seq.fill(smsParams.pf_filter_size){ Reg(new PrefetchFilterEntry()) }
  val valids = Seq.fill(smsParams.pf_filter_size){ RegInit(false.B) }
  val replacement = ReplacementPolicy.fromString("plru", smsParams.pf_filter_size)

  // round-robin arbiters: one TLB request and one L2 prefetch per cycle
  val tlb_req_arb = Module(new RRArbiter(new TlbReq, smsParams.pf_filter_size))
  val pf_req_arb = Module(new RRArbiter(UInt(PAddrBits.W), smsParams.pf_filter_size))

  io.tlb_req.req <> tlb_req_arb.io.out
  io.tlb_req.req_kill := false.B
  io.tlb_req.resp.ready := true.B
  io.l2_pf_addr.valid := pf_req_arb.io.out.valid
  io.l2_pf_addr.bits := pf_req_arb.io.out.bits
  pf_req_arb.io.out.ready := true.B

  val s1_valid = Wire(Bool())
  val s1_replace_vec = Wire(UInt(smsParams.pf_filter_size.W))
  val s1_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))

  // s0: entries lookup
  val s0_gen_req = io.gen_req.bits
  val s0_gen_req_valid = io.gen_req.valid
  // an entry being replaced in s1 must not report a match
  val s0_match_vec = valids.indices.map(i => {
    valids(i) && entries(i).region_tag === s0_gen_req.region_tag && !(s1_valid && s1_replace_vec(i))
  })
  val s0_any_matched = Cat(s0_match_vec).orR
  val s0_replace_vec = UIntToOH(replacement.way)
  val s0_hit = s0_gen_req_valid && s0_any_matched

  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val is_evicted = s1_valid && s1_replace_vec(i)
    // request translation while the entry's paddr is still unknown
    tlb_req_arb.io.in(i).valid := v && !s1_tlb_fire_vec(i) && !ent.paddr_valid && !is_evicted
    tlb_req_arb.io.in(i).bits.vaddr := Cat(ent.region_addr, 0.U(log2Up(REGION_SIZE).W))
    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
    tlb_req_arb.io.in(i).bits.size := 3.U
    tlb_req_arb.io.in(i).bits.robIdx := DontCare
    tlb_req_arb.io.in(i).bits.debug := DontCare

    // blocks requested by the generator but not yet sent to L2
    val pending_req_vec = ent.region_bits & (~ent.filter_bits).asUInt
    // FIX: the offset tables must have one entry per block of the region
    // (REGION_BLKS) to match pending_req_vec's width; the original used
    // filter_table_size, which is only equal under the default parameters.
    val first_one_offset = PriorityMux(
      pending_req_vec.asBools,
      (0 until REGION_BLKS).map(_.U(REGION_OFFSET.W))
    )
    val last_one_offset = PriorityMux(
      pending_req_vec.asBools.reverse,
      (0 until REGION_BLKS).reverse.map(_.U(REGION_OFFSET.W))
    )
    // decr mode drains the region from the highest pending block downward
    val pf_addr = Cat(
      ent.region_addr,
      Mux(ent.decr_mode, last_one_offset, first_one_offset),
      0.U(log2Up(dcacheParameters.blockBytes).W)
    )
    pf_req_arb.io.in(i).valid := v && Cat(pending_req_vec).orR && ent.paddr_valid && !is_evicted
    pf_req_arb.io.in(i).bits := pf_addr
  }

  val s0_tlb_fire_vec = VecInit(tlb_req_arb.io.in.map(_.fire))
  val s0_pf_fire_vec = VecInit(pf_req_arb.io.in.map(_.fire))

  // s1: update or alloc
  val s1_valid_r = RegNext(s0_gen_req_valid, false.B)
  val s1_hit = RegEnable(s0_hit, s0_gen_req_valid)
  val s1_gen_req = RegEnable(s0_gen_req, s0_gen_req_valid)
  val s1_replace_vec_r = RegEnable(s0_replace_vec, s0_gen_req_valid && !s0_hit)
  val s1_update_vec = RegEnable(VecInit(s0_match_vec).asUInt, s0_gen_req_valid && s0_hit)
  val s1_tlb_fire_vec_r = RegNext(s0_tlb_fire_vec, 0.U.asTypeOf(s0_tlb_fire_vec))
  val s1_alloc_entry = Wire(new PrefetchFilterEntry())
  s1_valid := s1_valid_r
  s1_replace_vec := s1_replace_vec_r
  s1_tlb_fire_vec := s1_tlb_fire_vec_r.asUInt
  s1_alloc_entry.region_tag := s1_gen_req.region_tag
  s1_alloc_entry.region_addr := s1_gen_req.region_addr
  s1_alloc_entry.region_bits := s1_gen_req.region_bits
  s1_alloc_entry.paddr_valid := s1_gen_req.paddr_valid
  s1_alloc_entry.decr_mode := s1_gen_req.decr_mode
  s1_alloc_entry.filter_bits := 0.U
  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val alloc = s1_valid && !s1_hit && s1_replace_vec(i)
    val update = s1_valid && s1_hit && s1_update_vec(i)
    // for pf: use s0 data
    val pf_fired = s0_pf_fire_vec(i)
    val tlb_fired = s1_tlb_fire_vec(i)
    when(tlb_fired){
      // NOTE(review): assumes the TLB response arrives the cycle after the
      // request fires — confirm against the TLB's timing contract
      ent.paddr_valid := !io.tlb_req.resp.bits.miss
      ent.region_addr := region_addr(io.tlb_req.resp.bits.paddr.head)
    }
    when(update){
      // merge newly requested blocks into the existing entry
      ent.region_bits := ent.region_bits | s1_gen_req.region_bits
    }
    when(pf_fired){
      // mark the block just issued so it is never sent to L2 twice
      val curr_bit = UIntToOH(block_addr(pf_req_arb.io.in(i).bits)(REGION_OFFSET - 1, 0))
      ent.filter_bits := ent.filter_bits | curr_bit
    }
    when(alloc){
      ent := s1_alloc_entry
      v := true.B
    }
  }
  val s1_access_mask = Mux(s1_hit, s1_update_vec, s1_replace_vec)
  val s1_access_way = OHToUInt(s1_access_mask.asUInt)
  when(s1_valid){
    replacement.access(s1_access_way)
  }

  XSPerfAccumulate("sms_pf_filter_recv_req", io.gen_req.valid)
  XSPerfAccumulate("sms_pf_filter_hit", s1_valid && s1_hit)
  XSPerfAccumulate("sms_pf_filter_tlb_req", io.tlb_req.req.fire)
  XSPerfAccumulate("sms_pf_filter_tlb_resp_miss", io.tlb_req.resp.fire && io.tlb_req.resp.bits.miss)
  for(i <- 0 until smsParams.pf_filter_size){
    XSPerfAccumulate(s"sms_pf_filter_access_way_$i", s1_valid && s1_access_way === i.U)
  }
  XSPerfAccumulate("sms_pf_filter_l2_req", io.l2_pf_addr.valid)
}
720
/**
  * Top-level SMS (Spatial Memory Streaming) prefetcher.
  *
  * Training front end (feeds one load per cycle into the tables):
  *  1. Block filter — a load is dropped if its hashed block tag matches
  *     either load of the previous cycle, or the other load port of the
  *     same cycle (ld1 is dropped when it matches ld0).
  *  2. Two-valid arbitration — when both load ports survive the filter in
  *     the same cycle, the older one (by robIdx) trains first and the
  *     younger is buffered for one cycle in the `pending_*` registers.
  *  3. Train-data preparation — region tag, the +1/-1 neighbour region
  *     tags, cross-region/cross-page flags and PHT index/tag are computed
  *     and registered into stage s0.
  *
  * Back end: the s0 lookup goes to the filter table and the active
  * generation table (AGT); AGT evictions update the pattern history table
  * (PHT). Prefetch generation requests from the PHT and the AGT are merged
  * (PHT wins on conflict) and sent through the prefetch filter, which
  * drives the TLB request port and the L2 prefetch address output.
  */
class SMSPrefetcher()(implicit p: Parameters) extends BasePrefecher with HasSMSModuleHelper {

  // The block-filter / arbitration logic below is hard-wired for exactly
  // two load pipelines (head/last usage throughout).
  require(exuParameters.LduCnt == 2)

  // Incoming load train info and the hashed block tag of each load's vaddr
  val ld_curr = io.ld_in.map(_.bits)
  val ld_curr_block_tag = ld_curr.map(x => block_hash_tag(x.vaddr))

  // block filter
  // Last cycle's loads (data + tag + valid), used to drop repeated
  // accesses to the same cache block in consecutive cycles
  val ld_prev = io.ld_in.map(ld => RegEnable(ld.bits, ld.valid))
  val ld_prev_block_tag = ld_curr_block_tag.zip(io.ld_in.map(_.valid)).map({
    case (tag, v) => RegEnable(tag, v)
  })
  val ld_prev_vld = io.ld_in.map(ld => RegNext(ld.valid, false.B))

  // Current load matches any valid load of the previous cycle -> filtered
  val ld_curr_match_prev = ld_curr_block_tag.map(cur_tag =>
    Cat(ld_prev_block_tag.zip(ld_prev_vld).map({
      case (prev_tag, prev_vld) => prev_vld && prev_tag === cur_tag
    })).orR
  )
  // Both ports hit the same block this cycle: keep only port 0
  val ld0_match_ld1 = io.ld_in.head.valid && io.ld_in.last.valid && ld_curr_block_tag.head === ld_curr_block_tag.last
  val ld_curr_vld = Seq(
    io.ld_in.head.valid && !ld_curr_match_prev.head,
    io.ld_in.last.valid && !ld_curr_match_prev.last && !ld0_match_ld1
  )
  // Program-order comparison when both survive the filter
  val ld0_older_than_ld1 = Cat(ld_curr_vld).andR && isBefore(ld_curr.head.uop.robIdx, ld_curr.last.uop.robIdx)
  // When both loads are valid, the one NOT trained this cycle is buffered:
  // pending_sel_ld0 selects the previously-younger load out of ld_prev
  // next cycle (note the select is computed from the opposite of
  // ld0_older_than_ld1 when no request was already pending)
  val pending_vld = RegNext(Cat(ld_curr_vld).andR, false.B)
  val pending_sel_ld0 = RegNext(Mux(pending_vld, ld0_older_than_ld1, !ld0_older_than_ld1))
  val pending_ld = Mux(pending_sel_ld0, ld_prev.head, ld_prev.last)
  val pending_ld_block_tag = Mux(pending_sel_ld0, ld_prev_block_tag.head, ld_prev_block_tag.last)

  // prepare training data
  // Priority: a buffered (pending) load trains first; otherwise the older
  // of the currently valid loads (ld0 also wins when ld1 is invalid)
  val train_ld = RegEnable(
    Mux(pending_vld, pending_ld, Mux(ld0_older_than_ld1 || !ld_curr_vld.last, ld_curr.head, ld_curr.last)),
    pending_vld || Cat(ld_curr_vld).orR
  )

  // Block tag registered with the same select/enable as train_ld
  val train_block_tag = RegEnable(
    Mux(pending_vld, pending_ld_block_tag,
      Mux(ld0_older_than_ld1 || !ld_curr_vld.last, ld_curr_block_tag.head, ld_curr_block_tag.last)
    ),
    pending_vld || Cat(ld_curr_vld).orR
  )
  // Region tag = upper bits of the hashed block tag (offset bits dropped)
  val train_region_tag = train_block_tag.head(REGION_TAG_WIDTH)

  // Raw (un-hashed) low region-address bits, +/- 1 to form the neighbour
  // regions; the extra leading zero bit captures carry/borrow
  val train_region_addr_raw = region_addr(train_ld.vaddr)(REGION_TAG_WIDTH + 2 * VADDR_HASH_WIDTH - 1, 0)
  val train_region_addr_p1 = Cat(0.U(1.W), train_region_addr_raw) + 1.U
  val train_region_addr_m1 = Cat(0.U(1.W), train_region_addr_raw) - 1.U
  // addr_p1 or addr_m1 is valid?
  // Neighbour region only usable if the +/-1 did not overflow/underflow
  // (the carry/borrow shows up in the MSB added above)
  val train_allow_cross_region_p1 = !train_region_addr_p1.head(1).asBool
  val train_allow_cross_region_m1 = !train_region_addr_m1.head(1).asBool

  // Hashed tags of the neighbour regions (MSB carry bit stripped)
  val train_region_p1_tag = region_hash_tag(train_region_addr_p1.tail(1))
  val train_region_m1_tag = region_hash_tag(train_region_addr_m1.tail(1))

  // Neighbour region falls on a different page when the page-granularity
  // bit flips (as extracted by page_bit); such regions need their own
  // address translation
  val train_region_p1_cross_page = page_bit(train_region_addr_p1) ^ page_bit(train_region_addr_raw)
  val train_region_m1_cross_page = page_bit(train_region_addr_m1) ^ page_bit(train_region_addr_raw)

  val train_region_paddr = region_addr(train_ld.paddr)
  val train_region_vaddr = region_addr(train_ld.vaddr)
  // Block offset inside the region (low bits of the block tag)
  val train_region_offset = train_block_tag(REGION_OFFSET - 1, 0)
  val train_vld = RegNext(pending_vld || Cat(ld_curr_vld).orR, false.B)


  // prefetch stage0
  val filter_table = Module(new FilterTable())
  val active_gen_table = Module(new ActiveGenerationTable())
  val pht = Module(new PatternHistoryTable())
  val pf_filter = Module(new PrefetchFilter())

  // One more pipeline stage: register all train-derived fields into s0
  // (all enabled by train_vld so they hold the last valid train request)
  val train_vld_s0 = RegNext(train_vld, false.B)
  val train_s0 = RegEnable(train_ld, train_vld)
  val train_region_tag_s0 = RegEnable(train_region_tag, train_vld)
  val train_region_p1_tag_s0 = RegEnable(train_region_p1_tag, train_vld)
  val train_region_m1_tag_s0 = RegEnable(train_region_m1_tag, train_vld)
  val train_allow_cross_region_p1_s0 = RegEnable(train_allow_cross_region_p1, train_vld)
  val train_allow_cross_region_m1_s0 = RegEnable(train_allow_cross_region_m1, train_vld)
  // PHT is indexed/tagged by the load PC
  val train_pht_tag_s0 = RegEnable(pht_tag(train_ld.uop.cf.pc), train_vld)
  val train_pht_index_s0 = RegEnable(pht_index(train_ld.uop.cf.pc), train_vld)
  val train_region_offset_s0 = RegEnable(train_region_offset, train_vld)
  val train_region_p1_cross_page_s0 = RegEnable(train_region_p1_cross_page, train_vld)
  val train_region_m1_cross_page_s0 = RegEnable(train_region_m1_cross_page, train_vld)
  val train_region_paddr_s0 = RegEnable(train_region_paddr, train_vld)
  val train_region_vaddr_s0 = RegEnable(train_region_vaddr, train_vld)

  // Filter-table lookup: tracks regions seen exactly once so a second
  // access can trigger AGT allocation (see s1_recv_entry below)
  filter_table.io.s0_lookup.valid := train_vld_s0
  filter_table.io.s0_lookup.bits.pht_tag := train_pht_tag_s0
  filter_table.io.s0_lookup.bits.pht_index := train_pht_index_s0
  filter_table.io.s0_lookup.bits.region_tag := train_region_tag_s0
  filter_table.io.s0_lookup.bits.offset := train_region_offset_s0
  // Only insert/update the filter table when the AGT neither matched nor
  // allocated for this access
  filter_table.io.s1_update := !active_gen_table.io.s1_match_or_alloc

  // AGT lookup with the region, its two neighbours, and translation info
  active_gen_table.io.s0_lookup.valid := train_vld_s0
  active_gen_table.io.s0_lookup.bits.region_tag := train_region_tag_s0
  active_gen_table.io.s0_lookup.bits.region_p1_tag := train_region_p1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_m1_tag := train_region_m1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_offset := train_region_offset_s0
  active_gen_table.io.s0_lookup.bits.pht_index := train_pht_index_s0
  active_gen_table.io.s0_lookup.bits.pht_tag := train_pht_tag_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_p1 := train_allow_cross_region_p1_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_m1 := train_allow_cross_region_m1_s0
  active_gen_table.io.s0_lookup.bits.region_p1_cross_page := train_region_p1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_m1_cross_page := train_region_m1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_paddr := train_region_paddr_s0
  active_gen_table.io.s0_lookup.bits.region_vaddr := train_region_vaddr_s0

  // Filter-table hit in s1 means this region was accessed once before:
  // merge the previous offset with the current one and hand the region to
  // the AGT as a fresh entry (two distinct accesses -> access_cnt = 2)
  val train_region_offset_s1 = RegEnable(train_region_offset_s0, train_vld_s0)
  val agt_region_bits_s1 = region_offset_to_bits(train_region_offset_s1) |
    region_offset_to_bits(filter_table.io.s1_result.bits.offset)

  active_gen_table.io.s1_recv_entry.valid := filter_table.io.s1_result.valid
  active_gen_table.io.s1_recv_entry.bits.pht_index := filter_table.io.s1_result.bits.pht_index
  active_gen_table.io.s1_recv_entry.bits.pht_tag := filter_table.io.s1_result.bits.pht_tag
  active_gen_table.io.s1_recv_entry.bits.region_bits := agt_region_bits_s1
  active_gen_table.io.s1_recv_entry.bits.region_tag := filter_table.io.s1_result.bits.region_tag
  active_gen_table.io.s1_recv_entry.bits.access_cnt := 2.U
  active_gen_table.io.s1_recv_entry.bits.decr_mode := false.B

  // PHT: looked up on AGT activity, trained by AGT evictions
  pht.io.s1_agt_lookup := active_gen_table.io.s1_pht_lookup
  pht.io.agt_update := active_gen_table.io.s2_evict

  // Merge prefetch generation requests; the PHT request wins when both
  // PHT and AGT generate in the same cycle (see the perf counter below)
  val pf_gen_req = Mux(pht.io.pf_gen_req.valid,
    pht.io.pf_gen_req.bits,
    active_gen_table.io.s2_pf_gen_req.bits
  )

  pf_filter.io.gen_req.valid := pht.io.pf_gen_req.valid ||
    active_gen_table.io.s2_pf_gen_req.valid
  pf_filter.io.gen_req.bits := pf_gen_req
  // The prefetch filter owns the TLB port and the L2 prefetch address out
  io.tlb_req <> pf_filter.io.tlb_req
  io.pf_addr := pf_filter.io.l2_pf_addr

  // Counts cycles where an AGT-generated request is dropped in favour of
  // a simultaneous PHT-generated one
  XSPerfAccumulate("sms_pf_gen_conflict",
    pht.io.pf_gen_req.valid && active_gen_table.io.s2_pf_gen_req.valid
  )
}
856