xref: /XiangShan/src/main/scala/xiangshan/mem/prefetch/L1PrefetchComponent.scala (revision b03c55a5df5dc8793cb44b42dd60141566e57e78)
1package xiangshan.mem.prefetch
2
3import org.chipsalliance.cde.config.Parameters
4import freechips.rocketchip.util.ValidPseudoLRU
5import chisel3._
6import chisel3.util._
7import xiangshan._
8import utils._
9import utility._
10import xiangshan.cache.HasDCacheParameters
11import xiangshan.cache.mmu._
12import xiangshan.mem.{L1PrefetchReq, LdPrefetchTrainBundle}
13import xiangshan.mem.trace._
14import xiangshan.mem.L1PrefetchSource
15
16trait HasL1PrefetchHelper extends HasCircularQueuePtrHelper with HasDCacheParameters {
17  // region related
18  val REGION_SIZE = 1024
19  val PAGE_OFFSET = 12
20  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
21  val BIT_VEC_WITDH = REGION_SIZE / dcacheParameters.blockBytes
22  val REGION_BITS = log2Up(BIT_VEC_WITDH)
23  val REGION_TAG_OFFSET = BLOCK_OFFSET + REGION_BITS
24  val REGION_TAG_BITS = VAddrBits - BLOCK_OFFSET - REGION_BITS
25
26  // hash related
27  val VADDR_HASH_WIDTH = 5
28  val BLK_ADDR_RAW_WIDTH = 10
29  val HASH_TAG_WIDTH = VADDR_HASH_WIDTH + BLK_ADDR_RAW_WIDTH
30
31  // capacity related
32  val MLP_SIZE = 32
33  val MLP_L1_SIZE = 16
34  val MLP_L2L3_SIZE = MLP_SIZE - MLP_L1_SIZE
35
36  // prefetch sink related
37  val SINK_BITS = 2
38  def SINK_L1 = "b00".U
39  def SINK_L2 = "b01".U
40  def SINK_L3 = "b10".U
41
42  // vaddr: |       region tag        |  region bits  | block offset |
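  // worked example (assuming dcacheParameters.blockBytes == 64):
  //   BLOCK_OFFSET = 6, BIT_VEC_WITDH = 1024 / 64 = 16, REGION_BITS = 4,
  //   so the region bits are vaddr(9, 6) and the region tag is vaddr(VAddrBits - 1, 10)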
43  def get_region_tag(vaddr: UInt) = {
44    require(vaddr.getWidth == VAddrBits)
45    vaddr(vaddr.getWidth - 1, REGION_TAG_OFFSET)
46  }
47
48  def get_region_bits(vaddr: UInt) = {
49    require(vaddr.getWidth == VAddrBits)
50    vaddr(REGION_TAG_OFFSET - 1, BLOCK_OFFSET)
51  }
52
53  def block_addr(x: UInt): UInt = {
54    x(x.getWidth - 1, BLOCK_OFFSET)
55  }
56
57  def vaddr_hash(x: UInt): UInt = {
58    val width = VADDR_HASH_WIDTH
59    val low = x(width - 1, 0)
60    val mid = x(2 * width - 1, width)
61    val high = x(3 * width - 1, 2 * width)
62    low ^ mid ^ high
63  }
64
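  // the *_hash_tag helpers below keep the low BLK_ADDR_RAW_WIDTH (10) bits unhashed and XOR-fold
  // the next 3 * VADDR_HASH_WIDTH (15) bits down to VADDR_HASH_WIDTH (5) bits via vaddr_hash
  // (bits above them are ignored), yielding a HASH_TAG_WIDTH (15)-bit tag for cheap, approximate matching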
65  def pc_hash_tag(x: UInt): UInt = {
66    val low = x(BLK_ADDR_RAW_WIDTH - 1, 0)
67    val high = x(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
68    val high_hash = vaddr_hash(high)
69    Cat(high_hash, low)
70  }
71
72  def block_hash_tag(x: UInt): UInt = {
73    val blk_addr = block_addr(x)
74    val low = blk_addr(BLK_ADDR_RAW_WIDTH - 1, 0)
75    val high = blk_addr(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
76    val high_hash = vaddr_hash(high)
77    Cat(high_hash, low)
78  }
79
80  def region_hash_tag(region_tag: UInt): UInt = {
81    val low = region_tag(BLK_ADDR_RAW_WIDTH - 1, 0)
82    val high = region_tag(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
83    val high_hash = vaddr_hash(high)
84    Cat(high_hash, low)
85  }
86
87  def region_to_block_addr(region_tag: UInt, region_bits: UInt): UInt = {
88    Cat(region_tag, region_bits)
89  }
90
91  def get_candidate_oh(x: UInt): UInt = {
92    require(x.getWidth == PAddrBits)
93    UIntToOH(x(REGION_BITS + BLOCK_OFFSET - 1, BLOCK_OFFSET))
94  }
95
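  // integer to binary string, e.g. toBinary(5) == "101"; used below to name per-pattern perf counters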
96  def toBinary(n: Int): String = n match {
97    case 0|1 => s"$n"
98    case _   => s"${toBinary(n/2)}${n%2}"
99  }
100}
101
102trait HasTrainFilterHelper extends HasCircularQueuePtrHelper {
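  // reorder train sources so that the older request (smaller robIdx, checked via isBefore) comes
  // first; a pair is swapped only when both of its sources are valid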
103  def reorder[T <: LdPrefetchTrainBundle](source: Vec[ValidIO[T]]): Vec[ValidIO[T]] = {
104    if(source.length == 1) {
105      source
106    }else if(source.length == 2) {
107      val source_v = source.map(_.valid)
108      val res = Wire(source.cloneType)
109      // source 1 is older than source 0 (only when source0/1 are both valid)
110      val source_1_older = Mux(Cat(source_v).andR,
111        isBefore(source(1).bits.uop.robIdx, source(0).bits.uop.robIdx),
112        false.B
113      )
114      when(source_1_older) {
115        res(0) := source(1)
116        res(1) := source(0)
117      }.otherwise {
118        res := source
119      }
120
121      res
122    }else if(source.length == 3) {
123      // TODO: generalize
124      val res_0_1 = Reg(source.cloneType)
125      val res_1_2 = Reg(source.cloneType)
126      val res = Reg(source.cloneType)
127
128      val tmp = reorder(VecInit(source.slice(0, 2)))
129      res_0_1(0) := tmp(0)
130      res_0_1(1) := tmp(1)
131      res_0_1(2) := source(2)
132      val tmp_1 = reorder(VecInit(res_0_1.slice(1, 3)))
133      res_1_2(0) := res_0_1(0)
134      res_1_2(1) := tmp_1(0)
135      res_1_2(2) := tmp_1(1)
136      val tmp_2 = reorder(VecInit(res_1_2.slice(0, 2)))
137      res(0) := tmp_2(0)
138      res(1) := tmp_2(1)
139      res(2) := res_1_2(2)
140
141      res
142    }else {
143      require(false, "for now, 4 or more sources are invalid")
144      source
145    }
146  }
147}
148
149// get prefetch train reqs from `backendParams.LduCnt` load pipelines (up to `backendParams.LduCnt`/cycle)
150// filter by cache line address, send out one train req per cycle to the downstream prefetcher (stride or stream)
151class TrainFilter(size: Int, name: String)(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper with HasTrainFilterHelper {
152  val io = IO(new Bundle() {
153    val enable = Input(Bool())
154    val flush = Input(Bool())
155    // train input, only from load for now
156    val ld_in = Flipped(Vec(backendParams.LduCnt, ValidIO(new LdPrefetchTrainBundle())))
157    // filtered train req output
158    val train_req = DecoupledIO(new PrefetchReqBundle())
159  })
160
161  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr]( p => size ){}
162  object Ptr {
163    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
164      val ptr = Wire(new Ptr)
165      ptr.flag := f
166      ptr.value := v
167      ptr
168    }
169  }
170
171  val entries = Reg(Vec(size, new PrefetchReqBundle))
172  val valids = RegInit(VecInit(Seq.fill(size){ (false.B) }))
173
174  // enq
175  val enqLen = backendParams.LduCnt
176  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
177  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))
178
179  val deqPtr = WireInit(deqPtrExt.value)
180
181  require(size >= enqLen)
182
183  val ld_in_reordered = reorder(io.ld_in)
184  val reqs_l = ld_in_reordered.map(_.bits.asPrefetchReqBundle())
185  val reqs_vl = ld_in_reordered.map(_.valid)
186  val needAlloc = Wire(Vec(enqLen, Bool()))
187  val canAlloc = Wire(Vec(enqLen, Bool()))
188
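  // a request is enqueued only if its block hash tag matches neither an existing valid entry
  // (entry_match) nor an earlier request enqueuing in the same cycle (prev_enq_match), and the
  // pointer/enable checks below pass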
189  for(i <- (0 until enqLen)) {
190    val req = reqs_l(i)
191    val req_v = reqs_vl(i)
192    val index = PopCount(needAlloc.take(i))
193    val allocPtr = enqPtrExt(index)
194    val entry_match = Cat(entries.zip(valids).map {
195      case(e, v) => v && block_hash_tag(e.vaddr) === block_hash_tag(req.vaddr)
196    }).orR
197    val prev_enq_match = if(i == 0) false.B else Cat(reqs_l.zip(reqs_vl).take(i).map {
198      case(pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === block_hash_tag(req.vaddr)
199    }).orR
200
201    needAlloc(i) := req_v && !entry_match && !prev_enq_match
202    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt && io.enable
203
204    when(canAlloc(i)) {
205      valids(allocPtr.value) := true.B
206      entries(allocPtr.value) := req
207    }
208  }
209  val allocNum = PopCount(canAlloc)
210
211  enqPtrExt.foreach{case x => when(canAlloc.asUInt.orR) {x := x + allocNum} }
212
213  // deq
214  io.train_req.valid := false.B
215  io.train_req.bits := DontCare
216  valids.zip(entries).zipWithIndex.foreach {
217    case((valid, entry), i) => {
218      when(deqPtr === i.U) {
219        io.train_req.valid := valid && io.enable
220        io.train_req.bits := entry
221      }
222    }
223  }
224
225  when(io.train_req.fire) {
226    valids(deqPtr) := false.B
227    deqPtrExt := deqPtrExt + 1.U
228  }
229
230  when(RegNext(io.flush)) {
231    valids.foreach {case valid => valid := false.B}
232    (0 until enqLen).map {case i => enqPtrExt(i) := i.U.asTypeOf(new Ptr)}
233    deqPtrExt := 0.U.asTypeOf(new Ptr)
234  }
235
236  XSPerfAccumulate(s"${name}_train_filter_full", PopCount(valids) === size.U)
237  XSPerfAccumulate(s"${name}_train_filter_half", PopCount(valids) >= (size / 2).U)
238  XSPerfAccumulate(s"${name}_train_filter_empty", PopCount(valids) === 0.U)
239
240  val raw_enq_pattern = Cat(reqs_vl)
241  val filtered_enq_pattern = Cat(needAlloc)
242  val actual_enq_pattern = Cat(canAlloc)
243  XSPerfAccumulate(s"${name}_train_filter_enq", allocNum > 0.U)
244  XSPerfAccumulate(s"${name}_train_filter_deq", io.train_req.fire)
245  for(i <- 0 until (1 << enqLen)) {
246    XSPerfAccumulate(s"${name}_train_filter_raw_enq_pattern_${toBinary(i)}", raw_enq_pattern === i.U)
247    XSPerfAccumulate(s"${name}_train_filter_filtered_enq_pattern_${toBinary(i)}", filtered_enq_pattern === i.U)
248    XSPerfAccumulate(s"${name}_train_filter_actual_enq_pattern_${toBinary(i)}", actual_enq_pattern === i.U)
249  }
250}
251
252class MLPReqFilterBundle(implicit p: Parameters) extends XSBundle with HasL1PrefetchHelper {
253  val tag = UInt(HASH_TAG_WIDTH.W)
254  val region = UInt(REGION_TAG_BITS.W)
255  val bit_vec = UInt(BIT_VEC_WITDH.W)
256  // NOTE: l1 does not use sent_vec, so that more prefetch reqs can be issued to the l1 dcache
257  val sent_vec = UInt(BIT_VEC_WITDH.W)
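  // (SINK_L1 entries pick candidates from bit_vec directly, while SINK_L2/L3 entries mask
  //  bit_vec with ~sent_vec; see can_send_pf and get_pf_addr below)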
258  val sink = UInt(SINK_BITS.W)
259  val alias = UInt(2.W)
260  val is_vaddr = Bool()
261  val source = new L1PrefetchSource()
262  val debug_va_region = UInt(REGION_TAG_BITS.W)
263
264  def reset(index: Int) = {
265    tag := region_hash_tag(index.U)
266    region := index.U
267    bit_vec := 0.U
268    sent_vec := 0.U
269    sink := SINK_L1
270    alias := 0.U
271    is_vaddr := false.B
272    source.value := L1_HW_PREFETCH_NULL
273    debug_va_region := 0.U
274  }
275
276  def tag_match(valid1: Bool, valid2: Bool, new_tag: UInt): Bool = {
277    require(new_tag.getWidth == HASH_TAG_WIDTH)
278    (tag === new_tag) && valid1 && valid2
279  }
280
281  def update(update_bit_vec: UInt, update_sink: UInt) = {
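    // merge the new bit vector; if the incoming sink is numerically smaller (a level closer to
    // the core, since SINK_L1 < SINK_L2 < SINK_L3), retarget this entry to the closer sink and
    // keep only the not-yet-sent pending blocks plus the new request bits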
282    bit_vec := bit_vec | update_bit_vec
283    when(update_sink < sink) {
284      bit_vec := (bit_vec & ~sent_vec) | update_bit_vec
285      sink := update_sink
286    }
287
288    assert(PopCount(update_bit_vec) >= 1.U, "update vector should contain at least one valid bit")
289  }
290
291  def can_send_pf(valid: Bool): Bool = {
292    Mux(
293      sink === SINK_L1,
294      !is_vaddr && bit_vec.orR,
295      !is_vaddr && (bit_vec & ~sent_vec).orR
296    ) && valid
297  }
298
299  def may_be_replace(valid: Bool): Bool = {
300    // either invalid or has already sent out all reqs
301    !valid || RegNext(PopCount(sent_vec) === BIT_VEC_WITDH.U)
302  }
303
304  def get_pf_addr(): UInt = {
305    require(PAddrBits <= VAddrBits)
306    require((region.getWidth + REGION_BITS + BLOCK_OFFSET) == VAddrBits)
307
308    val candidate = Mux(
309      sink === SINK_L1,
310      PriorityEncoder(bit_vec).asTypeOf(UInt(REGION_BITS.W)),
311      PriorityEncoder(bit_vec & ~sent_vec).asTypeOf(UInt(REGION_BITS.W))
312    )
313    Cat(region, candidate, 0.U(BLOCK_OFFSET.W))
314  }
315
316  def get_pf_debug_vaddr(): UInt = {
317    val candidate = Mux(
318      sink === SINK_L1,
319      PriorityEncoder(bit_vec).asTypeOf(UInt(REGION_BITS.W)),
320      PriorityEncoder(bit_vec & ~sent_vec).asTypeOf(UInt(REGION_BITS.W))
321    )
322    Cat(debug_va_region, candidate, 0.U(BLOCK_OFFSET.W))
323  }
324
325  def get_tlb_va(): UInt = {
326    require((region.getWidth + REGION_TAG_OFFSET) == VAddrBits)
327    Cat(region, 0.U(REGION_TAG_OFFSET.W))
328  }
329
330  def fromStreamPrefetchReqBundle(x : StreamPrefetchReqBundle): MLPReqFilterBundle = {
331    require(PAGE_OFFSET >= REGION_TAG_OFFSET, "region is greater than 4k, alias bit may be incorrect")
332
333    val res = Wire(new MLPReqFilterBundle)
334    res.tag := region_hash_tag(x.region)
335    res.region := x.region
336    res.bit_vec := x.bit_vec
337    res.sent_vec := 0.U
338    res.sink := x.sink
339    res.is_vaddr := true.B
340    res.source := x.source
341    res.alias := x.region(PAGE_OFFSET - REGION_TAG_OFFSET + 1, PAGE_OFFSET - REGION_TAG_OFFSET)
342    res.debug_va_region := x.region
343
344    res
345  }
346
347  def invalidate() = {
348    // disable sending pf req
349    when(sink === SINK_L1) {
350      bit_vec := 0.U(BIT_VEC_WITDH.W)
351    }.otherwise {
352      sent_vec := ~(0.U(BIT_VEC_WITDH.W))
353    }
354    // disable sending tlb req
355    is_vaddr := false.B
356  }
357}
358
359// there are 5 independent pipelines inside
360// 1. prefetch enqueue
361// 2. tlb request
362// 3. actual l1 prefetch
363// 4. actual l2 prefetch
364// 5. actual l3 prefetch
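// entries are split into two arrays: l1_array (MLP_L1_SIZE entries, fed by io.l1_prefetch_req) and
// l2_array (MLP_L2L3_SIZE entries, fed by io.l2_l3_prefetch_req), each with its own PLRU replacer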
365class MutiLevelPrefetchFilter(implicit p: Parameters) extends XSModule with HasL1PrefetchHelper {
366  val io = IO(new XSBundle {
367    val enable = Input(Bool())
368    val flush = Input(Bool())
369    val l1_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle))
370    val l2_l3_prefetch_req = Flipped(ValidIO(new StreamPrefetchReqBundle))
371    val tlb_req = new TlbRequestIO(nRespDups = 2)
372    val l1_req = DecoupledIO(new L1PrefetchReq())
373    val l2_pf_addr = ValidIO(new L2PrefetchReq())
374    val l3_pf_addr = ValidIO(UInt(PAddrBits.W)) // TODO: l3 pf source
375    val confidence = Input(UInt(1.W))
376    val l2PfqBusy = Input(Bool())
377  })
378
379  val l1_array = Reg(Vec(MLP_L1_SIZE, new MLPReqFilterBundle))
380  val l2_array = Reg(Vec(MLP_L2L3_SIZE, new MLPReqFilterBundle))
381  val l1_valids = RegInit(VecInit(Seq.fill(MLP_L1_SIZE)(false.B)))
382  val l2_valids = RegInit(VecInit(Seq.fill(MLP_L2L3_SIZE)(false.B)))
383
384  def _invalid(e: MLPReqFilterBundle, v: Bool): Unit = {
385    v := false.B
386    e.invalidate()
387  }
388
389  def invalid_array(i: UInt, isL2: Boolean): Unit = {
390    if (isL2) {
391      _invalid(l2_array(i), l2_valids(i))
392    } else {
393      _invalid(l1_array(i), l1_valids(i))
394    }
395  }
396
397  def _reset(e: MLPReqFilterBundle, v: Bool, idx: Int): Unit = {
398    v := false.B
399    // only need to reset control signals, which is friendly to area
400    // e.reset(idx)
401  }
402
403
404  def reset_array(i: Int, isL2: Boolean): Unit = {
405    if(isL2){
406      _reset(l2_array(i), l2_valids(i), i)
407    }else{
408      _reset(l1_array(i), l1_valids(i), i)
409    }
410  }
411
412  val l1_replacement = new ValidPseudoLRU(MLP_L1_SIZE)
413  val l2_replacement = new ValidPseudoLRU(MLP_L2L3_SIZE)
414  val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, MLP_SIZE))
415  val l1_pf_req_arb = Module(new RRArbiterInit(new Bundle {
416    val req = new L1PrefetchReq
417    val debug_vaddr = UInt(VAddrBits.W)
418  }, MLP_L1_SIZE))
419  val l2_pf_req_arb = Module(new RRArbiterInit(new Bundle {
420    val req = new L2PrefetchReq
421    val debug_vaddr = UInt(VAddrBits.W)
422  }, MLP_L2L3_SIZE))
423  val l3_pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), MLP_L2L3_SIZE))
424
425  val l1_opt_replace_vec = VecInit(l1_array.zip(l1_valids).map{case (e, v) => e.may_be_replace(v)})
426  val l2_opt_replace_vec = VecInit(l2_array.zip(l2_valids).map{case (e, v) => e.may_be_replace(v)})
427  // if some entries may be replaced, restrict replacement to them; otherwise let the PLRU choose among all entries
428  val l1_real_replace_vec = Mux(Cat(l1_opt_replace_vec).orR, l1_opt_replace_vec, VecInit(Seq.fill(MLP_L1_SIZE)(true.B)))
429  val l2_real_replace_vec = Mux(Cat(l2_opt_replace_vec).orR, l2_opt_replace_vec, VecInit(Seq.fill(MLP_L2L3_SIZE)(true.B)))
430
431  // l1 pf req enq
432  // s0: hash tag match
433  val s0_l1_can_accept = Wire(Bool())
434  val s0_l1_valid = io.l1_prefetch_req.valid && s0_l1_can_accept
435  val s0_l1_region = io.l1_prefetch_req.bits.region
436  val s0_l1_region_hash = region_hash_tag(s0_l1_region)
437  val s0_l1_match_vec = l1_array.zip(l1_valids).map{ case (e, v) => e.tag_match(v, s0_l1_valid, s0_l1_region_hash)}
438  val s0_l1_hit = VecInit(s0_l1_match_vec).asUInt.orR
439  val s0_l1_index = Wire(UInt(log2Up(MLP_L1_SIZE).W))
440  val s0_l1_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l1_prefetch_req.bits)
441
442  s0_l1_index := Mux(s0_l1_hit, OHToUInt(VecInit(s0_l1_match_vec).asUInt), l1_replacement.way(l1_real_replace_vec.reverse)._2)
443
444  when(s0_l1_valid) {
445    l1_replacement.access(s0_l1_index)
446  }
447
448  assert(!s0_l1_valid || PopCount(VecInit(s0_l1_match_vec)) <= 1.U, "req region should match no more than 1 entry")
449
450  XSPerfAccumulate("s0_l1_enq_fire", s0_l1_valid)
451  XSPerfAccumulate("s0_l1_enq_valid", io.l1_prefetch_req.valid)
452  XSPerfAccumulate("s0_l1_cannot_enq", io.l1_prefetch_req.valid && !s0_l1_can_accept)
453
454  // s1: alloc or update
455  val s1_l1_valid = RegNext(s0_l1_valid)
456  val s1_l1_region = RegEnable(s0_l1_region, s0_l1_valid)
457  val s1_l1_region_hash = RegEnable(s0_l1_region_hash, s0_l1_valid)
458  val s1_l1_hit = RegEnable(s0_l1_hit, s0_l1_valid)
459  val s1_l1_index = RegEnable(s0_l1_index, s0_l1_valid)
460  val s1_l1_prefetch_req = RegEnable(s0_l1_prefetch_req, s0_l1_valid)
461  val s1_l1_alloc = s1_l1_valid && !s1_l1_hit
462  val s1_l1_update = s1_l1_valid && s1_l1_hit
463  s0_l1_can_accept := !(s1_l1_valid && s1_l1_alloc && (s0_l1_region_hash === s1_l1_region_hash))
464
465  when(s1_l1_alloc) {
466    l1_valids(s1_l1_index) := true.B
467    l1_array(s1_l1_index) := s1_l1_prefetch_req
468  }.elsewhen(s1_l1_update) {
469    l1_array(s1_l1_index).update(
470      update_bit_vec = s1_l1_prefetch_req.bit_vec,
471      update_sink = s1_l1_prefetch_req.sink
472    )
473  }
474
475  XSPerfAccumulate("s1_l1_enq_valid", s1_l1_valid)
476  XSPerfAccumulate("s1_l1_enq_alloc", s1_l1_alloc)
477  XSPerfAccumulate("s1_l1_enq_update", s1_l1_update)
478  XSPerfAccumulate("l1_hash_conflict", s0_l1_valid && RegNext(s1_l1_valid) && (s0_l1_region =/= RegNext(s1_l1_region)) && (s0_l1_region_hash === RegNext(s1_l1_region_hash)))
479  XSPerfAccumulate("s1_l1_enq_evict_useful_entry", s1_l1_alloc && l1_array(s1_l1_index).can_send_pf(l1_valids(s1_l1_index)))
480
481  // l2 l3 pf req enq
482  // s0: hash tag match
483  val s0_l2_can_accept = Wire(Bool())
484  val s0_l2_valid = io.l2_l3_prefetch_req.valid && s0_l2_can_accept
485  val s0_l2_region = io.l2_l3_prefetch_req.bits.region
486  val s0_l2_region_hash = region_hash_tag(s0_l2_region)
487  val s0_l2_match_vec = l2_array.zip(l2_valids).map{ case (e, v) => e.tag_match(v, s0_l2_valid, s0_l2_region_hash) }
488  val s0_l2_hit = VecInit(s0_l2_match_vec).asUInt.orR
489  val s0_l2_index = Wire(UInt(log2Up(MLP_L2L3_SIZE).W))
490  val s0_l2_prefetch_req = (new MLPReqFilterBundle).fromStreamPrefetchReqBundle(io.l2_l3_prefetch_req.bits)
491
492  s0_l2_index := Mux(s0_l2_hit, OHToUInt(VecInit(s0_l2_match_vec).asUInt), l2_replacement.way(l2_real_replace_vec.reverse)._2)
493
494  when(s0_l2_valid) {
495    l2_replacement.access(s0_l2_index)
496  }
497
498  assert(!s0_l2_valid || PopCount(VecInit(s0_l2_match_vec)) <= 1.U, "req region should match no more than 1 entry")
499
500  XSPerfAccumulate("s0_l2_enq_fire", s0_l2_valid)
501  XSPerfAccumulate("s0_l2_enq_valid", io.l2_l3_prefetch_req.valid)
502  XSPerfAccumulate("s0_l2_cannot_enq", io.l2_l3_prefetch_req.valid && !s0_l2_can_accept)
503
504  // s1: alloc or update
505  val s1_l2_valid = RegNext(s0_l2_valid)
506  val s1_l2_region = RegEnable(s0_l2_region, s0_l2_valid)
507  val s1_l2_region_hash = RegEnable(s0_l2_region_hash, s0_l2_valid)
508  val s1_l2_hit = RegEnable(s0_l2_hit, s0_l2_valid)
509  val s1_l2_index = RegEnable(s0_l2_index, s0_l2_valid)
510  val s1_l2_prefetch_req = RegEnable(s0_l2_prefetch_req, s0_l2_valid)
511  val s1_l2_alloc = s1_l2_valid && !s1_l2_hit
512  val s1_l2_update = s1_l2_valid && s1_l2_hit
513  s0_l2_can_accept := !(s1_l2_valid && s1_l2_alloc && (s0_l2_region_hash === s1_l2_region_hash))
514
515  when(s1_l2_alloc) {
516    l2_valids(s1_l2_index) := true.B
517    l2_array(s1_l2_index) := s1_l2_prefetch_req
518  }.elsewhen(s1_l2_update) {
519    l2_array(s1_l2_index).update(
520      update_bit_vec = s1_l2_prefetch_req.bit_vec,
521      update_sink = s1_l2_prefetch_req.sink
522    )
523  }
524
525  XSPerfAccumulate("s1_l2_enq_valid", s1_l2_valid)
526  XSPerfAccumulate("s1_l2_enq_alloc", s1_l2_alloc)
527  XSPerfAccumulate("s1_l2_enq_update", s1_l2_update)
528  XSPerfAccumulate("l2_hash_conflict", s0_l2_valid && RegNext(s1_l2_valid) && (s0_l2_region =/= RegNext(s1_l2_region)) && (s0_l2_region_hash === RegNext(s1_l2_region_hash)))
529  XSPerfAccumulate("s1_l2_enq_evict_useful_entry", s1_l2_alloc && l2_array(s1_l2_index).can_send_pf(l2_valids(s1_l2_index)))
530
531  // stream pf debug db here
532  // Hit:
533  // for now, only pending = (region_bits & ~filter_bits) counts as the pending requests
534  // if a PfGen comes, the newly added requests are new_req = PfGen.region_bits & ~(pending)
535  // Alloc:
536  // new_req = PfGen.region_bits
537  val stream_pf_trace_debug_table = ChiselDB.createTable("StreamPFTrace" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceInEntry, basicDB = false)
538  for (i <- 0 until BIT_VEC_WITDH) {
539    // l1 enq log
540    val hit_entry = l1_array(s0_l1_index)
541    val new_req = Mux(
542      s0_l1_hit,
543      io.l1_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec),
544      io.l1_prefetch_req.bits.bit_vec
545    )
546    val log_enable = s0_l1_valid && new_req(i) && (io.l1_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM)
547    val log_data = Wire(new StreamPFTraceInEntry)
548
549    log_data.TriggerPC := io.l1_prefetch_req.bits.trigger_pc
550    log_data.TriggerVaddr := io.l1_prefetch_req.bits.trigger_va
551    log_data.PFVaddr := Cat(s0_l1_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
552    log_data.PFSink := s0_l1_prefetch_req.sink
553
554    stream_pf_trace_debug_table.log(
555      data = log_data,
556      en = log_enable,
557      site = "StreamPFTrace",
558      clock = clock,
559      reset = reset
560    )
561  }
562  for (i <- 0 until BIT_VEC_WITDH) {
563    // l2 l3 enq log
564    val hit_entry = l2_array(s0_l2_index)
565    val new_req = Mux(
566      s0_l2_hit,
567      io.l2_l3_prefetch_req.bits.bit_vec & ~(hit_entry.bit_vec),
568      io.l2_l3_prefetch_req.bits.bit_vec
569    )
570    val log_enable = s0_l2_valid && new_req(i) && (io.l2_l3_prefetch_req.bits.source.value === L1_HW_PREFETCH_STREAM)
571    val log_data = Wire(new StreamPFTraceInEntry)
572
573    log_data.TriggerPC := io.l2_l3_prefetch_req.bits.trigger_pc
574    log_data.TriggerVaddr := io.l2_l3_prefetch_req.bits.trigger_va
575    log_data.PFVaddr := Cat(s0_l2_region, i.U(REGION_BITS.W), 0.U(log2Up(dcacheParameters.blockBytes).W))
576    log_data.PFSink := s0_l2_prefetch_req.sink
577
578    stream_pf_trace_debug_table.log(
579      data = log_data,
580      en = log_enable,
581      site = "StreamPFTrace",
582      clock = clock,
583      reset = reset
584    )
585  }
586
587  // tlb req
588  // s0: arb all tlb reqs
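  // an entry may request translation only while it is valid, still holds a virtual region
  // (is_vaddr), has no translation request from the last two cycles still in flight
  // (s1/s2 fire vectors), and is not being re-allocated this cycle by the enqueue pipeline
  // (l1_evict / l2_evict)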
589  val s0_tlb_fire_vec = VecInit((0 until MLP_SIZE).map{case i => tlb_req_arb.io.in(i).fire})
590  val s1_tlb_fire_vec = GatedValidRegNext(s0_tlb_fire_vec)
591  val s2_tlb_fire_vec = GatedValidRegNext(s1_tlb_fire_vec)
592
593  for(i <- 0 until MLP_SIZE) {
594    val l1_evict = s1_l1_alloc && (s1_l1_index === i.U)
595    val l2_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === i.U)
596    if(i < MLP_L1_SIZE) {
597      tlb_req_arb.io.in(i).valid := l1_valids(i) && l1_array(i).is_vaddr && !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !l1_evict
598      tlb_req_arb.io.in(i).bits.vaddr := l1_array(i).get_tlb_va()
599    }else {
600      tlb_req_arb.io.in(i).valid := l2_valids(i - MLP_L1_SIZE) && l2_array(i - MLP_L1_SIZE).is_vaddr && !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !l2_evict
601      tlb_req_arb.io.in(i).bits.vaddr := l2_array(i - MLP_L1_SIZE).get_tlb_va()
602    }
603    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
604    tlb_req_arb.io.in(i).bits.size := 3.U
605    tlb_req_arb.io.in(i).bits.kill := false.B
606    tlb_req_arb.io.in(i).bits.no_translate := false.B
607    tlb_req_arb.io.in(i).bits.memidx := DontCare
608    tlb_req_arb.io.in(i).bits.debug := DontCare
609    tlb_req_arb.io.in(i).bits.hlvx := DontCare
610    tlb_req_arb.io.in(i).bits.hyperinst := DontCare
611  }
612
613  assert(PopCount(s0_tlb_fire_vec) <= 1.U, "s0_tlb_fire_vec should be one-hot or empty")
614
615  // s1: send out the req
616  val s1_tlb_req_valid = GatedValidRegNext(tlb_req_arb.io.out.valid)
617  val s1_tlb_req_bits = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.valid)
618  val s1_tlb_req_index = RegEnable(OHToUInt(s0_tlb_fire_vec.asUInt), tlb_req_arb.io.out.valid)
619  val s1_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s1_tlb_req_index)
620  val s1_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s1_tlb_req_index)
621  val s1_tlb_evict = s1_l1_tlb_evict || s1_l2_tlb_evict
622  io.tlb_req.req.valid := s1_tlb_req_valid && !s1_tlb_evict
623  io.tlb_req.req.bits := s1_tlb_req_bits
624  io.tlb_req.req_kill := false.B
625  tlb_req_arb.io.out.ready := true.B
626
627  XSPerfAccumulate("s1_tlb_req_sent", io.tlb_req.req.valid)
628  XSPerfAccumulate("s1_tlb_req_evict", s1_tlb_req_valid && s1_tlb_evict)
629
630  // s2: get response from tlb
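  // on a translation hit, the entry's region is rewritten with the physical region (zero-extended
  // back to VAddrBits) and is_vaddr is cleared; a load page fault or access fault additionally
  // invalidates the entry so that it will not issue prefetches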
631  val s2_tlb_resp = io.tlb_req.resp
632  val s2_tlb_update_index = RegEnable(s1_tlb_req_index, s1_tlb_req_valid)
633  val s2_l1_tlb_evict = s1_l1_alloc && (s1_l1_index === s2_tlb_update_index)
634  val s2_l2_tlb_evict = s1_l2_alloc && ((s1_l2_index + MLP_L1_SIZE.U) === s2_tlb_update_index)
635  val s2_tlb_evict = s2_l1_tlb_evict || s2_l2_tlb_evict
636  when(s2_tlb_resp.valid && !s2_tlb_evict) {
637    when(s2_tlb_update_index < MLP_L1_SIZE.U) {
638      l1_array(s2_tlb_update_index).is_vaddr := s2_tlb_resp.bits.miss
639
640      when(!s2_tlb_resp.bits.miss) {
641        l1_array(s2_tlb_update_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s2_tlb_resp.bits.paddr.head(s2_tlb_resp.bits.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
642        when(s2_tlb_resp.bits.excp.head.pf.ld || s2_tlb_resp.bits.excp.head.af.ld) {
643          invalid_array(s2_tlb_update_index, false)
644        }
645      }
646    }.otherwise {
647      val inner_index = s2_tlb_update_index - MLP_L1_SIZE.U
648      l2_array(inner_index).is_vaddr := s2_tlb_resp.bits.miss
649
650      when(!s2_tlb_resp.bits.miss) {
651        l2_array(inner_index).region := Cat(0.U((VAddrBits - PAddrBits).W), s2_tlb_resp.bits.paddr.head(s2_tlb_resp.bits.paddr.head.getWidth - 1, REGION_TAG_OFFSET))
652        when(s2_tlb_resp.bits.excp.head.pf.ld || s2_tlb_resp.bits.excp.head.af.ld) {
653          invalid_array(inner_index, true)
654        }
655      }
656    }
657  }
658  s2_tlb_resp.ready := true.B
659
660  XSPerfAccumulate("s2_tlb_resp_valid", s2_tlb_resp.valid)
661  XSPerfAccumulate("s2_tlb_resp_evict", s2_tlb_resp.valid && s2_tlb_evict)
662  XSPerfAccumulate("s2_tlb_resp_miss", s2_tlb_resp.valid && !s2_tlb_evict && s2_tlb_resp.bits.miss)
663  XSPerfAccumulate("s2_tlb_resp_updated", s2_tlb_resp.valid && !s2_tlb_evict && !s2_tlb_resp.bits.miss)
664  XSPerfAccumulate("s2_tlb_resp_page_fault", s2_tlb_resp.valid && !s2_tlb_evict && !s2_tlb_resp.bits.miss && s2_tlb_resp.bits.excp.head.pf.ld)
665  XSPerfAccumulate("s2_tlb_resp_access_fault", s2_tlb_resp.valid && !s2_tlb_evict && !s2_tlb_resp.bits.miss && s2_tlb_resp.bits.excp.head.af.ld)
666
667  // l1 pf
668  // s0: generate prefetch req paddr per entry, arb them
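  // the arbitration winner is registered into an s1 buffer; the chosen block is marked in
  // sent_vec as soon as s0 fires, and is cleared from bit_vec one cycle later when the s1 stage
  // lets the request go (s1_pf_fire)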
669  val s0_pf_fire_vec = VecInit((0 until MLP_L1_SIZE).map{case i => l1_pf_req_arb.io.in(i).fire})
670  val s1_pf_fire_vec = GatedValidRegNext(s0_pf_fire_vec)
671
672  val s0_pf_fire = l1_pf_req_arb.io.out.fire
673  val s0_pf_index = l1_pf_req_arb.io.chosen
674  val s0_pf_candidate_oh = get_candidate_oh(l1_pf_req_arb.io.out.bits.req.paddr)
675
676  for(i <- 0 until MLP_L1_SIZE) {
677    val evict = s1_l1_alloc && (s1_l1_index === i.U)
678    l1_pf_req_arb.io.in(i).valid := l1_array(i).can_send_pf(l1_valids(i)) && !evict
679    l1_pf_req_arb.io.in(i).bits.req.paddr := l1_array(i).get_pf_addr()
680    l1_pf_req_arb.io.in(i).bits.req.alias := l1_array(i).alias
681    l1_pf_req_arb.io.in(i).bits.req.confidence := io.confidence
682    l1_pf_req_arb.io.in(i).bits.req.is_store := false.B
683    l1_pf_req_arb.io.in(i).bits.req.pf_source := l1_array(i).source
684    l1_pf_req_arb.io.in(i).bits.debug_vaddr := l1_array(i).get_pf_debug_vaddr()
685  }
686
687  when(s0_pf_fire) {
688    l1_array(s0_pf_index).sent_vec := l1_array(s0_pf_index).sent_vec | s0_pf_candidate_oh
689  }
690
691  assert(PopCount(s0_pf_fire_vec) <= 1.U, "s0_pf_fire_vec should be one-hot or empty")
692
693  // s1: send out to dcache
694  val s1_pf_valid = Reg(Bool())
695  val s1_pf_bits = RegEnable(l1_pf_req_arb.io.out.bits, l1_pf_req_arb.io.out.fire)
696  val s1_pf_index = RegEnable(s0_pf_index, l1_pf_req_arb.io.out.fire)
697  val s1_pf_candidate_oh = RegEnable(s0_pf_candidate_oh, l1_pf_req_arb.io.out.fire)
698  val s1_pf_evict = s1_l1_alloc && (s1_l1_index === s1_pf_index)
699  val s1_pf_update = s1_l1_update && (s1_l1_index === s1_pf_index)
700  val s1_pf_can_go = io.l1_req.ready && !s1_pf_evict && !s1_pf_update
701  val s1_pf_fire = s1_pf_valid && s1_pf_can_go
702
703  when(s1_pf_can_go) {
704    s1_pf_valid := false.B
705  }
706
707  when(l1_pf_req_arb.io.out.fire) {
708    s1_pf_valid := true.B
709  }
710
711  when(s1_pf_fire) {
712    l1_array(s1_pf_index).bit_vec := l1_array(s1_pf_index).bit_vec & ~s1_pf_candidate_oh
713  }
714
715  io.l1_req.valid := s1_pf_valid && !s1_pf_evict && !s1_pf_update && (s1_pf_bits.req.paddr >= 0x80000000L.U) && io.enable
716  io.l1_req.bits := s1_pf_bits.req
717
718  l1_pf_req_arb.io.out.ready := s1_pf_can_go || !s1_pf_valid
719
720  assert(!((s1_l1_alloc || s1_l1_update) && s1_pf_fire && (s1_l1_index === s1_pf_index)), "pf pipeline & enq pipeline bit_vec hazard!")
721
722  XSPerfAccumulate("s1_pf_valid", s1_pf_valid)
723  XSPerfAccumulate("s1_pf_block_by_pipe_unready", s1_pf_valid && !io.l1_req.ready)
724  XSPerfAccumulate("s1_pf_block_by_enq_alloc_harzard", s1_pf_valid && s1_pf_evict)
725  XSPerfAccumulate("s1_pf_block_by_enq_update_harzard", s1_pf_valid && s1_pf_update)
726  XSPerfAccumulate("s1_pf_fire", s1_pf_fire)
727
728  // l2 pf
729  // s0: generate prefetch req paddr per entry, arb them, send them out
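  // unlike the l1 path there is no s1 buffering here: io.l2_pf_addr is a ValidIO (no ready), so
  // the arbiter output is always accepted and the chosen block is marked in sent_vec immediately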
730  io.l2_pf_addr.valid := l2_pf_req_arb.io.out.valid
731  io.l2_pf_addr.bits := l2_pf_req_arb.io.out.bits.req
732
733  l2_pf_req_arb.io.out.ready := true.B
734
735  for(i <- 0 until MLP_L2L3_SIZE) {
736    val evict = s1_l2_alloc && (s1_l2_index === i.U)
737    l2_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L2) && !evict
738    l2_pf_req_arb.io.in(i).bits.req.addr := l2_array(i).get_pf_addr()
739    l2_pf_req_arb.io.in(i).bits.req.source := MuxLookup(l2_array(i).source.value, MemReqSource.Prefetch2L2Unknown.id.U)(Seq(
740      L1_HW_PREFETCH_STRIDE -> MemReqSource.Prefetch2L2Stride.id.U,
741      L1_HW_PREFETCH_STREAM -> MemReqSource.Prefetch2L2Stream.id.U
742    ))
743    l2_pf_req_arb.io.in(i).bits.debug_vaddr := l2_array(i).get_pf_debug_vaddr()
744  }
745
746  when(l2_pf_req_arb.io.out.valid) {
747    l2_array(l2_pf_req_arb.io.chosen).sent_vec := l2_array(l2_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l2_pf_req_arb.io.out.bits.req.addr)
748  }
749
750  val stream_out_debug_table = ChiselDB.createTable("StreamPFTraceOut" + p(XSCoreParamsKey).HartId.toString, new StreamPFTraceOutEntry, basicDB = false)
751  val l1_debug_data = Wire(new StreamPFTraceOutEntry)
752  val l2_debug_data = Wire(new StreamPFTraceOutEntry)
753  l1_debug_data.PFVaddr := l1_pf_req_arb.io.out.bits.debug_vaddr
754  l1_debug_data.PFSink := SINK_L1
755  l2_debug_data.PFVaddr := l2_pf_req_arb.io.out.bits.debug_vaddr
756  l2_debug_data.PFSink := SINK_L2
757
758  stream_out_debug_table.log(
759    data = l1_debug_data,
760    en = l1_pf_req_arb.io.out.fire && (l1_pf_req_arb.io.out.bits.req.pf_source.value === L1_HW_PREFETCH_STREAM),
761    site = "StreamPFTraceOut",
762    clock = clock,
763    reset = reset
764  )
765  stream_out_debug_table.log(
766    data = l2_debug_data,
767    en = l2_pf_req_arb.io.out.fire && (l2_pf_req_arb.io.out.bits.req.source === MemReqSource.Prefetch2L2Stream.id.U),
768    site = "StreamPFTraceOut",
769    clock = clock,
770    reset = reset
771  )
772
773  // last level cache pf
774  // s0: generate prefetch req paddr per entry, arb them, send them out
775  io.l3_pf_addr.valid := l3_pf_req_arb.io.out.valid
776  io.l3_pf_addr.bits := l3_pf_req_arb.io.out.bits
777
778  l3_pf_req_arb.io.out.ready := true.B
779
780  for(i <- 0 until MLP_L2L3_SIZE) {
781    val evict = s1_l2_alloc && (s1_l2_index === i.U)
782    l3_pf_req_arb.io.in(i).valid := l2_array(i).can_send_pf(l2_valids(i)) && (l2_array(i).sink === SINK_L3) && !evict
783    l3_pf_req_arb.io.in(i).bits := l2_array(i).get_pf_addr()
784  }
785
786  when(l3_pf_req_arb.io.out.valid) {
787    l2_array(l3_pf_req_arb.io.chosen).sent_vec := l2_array(l3_pf_req_arb.io.chosen).sent_vec | get_candidate_oh(l3_pf_req_arb.io.out.bits)
788  }
789
790  // reset meta to avoid multi-hit problems
791  for(i <- 0 until MLP_SIZE) {
792    if(i < MLP_L1_SIZE) {
793      when(RegNext(io.flush)) {
794        reset_array(i, false)
795      }
796    }else {
797      when(RegNext(io.flush)) {
798        reset_array(i - MLP_L1_SIZE, true)
799      }
800    }
801  }
802
803  XSPerfAccumulate("l2_prefetche_queue_busby", io.l2PfqBusy)
804  XSPerfHistogram("filter_active", PopCount(VecInit(
805    l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v) } ++
806    l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) }
807    ).asUInt), true.B, 0, MLP_SIZE, 1)
808  XSPerfHistogram("l1_filter_active", PopCount(VecInit(l1_array.zip(l1_valids).map{ case (e, v) => e.can_send_pf(v)}).asUInt), true.B, 0, MLP_L1_SIZE, 1)
809  XSPerfHistogram("l2_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L2)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
810  XSPerfHistogram("l3_filter_active", PopCount(VecInit(l2_array.zip(l2_valids).map{ case (e, v) => e.can_send_pf(v) && (e.sink === SINK_L3)}).asUInt), true.B, 0, MLP_L2L3_SIZE, 1)
811}
812
813class L1Prefetcher(implicit p: Parameters) extends BasePrefecher with HasStreamPrefetchHelper with HasStridePrefetchHelper {
814  val pf_ctrl = IO(Input(new PrefetchControlBundle))
815  val stride_train = IO(Flipped(Vec(backendParams.LduCnt + backendParams.HyuCnt, ValidIO(new LdPrefetchTrainBundle()))))
816  val l2PfqBusy = IO(Input(Bool()))
817
818  val stride_train_filter = Module(new TrainFilter(STRIDE_FILTER_SIZE, "stride"))
819  val stride_meta_array = Module(new StrideMetaArray)
820  val stream_train_filter = Module(new TrainFilter(STREAM_FILTER_SIZE, "stream"))
821  val stream_bit_vec_array = Module(new StreamBitVectorArray)
822  val pf_queue_filter = Module(new MutiLevelPrefetchFilter)
823
824  // for now, if the stream prefetcher is disabled, training and prefetch generation continue, but no reqs are sent out
825  val enable = io.enable
826  val flush = pf_ctrl.flush
827
828  stream_train_filter.io.ld_in.zipWithIndex.foreach {
829    case (ld_in, i) => {
830      ld_in.valid := io.ld_in(i).valid && enable
831      ld_in.bits := io.ld_in(i).bits
832    }
833  }
834  stream_train_filter.io.enable := enable
835  stream_train_filter.io.flush := flush
836
837  stride_train_filter.io.ld_in.zipWithIndex.foreach {
838    case (ld_in, i) => {
839      ld_in.valid := stride_train(i).valid && enable
840      ld_in.bits := stride_train(i).bits
841    }
842  }
843  stride_train_filter.io.enable := enable
844  stride_train_filter.io.flush := flush
845
846  stream_bit_vec_array.io.enable := enable
847  stream_bit_vec_array.io.flush := flush
848  stream_bit_vec_array.io.dynamic_depth := pf_ctrl.dynamic_depth
849  stream_bit_vec_array.io.train_req <> stream_train_filter.io.train_req
850
851  stride_meta_array.io.enable := enable
852  stride_meta_array.io.flush := flush
853  stride_meta_array.io.dynamic_depth := 0.U
854  stride_meta_array.io.train_req <> stride_train_filter.io.train_req
855  stride_meta_array.io.stream_lookup_req <> stream_bit_vec_array.io.stream_lookup_req
856  stride_meta_array.io.stream_lookup_resp <> stream_bit_vec_array.io.stream_lookup_resp
857
858  // stream has higher priority than stride
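  // when both fire in the same cycle, the stride request is simply dropped (not queued)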
859  pf_queue_filter.io.l1_prefetch_req.valid := stream_bit_vec_array.io.l1_prefetch_req.valid || stride_meta_array.io.l1_prefetch_req.valid
860  pf_queue_filter.io.l1_prefetch_req.bits := Mux(
861    stream_bit_vec_array.io.l1_prefetch_req.valid,
862    stream_bit_vec_array.io.l1_prefetch_req.bits,
863    stride_meta_array.io.l1_prefetch_req.bits
864  )
865
866  pf_queue_filter.io.l2_l3_prefetch_req.valid := stream_bit_vec_array.io.l2_l3_prefetch_req.valid || stride_meta_array.io.l2_l3_prefetch_req.valid
867  pf_queue_filter.io.l2_l3_prefetch_req.bits := Mux(
868    stream_bit_vec_array.io.l2_l3_prefetch_req.valid,
869    stream_bit_vec_array.io.l2_l3_prefetch_req.bits,
870    stride_meta_array.io.l2_l3_prefetch_req.bits
871  )
872
873  io.l1_req.valid := pf_queue_filter.io.l1_req.valid && enable && pf_ctrl.enable
874  io.l1_req.bits := pf_queue_filter.io.l1_req.bits
875
876  pf_queue_filter.io.l1_req.ready := Mux(pf_ctrl.enable, io.l1_req.ready, true.B)
877  pf_queue_filter.io.tlb_req <> io.tlb_req
878  pf_queue_filter.io.enable := enable
879  pf_queue_filter.io.flush := flush
880  pf_queue_filter.io.confidence := pf_ctrl.confidence
881  pf_queue_filter.io.l2PfqBusy := l2PfqBusy
882
883  io.l2_req.valid := pf_queue_filter.io.l2_pf_addr.valid && pf_queue_filter.io.l2_pf_addr.bits.addr > 0x80000000L.U && enable && pf_ctrl.enable
884  io.l2_req.bits := pf_queue_filter.io.l2_pf_addr.bits
885
886  io.l3_req.valid := pf_queue_filter.io.l3_pf_addr.valid && pf_queue_filter.io.l3_pf_addr.bits > 0x80000000L.U && enable && pf_ctrl.enable
887  io.l3_req.bits := pf_queue_filter.io.l3_pf_addr.bits
888}