package xiangshan.mem.prefetch

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import utility._
import xiangshan.cache.HasDCacheParameters
import xiangshan.cache.mmu._
import xiangshan.mem.{LdPrefetchTrainBundle, StPrefetchTrainBundle, L1PrefetchReq}
import xiangshan.mem.trace._
import xiangshan.mem.HasL1PrefetchSourceParameter

/** Configuration knobs for the SMS (Spatial Memory Streaming) prefetcher. */
case class SMSParams
(
  region_size: Int = 1024,
  vaddr_hash_width: Int = 5,
  block_addr_raw_width: Int = 10,
  stride_pc_bits: Int = 10,
  max_stride: Int = 1024,
  stride_entries: Int = 16,
  active_gen_table_size: Int = 16,
  pht_size: Int = 64,
  pht_ways: Int = 2,
  pht_hist_bits: Int = 2,
  pht_tag_bits: Int = 13,
  pht_lookup_queue_size: Int = 4,
  pf_filter_size: Int = 16,
  train_filter_size: Int = 8
) extends PrefetcherParams

/**
  * Shared widths and address-manipulation helpers for all SMS sub-modules.
  * All widths are derived from [[SMSParams]] and the dcache geometry.
  */
trait HasSMSModuleHelper extends HasCircularQueuePtrHelper with HasDCacheParameters
{ this: HasXSParameter =>
  val smsParams = coreParams.prefetcher.get.asInstanceOf[SMSParams]
  val BLK_ADDR_WIDTH = VAddrBits - log2Up(dcacheParameters.blockBytes)
  val REGION_SIZE = smsParams.region_size
  val REGION_BLKS = smsParams.region_size / dcacheParameters.blockBytes
  val REGION_ADDR_BITS = VAddrBits - log2Up(REGION_SIZE)
  val REGION_OFFSET = log2Up(REGION_BLKS)
  val VADDR_HASH_WIDTH = smsParams.vaddr_hash_width
  val BLK_ADDR_RAW_WIDTH = smsParams.block_addr_raw_width
  val REGION_ADDR_RAW_WIDTH = BLK_ADDR_RAW_WIDTH - REGION_OFFSET
  val BLK_TAG_WIDTH = BLK_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val REGION_TAG_WIDTH = REGION_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val PHT_INDEX_BITS = log2Up(smsParams.pht_size / smsParams.pht_ways)
  val PHT_TAG_BITS = smsParams.pht_tag_bits
  val PHT_HIST_BITS = smsParams.pht_hist_bits
  // page bit index in block addr
  val BLOCK_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / dcacheParameters.blockBytes)
  // page bit index in region addr
  val REGION_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / smsParams.region_size)
  val STRIDE_PC_BITS = smsParams.stride_pc_bits
  val STRIDE_BLK_ADDR_BITS = log2Up(smsParams.max_stride)

  /** Strip the cache-block offset bits from a full (virtual or physical) address. */
  def block_addr(x: UInt): UInt = {
    val offset = log2Up(dcacheParameters.blockBytes)
    x(x.getWidth - 1, offset)
  }

  /** Strip the region offset bits from a full address. */
  def region_addr(x: UInt): UInt = {
    val offset = log2Up(REGION_SIZE)
    x(x.getWidth - 1, offset)
  }

  /** One-hot encode an in-region block offset. */
  def region_offset_to_bits(off: UInt): UInt = {
    (1.U << off).asUInt
  }

  /** Compress a region address into a tag: raw low bits ++ xor-folded high bits. */
  def region_hash_tag(rg_addr: UInt): UInt = {
    val low = rg_addr(REGION_ADDR_RAW_WIDTH - 1, 0)
    val high = rg_addr(REGION_ADDR_RAW_WIDTH + 3 * VADDR_HASH_WIDTH - 1, REGION_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  /** Select the page-boundary bit of a region address. */
  def page_bit(region_addr: UInt): UInt = {
    region_addr(log2Up(dcacheParameters.pageSize/REGION_SIZE))
  }

  /** Compress a full address into a block tag, analogous to [[region_hash_tag]]. */
  def block_hash_tag(x: UInt): UInt = {
    val blk_addr = block_addr(x)
    val low = blk_addr(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = blk_addr(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  /** Fold 3*VADDR_HASH_WIDTH bits into VADDR_HASH_WIDTH bits by xor. */
  def vaddr_hash(x: UInt): UInt = {
    val width = VADDR_HASH_WIDTH
    val low = x(width - 1, 0)
    val mid = x(2 * width - 1, width)
    val high = x(3 * width - 1, 2 * width)
    low ^ mid ^ high
  }

  /** PHT set index derived from the training PC. */
  def pht_index(pc: UInt): UInt = {
    val low_bits = pc(PHT_INDEX_BITS, 2)
    val hi_bit = pc(1) ^ pc(PHT_INDEX_BITS+1)
    Cat(hi_bit, low_bits)
  }

  /** PHT tag: PC bits above the index field. */
  def pht_tag(pc: UInt): UInt = {
    pc(PHT_INDEX_BITS + 2 + PHT_TAG_BITS - 1, PHT_INDEX_BITS + 2)
  }

  /** Cache-alias bits of the full virtual address reconstructed from a region address. */
  def get_alias_bits(region_vaddr: UInt): UInt = {
    val offset = log2Up(REGION_SIZE)
    get_alias(Cat(region_vaddr, 0.U(offset.W)))
  }
}

/**
  * PC-indexed stride prefetcher.
  * s0: CAM lookup by training PC; s1: confidence/stride update and allocation;
  * s2: emit a region-format prefetch request at (block_vaddr + stride).
  */
class StridePF()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val stride_en = Input(Bool())
    val s0_lookup = Flipped(new ValidIO(new Bundle() {
      val pc = UInt(STRIDE_PC_BITS.W)
      val vaddr = UInt(VAddrBits.W)
      val paddr = UInt(PAddrBits.W)
    }))
    val s1_valid = Input(Bool())
    val s2_gen_req = ValidIO(new PfGenReq())
  })

  val prev_valid = RegNext(io.s0_lookup.valid, false.B)
  val prev_pc = RegEnable(io.s0_lookup.bits.pc, io.s0_lookup.valid)

  // suppress back-to-back lookups from the same PC
  val s0_valid = io.s0_lookup.valid && !(prev_valid && prev_pc === io.s0_lookup.bits.pc)

  def entry_map[T](fn: Int => T) = (0 until smsParams.stride_entries).map(fn)

  val replacement = ReplacementPolicy.fromString("plru", smsParams.stride_entries)
  val valids = entry_map(_ => RegInit(false.B))
  val entries_pc = entry_map(_ => Reg(UInt(STRIDE_PC_BITS.W)) )
  val entries_conf = entry_map(_ => RegInit(1.U(2.W)))
  val entries_last_addr = entry_map(_ => Reg(UInt(STRIDE_BLK_ADDR_BITS.W)) )
  val entries_stride = entry_map(_ => Reg(SInt((STRIDE_BLK_ADDR_BITS+1).W)))

  val s0_match_vec = valids.zip(entries_pc).map({
    case (v, pc) => v && pc === io.s0_lookup.bits.pc
  })

  val s0_hit = s0_valid && Cat(s0_match_vec).orR
  val s0_miss = s0_valid && !s0_hit
  val s0_matched_conf = Mux1H(s0_match_vec, entries_conf)
  val s0_matched_last_addr = Mux1H(s0_match_vec, entries_last_addr)
  val s0_matched_last_stride = Mux1H(s0_match_vec, entries_stride)

  val s1_vaddr = RegEnable(io.s0_lookup.bits.vaddr, s0_valid)
  val s1_paddr = RegEnable(io.s0_lookup.bits.paddr, s0_valid)
  // NOTE(review): RegNext here has no reset value; s1_hit/s1_alloc are gated by
  // io.s1_valid, which presumably masks the post-reset garbage — confirm.
  val s1_hit = RegNext(s0_hit) && io.s1_valid
  val s1_alloc = RegNext(s0_miss) && io.s1_valid
  val s1_conf = RegNext(s0_matched_conf)
  val s1_last_addr = RegNext(s0_matched_last_addr)
  val s1_last_stride = RegNext(s0_matched_last_stride)
  val s1_match_vec = RegNext(VecInit(s0_match_vec))

  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  val s1_new_stride_vaddr = s1_vaddr(BLOCK_OFFSET + STRIDE_BLK_ADDR_BITS - 1, BLOCK_OFFSET)
  // signed difference of block addresses (zero-extended before subtraction)
  val s1_new_stride = (0.U(1.W) ## s1_new_stride_vaddr).asSInt - (0.U(1.W) ## s1_last_addr).asSInt
  val s1_stride_non_zero = s1_last_stride =/= 0.S
  val s1_stride_match = s1_new_stride === s1_last_stride && s1_stride_non_zero
  val s1_replace_idx = replacement.way

  for(i <- 0 until smsParams.stride_entries){
    val alloc = s1_alloc && i.U === s1_replace_idx
    val update = s1_hit && s1_match_vec(i)
    when(update){
      assert(valids(i))
      // 2-bit saturating confidence counter
      entries_conf(i) := Mux(s1_stride_match,
        Mux(s1_conf === 3.U, 3.U, s1_conf + 1.U),
        Mux(s1_conf === 0.U, 0.U, s1_conf - 1.U)
      )
      entries_last_addr(i) := s1_new_stride_vaddr
      // only re-learn the stride while confidence is low (conf < 2)
      when(!s1_conf(1)){
        entries_stride(i) := s1_new_stride
      }
    }
    when(alloc){
      valids(i) := true.B
      entries_pc(i) := prev_pc
      entries_conf(i) := 0.U
      entries_last_addr(i) := s1_new_stride_vaddr
      entries_stride(i) := 0.S
    }
    assert(!(update && alloc))
  }
  when(s1_hit){
    replacement.access(OHToUInt(s1_match_vec.asUInt))
  }.elsewhen(s1_alloc){
    replacement.access(s1_replace_idx)
  }

  val s1_block_vaddr = block_addr(s1_vaddr)
  val s1_pf_block_vaddr = (s1_block_vaddr.asSInt + s1_last_stride).asUInt
  val s1_pf_cross_page = s1_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT) =/= s1_block_vaddr(BLOCK_ADDR_PAGE_BIT)

  val s2_pf_gen_valid = RegNext(s1_hit && s1_stride_match, false.B)
  val s2_pf_gen_paddr_valid = RegEnable(!s1_pf_cross_page, s1_hit && s1_stride_match)
  val s2_pf_block_vaddr = RegEnable(s1_pf_block_vaddr, s1_hit && s1_stride_match)
  val s2_block_paddr = RegEnable(block_addr(s1_paddr), s1_hit && s1_stride_match)

  // splice physical page number with virtual in-page offset when the target
  // stays inside the same page; otherwise fall back to the virtual address
  val s2_pf_block_addr = Mux(s2_pf_gen_paddr_valid,
    Cat(
      s2_block_paddr(PAddrBits - BLOCK_OFFSET - 1, BLOCK_ADDR_PAGE_BIT),
      s2_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT - 1, 0)
    ),
    s2_pf_block_vaddr
  )
  val s2_pf_full_addr = Wire(UInt(VAddrBits.W))
  s2_pf_full_addr := s2_pf_block_addr ## 0.U(BLOCK_OFFSET.W)

  val s2_pf_region_addr = region_addr(s2_pf_full_addr)
  val s2_pf_region_offset = s2_pf_block_addr(REGION_OFFSET - 1, 0)

  val s2_full_vaddr = Wire(UInt(VAddrBits.W))
  s2_full_vaddr := s2_pf_block_vaddr ## 0.U(BLOCK_OFFSET.W)

  val s2_region_tag = region_hash_tag(region_addr(s2_full_vaddr))

  io.s2_gen_req.valid := s2_pf_gen_valid && io.stride_en
  io.s2_gen_req.bits.region_tag := s2_region_tag
  io.s2_gen_req.bits.region_addr := s2_pf_region_addr
  io.s2_gen_req.bits.alias_bits := get_alias_bits(region_addr(s2_full_vaddr))
  io.s2_gen_req.bits.region_bits := region_offset_to_bits(s2_pf_region_offset)
  io.s2_gen_req.bits.paddr_valid := s2_pf_gen_paddr_valid
  io.s2_gen_req.bits.decr_mode := false.B
  io.s2_gen_req.bits.debug_source_type := HW_PREFETCH_STRIDE.U
}

/** One entry of the Active Generation Table: per-region access pattern being learned. */
class AGTEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)
  val pht_tag = UInt(PHT_TAG_BITS.W)
  val region_bits = UInt(REGION_BLKS.W)
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  val region_offset = UInt(REGION_OFFSET.W)
  val access_cnt = UInt((REGION_BLKS-1).U.getWidth.W)
  val decr_mode = Bool()
}

/** Region-granular prefetch request exchanged between SMS sub-modules. */
class PfGenReq()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  val region_addr = UInt(REGION_ADDR_BITS.W)
  val region_bits = UInt(REGION_BLKS.W)
  val paddr_valid = Bool()
  val decr_mode = Bool()
  val alias_bits = UInt(2.W)
  val debug_source_type = UInt(log2Up(nSourceType).W)
}

/** Request from dcache asking the AGT to evict the region covering `vaddr`. */
class AGTEvictReq()(implicit p: Parameters) extends XSBundle {
  val vaddr = UInt(VAddrBits.W)
}

/**
  * Active Generation Table: tracks regions currently being accessed, accumulates
  * their per-block footprint, generates near-region prefetches, and on eviction
  * hands the learned pattern to the PHT.
  * Pipeline — s0: CAM match; s1: entry update/alloc + pf-address compute;
  * s2: emit pf req / pht lookup / pht evict.
  */
class ActiveGenerationTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val agt_en = Input(Bool())
    val s0_lookup = Flipped(ValidIO(new Bundle() {
      val region_tag = UInt(REGION_TAG_WIDTH.W)
      val region_p1_tag = UInt(REGION_TAG_WIDTH.W)
      val region_m1_tag = UInt(REGION_TAG_WIDTH.W)
      val region_offset = UInt(REGION_OFFSET.W)
      val pht_index = UInt(PHT_INDEX_BITS.W)
      val pht_tag = UInt(PHT_TAG_BITS.W)
      val allow_cross_region_p1 = Bool()
      val allow_cross_region_m1 = Bool()
      val region_p1_cross_page = Bool()
      val region_m1_cross_page = Bool()
      val region_paddr = UInt(REGION_ADDR_BITS.W)
      val region_vaddr = UInt(REGION_ADDR_BITS.W)
    }))
    // dcache has released a block, evict it from agt
    val s0_dcache_evict = Flipped(DecoupledIO(new AGTEvictReq))
    val s1_sel_stride = Output(Bool())
    val s2_stride_hit = Input(Bool())
    // if agt/stride missed, try lookup pht
    val s2_pht_lookup = ValidIO(new PhtLookup())
    // evict entry to pht
    val s2_evict = ValidIO(new AGTEntry())
    val s2_pf_gen_req = ValidIO(new PfGenReq())
    val act_threshold = Input(UInt(REGION_OFFSET.W))
    val act_stride = Input(UInt(6.W))
  })

  val entries = Seq.fill(smsParams.active_gen_table_size){ Reg(new AGTEntry()) }
  val valids = Seq.fill(smsParams.active_gen_table_size){ RegInit(false.B) }
  val replacement = ReplacementPolicy.fromString("plru", smsParams.active_gen_table_size)

  val s1_replace_mask_w = Wire(UInt(smsParams.active_gen_table_size.W))

  val s0_lookup = io.s0_lookup.bits
  val s0_lookup_valid = io.s0_lookup.valid

  val s0_dcache_evict = io.s0_dcache_evict.bits
  val s0_dcache_evict_valid = io.s0_dcache_evict.valid
  val s0_dcache_evict_tag = block_hash_tag(s0_dcache_evict.vaddr).head(REGION_TAG_WIDTH)

  val prev_lookup = RegEnable(s0_lookup, s0_lookup_valid)
  val prev_lookup_valid = RegNext(s0_lookup_valid, false.B)

  val s0_match_prev = prev_lookup_valid && s0_lookup.region_tag === prev_lookup.region_tag

  def gen_match_vec(region_tag: UInt): Seq[Bool] = {
    entries.zip(valids).map({
      case (ent, v) => v && ent.region_tag === region_tag
    })
  }

  val region_match_vec_s0 = gen_match_vec(s0_lookup.region_tag)
  val region_p1_match_vec_s0 = gen_match_vec(s0_lookup.region_p1_tag)
  val region_m1_match_vec_s0 = gen_match_vec(s0_lookup.region_m1_tag)

  val any_region_match = Cat(region_match_vec_s0).orR
  val any_region_p1_match = Cat(region_p1_match_vec_s0).orR && s0_lookup.allow_cross_region_p1
  val any_region_m1_match = Cat(region_m1_match_vec_s0).orR && s0_lookup.allow_cross_region_m1

  val region_match_vec_dcache_evict_s0 = gen_match_vec(s0_dcache_evict_tag)
  val any_region_dcache_evict_match = Cat(region_match_vec_dcache_evict_s0).orR
  // s0 dcache evict a entry that may be replaced in s1
  val s0_dcache_evict_conflict = Cat(VecInit(region_match_vec_dcache_evict_s0).asUInt & s1_replace_mask_w).orR
  val s0_do_dcache_evict = io.s0_dcache_evict.fire && any_region_dcache_evict_match

  io.s0_dcache_evict.ready := !s0_lookup_valid && !s0_dcache_evict_conflict

  val s0_region_hit = any_region_match
  val s0_cross_region_hit = any_region_m1_match || any_region_p1_match
  val s0_alloc = s0_lookup_valid && !s0_region_hit && !s0_match_prev
  val s0_pf_gen_match_vec = valids.indices.map(i => {
    Mux(any_region_match,
      region_match_vec_s0(i),
      Mux(any_region_m1_match,
        region_m1_match_vec_s0(i), region_p1_match_vec_s0(i)
      )
    )
  })
  val s0_agt_entry = Wire(new AGTEntry())

  s0_agt_entry.pht_index := s0_lookup.pht_index
  s0_agt_entry.pht_tag := s0_lookup.pht_tag
  s0_agt_entry.region_bits := region_offset_to_bits(s0_lookup.region_offset)
  s0_agt_entry.region_tag := s0_lookup.region_tag
  s0_agt_entry.region_offset := s0_lookup.region_offset
  s0_agt_entry.access_cnt := 1.U
  // lookup_region + 1 == entry_region
  // lookup_region = entry_region - 1 => decr mode
  s0_agt_entry.decr_mode := !s0_region_hit && !any_region_m1_match && any_region_p1_match
  val s0_replace_way = replacement.way
  val s0_replace_mask = UIntToOH(s0_replace_way)
  // s0 hit a entry that may be replaced in s1
  val s0_update_conflict = Cat(VecInit(region_match_vec_s0).asUInt & s1_replace_mask_w).orR
  val s0_update = s0_lookup_valid && s0_region_hit && !s0_update_conflict

  val s0_access_way = Mux1H(
    Seq(s0_update, s0_alloc),
    Seq(OHToUInt(region_match_vec_s0), s0_replace_way)
  )
  when(s0_update || s0_alloc) {
    replacement.access(s0_access_way)
  }

  // stage1: update/alloc
  // region hit, update entry
  val s1_update = RegNext(s0_update, false.B)
  val s1_update_mask = RegEnable(VecInit(region_match_vec_s0), s0_lookup_valid)
  val s1_agt_entry = RegEnable(s0_agt_entry, s0_lookup_valid)
  val s1_cross_region_match = RegNext(s0_lookup_valid && s0_cross_region_hit, false.B)
  val s1_alloc = RegNext(s0_alloc, false.B)
  val s1_alloc_entry = s1_agt_entry
  val s1_do_dcache_evict = RegNext(s0_do_dcache_evict, false.B)
  val s1_replace_mask = Mux(
    s1_do_dcache_evict,
    RegEnable(VecInit(region_match_vec_dcache_evict_s0).asUInt, s0_do_dcache_evict),
    RegEnable(s0_replace_mask, s0_lookup_valid)
  )
  s1_replace_mask_w := s1_replace_mask & Fill(smsParams.active_gen_table_size, s1_alloc || s1_do_dcache_evict)
  val s1_evict_entry = Mux1H(s1_replace_mask, entries)
  val s1_evict_valid = Mux1H(s1_replace_mask, valids)
  // pf gen
  val s1_pf_gen_match_vec = RegEnable(VecInit(s0_pf_gen_match_vec), s0_lookup_valid)
  val s1_region_paddr = RegEnable(s0_lookup.region_paddr, s0_lookup_valid)
  val s1_region_vaddr = RegEnable(s0_lookup.region_vaddr, s0_lookup_valid)
  val s1_region_offset = RegEnable(s0_lookup.region_offset, s0_lookup_valid)
  for(i <- entries.indices){
    val alloc = s1_replace_mask(i) && s1_alloc
    val update = s1_update_mask(i) && s1_update
    val update_entry = WireInit(entries(i))
    update_entry.region_bits := entries(i).region_bits | s1_agt_entry.region_bits
    // saturate at REGION_BLKS-1; only count blocks not seen before in this region
    update_entry.access_cnt := Mux(entries(i).access_cnt === (REGION_BLKS - 1).U,
      entries(i).access_cnt,
      entries(i).access_cnt + (s1_agt_entry.region_bits & (~entries(i).region_bits).asUInt).orR
    )
    valids(i) := valids(i) || alloc
    entries(i) := Mux(alloc, s1_alloc_entry, Mux(update, update_entry, entries(i)))
  }

  when(s1_update){
    assert(PopCount(s1_update_mask) === 1.U, "multi-agt-update")
  }
  when(s1_alloc){
    assert(PopCount(s1_replace_mask) === 1.U, "multi-agt-alloc")
  }

  // pf_addr
  // 1.hit => pf_addr = lookup_addr + (decr ? -1 : 1)
  // 2.lookup region - 1 hit => lookup_addr + 1 (incr mode)
  // 3.lookup region + 1 hit => lookup_addr - 1 (decr mode)
  val s1_hited_entry_decr = Mux1H(s1_update_mask, entries.map(_.decr_mode))
  val s1_pf_gen_decr_mode = Mux(s1_update,
    s1_hited_entry_decr,
    s1_agt_entry.decr_mode
  )

  val s1_pf_gen_vaddr_inc = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) + io.act_stride
  val s1_pf_gen_vaddr_dec = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) - io.act_stride
  val s1_vaddr_inc_cross_page = s1_pf_gen_vaddr_inc(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s1_vaddr_dec_cross_page = s1_pf_gen_vaddr_dec(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
  // carry/borrow out of the tag range => wrapped past the address-space limit
  val s1_vaddr_inc_cross_max_lim = s1_pf_gen_vaddr_inc.head(1).asBool
  val s1_vaddr_dec_cross_max_lim = s1_pf_gen_vaddr_dec.head(1).asBool

  //val s1_pf_gen_vaddr_p1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) + 1.U
  //val s1_pf_gen_vaddr_m1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) - 1.U
  val s1_pf_gen_vaddr = Cat(
    s1_region_vaddr(REGION_ADDR_BITS - 1, REGION_TAG_WIDTH),
    Mux(s1_pf_gen_decr_mode,
      s1_pf_gen_vaddr_dec.tail(1).head(REGION_TAG_WIDTH),
      s1_pf_gen_vaddr_inc.tail(1).head(REGION_TAG_WIDTH)
    )
  )
  val s1_pf_gen_offset = Mux(s1_pf_gen_decr_mode,
    s1_pf_gen_vaddr_dec(REGION_OFFSET - 1, 0),
    s1_pf_gen_vaddr_inc(REGION_OFFSET - 1, 0)
  )
  val s1_pf_gen_offset_mask = UIntToOH(s1_pf_gen_offset)
  val s1_pf_gen_access_cnt = Mux1H(s1_pf_gen_match_vec, entries.map(_.access_cnt))
  val s1_in_active_page = s1_pf_gen_access_cnt > io.act_threshold
  val s1_pf_gen_valid = prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && Mux(s1_pf_gen_decr_mode,
    !s1_vaddr_dec_cross_max_lim,
    !s1_vaddr_inc_cross_max_lim
  ) && s1_in_active_page && io.agt_en
  val s1_pf_gen_paddr_valid = Mux(s1_pf_gen_decr_mode, !s1_vaddr_dec_cross_page, !s1_vaddr_inc_cross_page)
  val s1_pf_gen_region_addr = Mux(s1_pf_gen_paddr_valid,
    Cat(s1_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT), s1_pf_gen_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)),
    s1_pf_gen_vaddr
  )
  val s1_pf_gen_region_tag = region_hash_tag(s1_pf_gen_vaddr)
  // incr mode: prefetch the blocks at and above the target offset
  val s1_pf_gen_incr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == 0) true.B else !s1_pf_gen_offset_mask(i - 1, 0).orR
  })).asUInt
  // decr mode: prefetch the blocks at and below the target offset
  val s1_pf_gen_decr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == REGION_BLKS - 1) true.B
    else !s1_pf_gen_offset_mask(REGION_BLKS - 1, i + 1).orR
  })).asUInt
  val s1_pf_gen_region_bits = Mux(s1_pf_gen_decr_mode,
    s1_pf_gen_decr_region_bits,
    s1_pf_gen_incr_region_bits
  )
  val s1_pht_lookup_valid = Wire(Bool())
  val s1_pht_lookup = Wire(new PhtLookup())

  s1_pht_lookup_valid := !s1_pf_gen_valid && prev_lookup_valid
  s1_pht_lookup.pht_index := s1_agt_entry.pht_index
  s1_pht_lookup.pht_tag := s1_agt_entry.pht_tag
  s1_pht_lookup.region_vaddr := s1_region_vaddr
  s1_pht_lookup.region_paddr := s1_region_paddr
  s1_pht_lookup.region_offset := s1_region_offset

  io.s1_sel_stride := prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && !s1_in_active_page

  // stage2: gen pf reg / evict entry to pht
  val s2_do_dcache_evict = RegNext(s1_do_dcache_evict, false.B)
  val s2_evict_entry = RegEnable(s1_evict_entry, s1_alloc || s1_do_dcache_evict)
  val s2_evict_valid = RegNext((s1_alloc || s1_do_dcache_evict) && s1_evict_valid, false.B)
  val s2_paddr_valid = RegEnable(s1_pf_gen_paddr_valid, s1_pf_gen_valid)
  val s2_pf_gen_region_tag = RegEnable(s1_pf_gen_region_tag, s1_pf_gen_valid)
  val s2_pf_gen_decr_mode = RegEnable(s1_pf_gen_decr_mode, s1_pf_gen_valid)
  val s2_pf_gen_region_paddr = RegEnable(s1_pf_gen_region_addr, s1_pf_gen_valid)
  val s2_pf_gen_alias_bits = RegEnable(get_alias_bits(s1_pf_gen_vaddr), s1_pf_gen_valid)
  val s2_pf_gen_region_bits = RegEnable(s1_pf_gen_region_bits, s1_pf_gen_valid)
  val s2_pf_gen_valid = RegNext(s1_pf_gen_valid, false.B)
  val s2_pht_lookup_valid = RegNext(s1_pht_lookup_valid, false.B) && !io.s2_stride_hit
  val s2_pht_lookup = RegEnable(s1_pht_lookup, s1_pht_lookup_valid)

  // single-access patterns carry no spatial information, don't evict them to pht
  io.s2_evict.valid := s2_evict_valid && (s2_evict_entry.access_cnt > 1.U)
  io.s2_evict.bits := s2_evict_entry

  io.s2_pf_gen_req.bits.region_tag := s2_pf_gen_region_tag
  io.s2_pf_gen_req.bits.region_addr := s2_pf_gen_region_paddr
  io.s2_pf_gen_req.bits.alias_bits := s2_pf_gen_alias_bits
  io.s2_pf_gen_req.bits.region_bits := s2_pf_gen_region_bits
  io.s2_pf_gen_req.bits.paddr_valid := s2_paddr_valid
  io.s2_pf_gen_req.bits.decr_mode := s2_pf_gen_decr_mode
  // NOTE(review): AGT-generated prefetches are hard-disabled here (valid tied to
  // false.B instead of s2_pf_gen_valid) — confirm this is an intentional policy
  // choice and not a leftover debug change.
  io.s2_pf_gen_req.valid := false.B
  io.s2_pf_gen_req.bits.debug_source_type := HW_PREFETCH_AGT.U

  io.s2_pht_lookup.valid := s2_pht_lookup_valid
  io.s2_pht_lookup.bits := s2_pht_lookup

  XSPerfAccumulate("sms_agt_in", io.s0_lookup.valid)
  XSPerfAccumulate("sms_agt_alloc", s1_alloc) // cross region match or filter evict
  XSPerfAccumulate("sms_agt_update", s1_update) // entry hit
  XSPerfAccumulate("sms_agt_pf_gen", io.s2_pf_gen_req.valid)
  XSPerfAccumulate("sms_agt_pf_gen_paddr_valid",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.paddr_valid
  )
  XSPerfAccumulate("sms_agt_pf_gen_decr_mode",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.decr_mode
  )
  for(i <- 0 until smsParams.active_gen_table_size){
    XSPerfAccumulate(s"sms_agt_access_entry_$i",
      s1_alloc && s1_replace_mask(i) || s1_update && s1_update_mask(i)
    )
  }
  XSPerfAccumulate("sms_agt_evict", s2_evict_valid)
  XSPerfAccumulate("sms_agt_evict_by_plru", s2_evict_valid && !s2_do_dcache_evict)
  XSPerfAccumulate("sms_agt_evict_by_dcache", s2_evict_valid && s2_do_dcache_evict)
  XSPerfAccumulate("sms_agt_evict_one_hot_pattern", s2_evict_valid && (s2_evict_entry.access_cnt === 1.U))
}

/** Request to look a region up in the Pattern History Table. */
class PhtLookup()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)
  val pht_tag = UInt(PHT_TAG_BITS.W)
  val region_paddr = UInt(REGION_ADDR_BITS.W)
  val region_vaddr = UInt(REGION_ADDR_BITS.W)
  val region_offset = UInt(REGION_OFFSET.W)
}

/** One PHT way: saturating history counters centered on the trigger offset. */
class PhtEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val hist = Vec(2 * (REGION_BLKS - 1), UInt(PHT_HIST_BITS.W))
  val tag = UInt(PHT_TAG_BITS.W)
  val decr_mode = Bool()
}

/**
  * Pattern History Table: SRAM-backed, set-associative store of learned spatial
  * patterns, trained by AGT evictions and queried on AGT misses.
  * Pipeline — s0: arbitrate lookup vs evict; s1: SRAM read; s2: hit check and
  * history update; s3: SRAM writeback + pf generation; s4: pf req arbitration.
  */
class PatternHistoryTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    // receive agt evicted entry
    val agt_update = Flipped(ValidIO(new AGTEntry()))
    // at stage2, if we know agt missed, lookup pht
    val s2_agt_lookup = Flipped(ValidIO(new PhtLookup()))
    // pht-generated prefetch req
    val pf_gen_req = ValidIO(new PfGenReq())
  })

  val pht_ram = Module(new SRAMTemplate[PhtEntry](new PhtEntry,
    set = smsParams.pht_size / smsParams.pht_ways,
    way = smsParams.pht_ways,
    singlePort = true
  ))
  def PHT_SETS = smsParams.pht_size / smsParams.pht_ways
  val pht_valids = Seq.fill(smsParams.pht_ways){
    RegInit(VecInit(Seq.fill(PHT_SETS){false.B}))
  }
  val replacement = Seq.fill(PHT_SETS) { ReplacementPolicy.fromString("plru", smsParams.pht_ways) }

  val lookup_queue = Module(new OverrideableQueue(new PhtLookup, smsParams.pht_lookup_queue_size))
  lookup_queue.io.in := io.s2_agt_lookup
  val lookup = lookup_queue.io.out

  val evict_queue = Module(new OverrideableQueue(new AGTEntry, smsParams.pht_lookup_queue_size))
  evict_queue.io.in := io.agt_update
  val evict = evict_queue.io.out

  XSPerfAccumulate("sms_pht_lookup_in", lookup_queue.io.in.fire)
  XSPerfAccumulate("sms_pht_lookup_out", lookup_queue.io.out.fire)
  XSPerfAccumulate("sms_pht_evict_in", evict_queue.io.in.fire)
  XSPerfAccumulate("sms_pht_evict_out", evict_queue.io.out.fire)

  val s3_ram_en = Wire(Bool())
  val s1_valid = Wire(Bool())
  // if s1.raddr == s2.waddr or s3 is using ram port, block s1
  val s1_wait = Wire(Bool())
  // pipe s0: select an op from [lookup, update], generate ram read addr
  val s0_valid = lookup.valid || evict.valid

  // evictions (training) take priority over lookups
  evict.ready := !s1_valid || !s1_wait
  lookup.ready := evict.ready && !evict.valid

  val s0_ram_raddr = Mux(evict.valid,
    evict.bits.pht_index,
    lookup.bits.pht_index
  )
  val s0_tag = Mux(evict.valid, evict.bits.pht_tag, lookup.bits.pht_tag)
  val s0_region_offset = Mux(evict.valid, evict.bits.region_offset, lookup.bits.region_offset)
  val s0_region_paddr = lookup.bits.region_paddr
  val s0_region_vaddr = lookup.bits.region_vaddr
  val s0_region_bits = evict.bits.region_bits
  val s0_decr_mode = evict.bits.decr_mode
  val s0_evict = evict.valid

  // pipe s1: send addr to ram
  val s1_valid_r = RegInit(false.B)
  s1_valid_r := Mux(s1_valid && s1_wait, true.B, s0_valid)
  s1_valid := s1_valid_r
  val s1_reg_en = s0_valid && (!s1_wait || !s1_valid)
  val s1_ram_raddr = RegEnable(s0_ram_raddr, s1_reg_en)
  val s1_tag = RegEnable(s0_tag, s1_reg_en)
  val s1_region_bits = RegEnable(s0_region_bits, s1_reg_en)
  val s1_decr_mode = RegEnable(s0_decr_mode, s1_reg_en)
  val s1_region_paddr = RegEnable(s0_region_paddr, s1_reg_en)
  val s1_region_vaddr = RegEnable(s0_region_vaddr, s1_reg_en)
  val s1_region_offset = RegEnable(s0_region_offset, s1_reg_en)
  val s1_pht_valids = pht_valids.map(way => Mux1H(
    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
    way
  ))
  val s1_evict = RegEnable(s0_evict, s1_reg_en)
  val s1_replace_way = Mux1H(
    (0 until PHT_SETS).map(i => i.U === s1_ram_raddr),
    replacement.map(_.way)
  )
  // mask of history slots touched by this training record, anchored at the
  // trigger offset
  val s1_hist_update_mask = Cat(
    Fill(REGION_BLKS - 1, true.B), 0.U((REGION_BLKS - 1).W)
  ) >> s1_region_offset
  // rotate the recorded footprint so it is relative to the trigger offset
  val s1_hist_bits = Cat(
    s1_region_bits.head(REGION_BLKS - 1) >> s1_region_offset,
    (Cat(
      s1_region_bits.tail(1), 0.U((REGION_BLKS - 1).W)
    ) >> s1_region_offset)(REGION_BLKS - 2, 0)
  )

  // pipe s2: generate ram write addr/data
  val s2_valid = RegNext(s1_valid && !s1_wait, false.B)
  val s2_reg_en = s1_valid && !s1_wait
  val s2_hist_update_mask = RegEnable(s1_hist_update_mask, s2_reg_en)
  val s2_hist_bits = RegEnable(s1_hist_bits, s2_reg_en)
  val s2_tag = RegEnable(s1_tag, s2_reg_en)
  val s2_region_bits = RegEnable(s1_region_bits, s2_reg_en)
  val s2_decr_mode = RegEnable(s1_decr_mode, s2_reg_en)
  val s2_region_paddr = RegEnable(s1_region_paddr, s2_reg_en)
  val s2_region_vaddr = RegEnable(s1_region_vaddr, s2_reg_en)
  val s2_region_offset = RegEnable(s1_region_offset, s2_reg_en)
  val s2_region_offset_mask = region_offset_to_bits(s2_region_offset)
  val s2_evict = RegEnable(s1_evict, s2_reg_en)
  val s2_pht_valids = s1_pht_valids.map(v => RegEnable(v, s2_reg_en))
  val s2_replace_way = RegEnable(s1_replace_way, s2_reg_en)
  val s2_ram_waddr = RegEnable(s1_ram_raddr, s2_reg_en)
  val s2_ram_rdata = pht_ram.io.r.resp.data
  val s2_ram_rtags = s2_ram_rdata.map(_.tag)
  val s2_tag_match_vec = s2_ram_rtags.map(t => t === s2_tag)
  val s2_hit_vec = s2_tag_match_vec.zip(s2_pht_valids).map({
    case (tag_match, v) => v && tag_match
  })
  // saturating increment/decrement each touched history counter
  val s2_hist_update = s2_ram_rdata.map(way => VecInit(way.hist.zipWithIndex.map({
    case (h, i) =>
      val do_update = s2_hist_update_mask(i)
      val hist_updated = Mux(s2_hist_bits(i),
        Mux(h.andR, h, h + 1.U),
        Mux(h === 0.U, 0.U, h - 1.U)
      )
      Mux(do_update, hist_updated, h)
  })))
  // MSB of each counter = "confident enough to prefetch"
  val s2_hist_pf_gen = Mux1H(s2_hit_vec, s2_ram_rdata.map(way => VecInit(way.hist.map(_.head(1))).asUInt))
  val s2_new_hist = VecInit(s2_hist_bits.asBools.map(b => Cat(0.U((PHT_HIST_BITS - 1).W), b)))
  val s2_pht_hit = Cat(s2_hit_vec).orR
  val s2_hist = Mux(s2_pht_hit, Mux1H(s2_hit_vec, s2_hist_update), s2_new_hist)
  val s2_repl_way_mask = UIntToOH(s2_replace_way)
  val s2_incr_region_vaddr = s2_region_vaddr + 1.U
  val s2_decr_region_vaddr = s2_region_vaddr - 1.U

  // pipe s3: send addr/data to ram, gen pf_req
  val s3_valid = RegNext(s2_valid, false.B)
  val s3_evict = RegEnable(s2_evict, s2_valid)
  val s3_hist = RegEnable(s2_hist, s2_valid)
  val s3_hist_pf_gen = RegEnable(s2_hist_pf_gen, s2_valid)
  val s3_hist_update_mask = RegEnable(s2_hist_update_mask.asUInt, s2_valid)
  val s3_region_offset = RegEnable(s2_region_offset, s2_valid)
  val s3_region_offset_mask = RegEnable(s2_region_offset_mask, s2_valid)
  val s3_decr_mode = RegEnable(s2_decr_mode, s2_valid)
  val s3_region_paddr = RegEnable(s2_region_paddr, s2_valid)
  val s3_region_vaddr = RegEnable(s2_region_vaddr, s2_valid)
  val s3_pht_tag = RegEnable(s2_tag, s2_valid)
  val s3_hit_vec = s2_hit_vec.map(h => RegEnable(h, s2_valid))
  val s3_hit = Cat(s3_hit_vec).orR
  val s3_hit_way = OHToUInt(s3_hit_vec)
  val s3_repl_way = RegEnable(s2_replace_way, s2_valid)
  val s3_repl_way_mask = RegEnable(s2_repl_way_mask, s2_valid)
  val s3_repl_update_mask = RegEnable(VecInit((0 until PHT_SETS).map(i => i.U === s2_ram_waddr)), s2_valid)
  val s3_ram_waddr = RegEnable(s2_ram_waddr, s2_valid)
  val s3_incr_region_vaddr = RegEnable(s2_incr_region_vaddr, s2_valid)
  val s3_decr_region_vaddr = RegEnable(s2_decr_region_vaddr, s2_valid)
  s3_ram_en := s3_valid && s3_evict
  val s3_ram_wdata = Wire(new PhtEntry())
  s3_ram_wdata.hist := s3_hist
  s3_ram_wdata.tag := s3_pht_tag
  s3_ram_wdata.decr_mode := s3_decr_mode

  s1_wait := (s2_valid && s2_evict && s2_ram_waddr === s1_ram_raddr) || s3_ram_en

  // mark the written way valid on an evict that missed (fresh allocation)
  for((valids, way_idx) <- pht_valids.zipWithIndex){
    val update_way = s3_repl_way_mask(way_idx)
    for((v, set_idx) <- valids.zipWithIndex){
      val update_set = s3_repl_update_mask(set_idx)
      when(s3_valid && s3_evict && !s3_hit && update_set && update_way){
        v := true.B
      }
    }
  }
  for((r, i) <- replacement.zipWithIndex){
    when(s3_valid && s3_repl_update_mask(i)){
      when(s3_hit){
        r.access(s3_hit_way)
      }.elsewhen(s3_evict){
        r.access(s3_repl_way)
      }
    }
  }

  val s3_way_mask = Mux(s3_hit,
    VecInit(s3_hit_vec).asUInt,
    s3_repl_way_mask
  ).asUInt

  pht_ram.io.r(
    s1_valid, s1_ram_raddr
  )
  pht_ram.io.w(
    s3_ram_en, s3_ram_wdata, s3_ram_waddr, s3_way_mask
  )

  when(s3_valid && s3_hit){
    assert(!Cat(s3_hit_vec).andR, "sms_pht: multi-hit!")
  }

  // generate pf req if hit
  val s3_hist_hi = s3_hist_pf_gen.head(REGION_BLKS - 1)
  val s3_hist_lo = s3_hist_pf_gen.tail(REGION_BLKS - 1)
  // rotate the trigger-relative pattern back to absolute region offsets
  val s3_hist_hi_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_hi) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  val s3_hist_lo_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_lo) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  val s3_cur_region_bits = Cat(s3_hist_hi_shifted.tail(REGION_BLKS - 1), 0.U(1.W)) |
    Cat(0.U(1.W), s3_hist_lo_shifted.head(REGION_BLKS - 1))
  val s3_incr_region_bits = Cat(0.U(1.W), s3_hist_hi_shifted.head(REGION_BLKS - 1))
  val s3_decr_region_bits = Cat(s3_hist_lo_shifted.tail(REGION_BLKS - 1), 0.U(1.W))
  val s3_pf_gen_valid = s3_valid && s3_hit && !s3_evict
  val s3_cur_region_valid = s3_pf_gen_valid && (s3_hist_pf_gen & s3_hist_update_mask).orR
  val s3_incr_region_valid = s3_pf_gen_valid && (s3_hist_hi & (~s3_hist_update_mask.head(REGION_BLKS - 1)).asUInt).orR
  val s3_decr_region_valid = s3_pf_gen_valid && (s3_hist_lo & (~s3_hist_update_mask.tail(REGION_BLKS - 1)).asUInt).orR
  val s3_incr_alias_bits = get_alias_bits(s3_incr_region_vaddr)
  val s3_decr_alias_bits = get_alias_bits(s3_decr_region_vaddr)
  val s3_incr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_decr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_incr_crosspage = s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_decr_crosspage = s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_cur_region_tag = region_hash_tag(s3_region_vaddr)
  val s3_incr_region_tag = region_hash_tag(s3_incr_region_vaddr)
  val s3_decr_region_tag = region_hash_tag(s3_decr_region_vaddr)

  // pipe s4: hold the up-to-3 generated region requests and arbitrate them out
  val pf_gen_req_arb = Module(new Arbiter(new PfGenReq, 3))
  val s4_pf_gen_cur_region_valid = RegInit(false.B)
  val s4_pf_gen_cur_region = Reg(new PfGenReq)
  val s4_pf_gen_incr_region_valid = RegInit(false.B)
  val s4_pf_gen_incr_region = Reg(new PfGenReq)
  val s4_pf_gen_decr_region_valid = RegInit(false.B)
  val s4_pf_gen_decr_region = Reg(new PfGenReq)

  s4_pf_gen_cur_region_valid := s3_cur_region_valid
  when(s3_cur_region_valid){
    s4_pf_gen_cur_region.region_addr := s3_region_paddr
    s4_pf_gen_cur_region.alias_bits := get_alias_bits(s3_region_vaddr)
    s4_pf_gen_cur_region.region_tag := s3_cur_region_tag
    s4_pf_gen_cur_region.region_bits := s3_cur_region_bits
    s4_pf_gen_cur_region.paddr_valid := true.B
    s4_pf_gen_cur_region.decr_mode := false.B
  }
s4_pf_gen_incr_region_valid := s3_incr_region_valid || 772 (!pf_gen_req_arb.io.in(1).ready && s4_pf_gen_incr_region_valid) 773 when(s3_incr_region_valid){ 774 s4_pf_gen_incr_region.region_addr := Mux(s3_incr_crosspage, s3_incr_region_vaddr, s3_incr_region_paddr) 775 s4_pf_gen_incr_region.alias_bits := s3_incr_alias_bits 776 s4_pf_gen_incr_region.region_tag := s3_incr_region_tag 777 s4_pf_gen_incr_region.region_bits := s3_incr_region_bits 778 s4_pf_gen_incr_region.paddr_valid := !s3_incr_crosspage 779 s4_pf_gen_incr_region.decr_mode := false.B 780 } 781 s4_pf_gen_decr_region_valid := s3_decr_region_valid || 782 (!pf_gen_req_arb.io.in(2).ready && s4_pf_gen_decr_region_valid) 783 when(s3_decr_region_valid){ 784 s4_pf_gen_decr_region.region_addr := Mux(s3_decr_crosspage, s3_decr_region_vaddr, s3_decr_region_paddr) 785 s4_pf_gen_decr_region.alias_bits := s3_decr_alias_bits 786 s4_pf_gen_decr_region.region_tag := s3_decr_region_tag 787 s4_pf_gen_decr_region.region_bits := s3_decr_region_bits 788 s4_pf_gen_decr_region.paddr_valid := !s3_decr_crosspage 789 s4_pf_gen_decr_region.decr_mode := true.B 790 } 791 792 pf_gen_req_arb.io.in.head.valid := s4_pf_gen_cur_region_valid 793 pf_gen_req_arb.io.in.head.bits := s4_pf_gen_cur_region 794 pf_gen_req_arb.io.in.head.bits.debug_source_type := HW_PREFETCH_PHT_CUR.U 795 pf_gen_req_arb.io.in(1).valid := s4_pf_gen_incr_region_valid 796 pf_gen_req_arb.io.in(1).bits := s4_pf_gen_incr_region 797 pf_gen_req_arb.io.in(1).bits.debug_source_type := HW_PREFETCH_PHT_INC.U 798 pf_gen_req_arb.io.in(2).valid := s4_pf_gen_decr_region_valid 799 pf_gen_req_arb.io.in(2).bits := s4_pf_gen_decr_region 800 pf_gen_req_arb.io.in(2).bits.debug_source_type := HW_PREFETCH_PHT_DEC.U 801 pf_gen_req_arb.io.out.ready := true.B 802 803 io.pf_gen_req.valid := pf_gen_req_arb.io.out.valid 804 io.pf_gen_req.bits := pf_gen_req_arb.io.out.bits 805 806 XSPerfAccumulate("sms_pht_update", io.agt_update.valid) 807 XSPerfAccumulate("sms_pht_update_hit", s2_valid && s2_evict 
&& s2_pht_hit) 808 XSPerfAccumulate("sms_pht_lookup", io.s2_agt_lookup.valid) 809 XSPerfAccumulate("sms_pht_lookup_hit", s2_valid && !s2_evict && s2_pht_hit) 810 for(i <- 0 until smsParams.pht_ways){ 811 XSPerfAccumulate(s"sms_pht_write_way_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.waymask.get(i)) 812 } 813 for(i <- 0 until PHT_SETS){ 814 XSPerfAccumulate(s"sms_pht_write_set_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.setIdx === i.U) 815 } 816 XSPerfAccumulate(s"sms_pht_pf_gen", io.pf_gen_req.valid) 817} 818 819class PrefetchFilterEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper { 820 val region_tag = UInt(REGION_TAG_WIDTH.W) 821 val region_addr = UInt(REGION_ADDR_BITS.W) 822 val region_bits = UInt(REGION_BLKS.W) 823 val filter_bits = UInt(REGION_BLKS.W) 824 val alias_bits = UInt(2.W) 825 val paddr_valid = Bool() 826 val decr_mode = Bool() 827 val debug_source_type = UInt(log2Up(nSourceType).W) 828} 829 830class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper { 831 val io = IO(new Bundle() { 832 val gen_req = Flipped(ValidIO(new PfGenReq())) 833 val tlb_req = new TlbRequestIO(2) 834 val l2_pf_addr = ValidIO(UInt(PAddrBits.W)) 835 val pf_alias_bits = Output(UInt(2.W)) 836 val debug_source_type = Output(UInt(log2Up(nSourceType).W)) 837 }) 838 val entries = Seq.fill(smsParams.pf_filter_size){ Reg(new PrefetchFilterEntry()) } 839 val valids = Seq.fill(smsParams.pf_filter_size){ RegInit(false.B) } 840 val replacement = ReplacementPolicy.fromString("plru", smsParams.pf_filter_size) 841 842 val prev_valid = RegNext(io.gen_req.valid, false.B) 843 val prev_gen_req = RegEnable(io.gen_req.bits, io.gen_req.valid) 844 845 val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, smsParams.pf_filter_size)) 846 val pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), smsParams.pf_filter_size)) 847 848 io.l2_pf_addr.valid := pf_req_arb.io.out.valid 849 io.l2_pf_addr.bits := pf_req_arb.io.out.bits 850 
io.pf_alias_bits := Mux1H(entries.zipWithIndex.map({
    case (entry, i) => (i.U === pf_req_arb.io.chosen) -> entry.alias_bits
  }))
  pf_req_arb.io.out.ready := true.B

  io.debug_source_type := VecInit(entries.map(_.debug_source_type))(pf_req_arb.io.chosen)

  // Cross-stage wires: s1 informs s0 lookups about in-flight evictions, and the
  // delayed TLB-fire vectors track which entry a TLB response belongs to.
  val s1_valid = Wire(Bool())
  val s1_hit = Wire(Bool())
  val s1_replace_vec = Wire(UInt(smsParams.pf_filter_size.W))
  val s1_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))
  val s2_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))

  // s0: entries lookup
  // Drop a request that repeats the immediately preceding region; an entry being
  // evicted by s1 this cycle must not count as a match.
  val s0_gen_req = io.gen_req.bits
  val s0_match_prev = prev_valid && (s0_gen_req.region_tag === prev_gen_req.region_tag)
  val s0_gen_req_valid = io.gen_req.valid && !s0_match_prev
  val s0_match_vec = valids.indices.map(i => {
    valids(i) && entries(i).region_tag === s0_gen_req.region_tag && !(s1_valid && !s1_hit && s1_replace_vec(i))
  })
  val s0_any_matched = Cat(s0_match_vec).orR
  val s0_replace_vec = UIntToOH(replacement.way)
  val s0_hit = s0_gen_req_valid && s0_any_matched

  // Per-entry request generation: a TLB request while paddr is unknown, and a
  // block prefetch request for the first (or last, in decr mode) pending block.
  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val is_evicted = s1_valid && s1_replace_vec(i)
    // Suppress re-requesting the TLB while a previous request is still in flight
    // (its fire is tracked for two cycles in s1/s2_tlb_fire_vec).
    tlb_req_arb.io.in(i).valid := v && !s1_tlb_fire_vec(i) && !s2_tlb_fire_vec(i) && !ent.paddr_valid && !is_evicted
    tlb_req_arb.io.in(i).bits.vaddr := Cat(ent.region_addr, 0.U(log2Up(REGION_SIZE).W))
    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
    tlb_req_arb.io.in(i).bits.size := 3.U
    tlb_req_arb.io.in(i).bits.kill := false.B
    tlb_req_arb.io.in(i).bits.no_translate := false.B
    tlb_req_arb.io.in(i).bits.memidx := DontCare
    tlb_req_arb.io.in(i).bits.debug := DontCare

    // Blocks still to prefetch = wanted blocks minus already-issued blocks.
    val pending_req_vec = ent.region_bits & (~ent.filter_bits).asUInt
    // BUGFIX: the offset tables must have one value per bit of pending_req_vec,
    // i.e. REGION_BLKS entries, not pf_filter_size entries. The original code
    // only worked because both parameters default to 16.
    val first_one_offset = PriorityMux(
      pending_req_vec.asBools,
      (0 until REGION_BLKS).map(_.U(REGION_OFFSET.W))
    )
    val last_one_offset = PriorityMux(
      pending_req_vec.asBools.reverse,
      (0 until REGION_BLKS).reverse.map(_.U(REGION_OFFSET.W))
    )
    val pf_addr = Cat(
      ent.region_addr,
      Mux(ent.decr_mode, last_one_offset, first_one_offset),
      0.U(log2Up(dcacheParameters.blockBytes).W)
    )
    pf_req_arb.io.in(i).valid := v && Cat(pending_req_vec).orR && ent.paddr_valid && !is_evicted
    pf_req_arb.io.in(i).bits := pf_addr
  }

  val s0_tlb_fire_vec = VecInit(tlb_req_arb.io.in.map(_.fire))
  val s0_pf_fire_vec = VecInit(pf_req_arb.io.in.map(_.fire))

  // Touch PLRU with the matched way on hit, or the victim way on allocate.
  val s0_update_way = OHToUInt(s0_match_vec)
  val s0_replace_way = replacement.way
  val s0_access_way = Mux(s0_any_matched, s0_update_way, s0_replace_way)
  when(s0_gen_req_valid){
    replacement.access(s0_access_way)
  }

  // s1: update or alloc
  val s1_valid_r = RegNext(s0_gen_req_valid, false.B)
  val s1_hit_r = RegEnable(s0_hit, false.B, s0_gen_req_valid)
  val s1_gen_req = RegEnable(s0_gen_req, s0_gen_req_valid)
  val s1_replace_vec_r = RegEnable(s0_replace_vec, s0_gen_req_valid && !s0_hit)
  val s1_update_vec = RegEnable(VecInit(s0_match_vec).asUInt, s0_gen_req_valid && s0_hit)
  val s1_tlb_fire_vec_r = RegNext(s0_tlb_fire_vec, 0.U.asTypeOf(s0_tlb_fire_vec))
  // tlb req will latch one cycle after tlb_arb
  val s1_tlb_req_valid = RegNext(tlb_req_arb.io.out.fire)
  val s1_tlb_req_bits = RegEnable(tlb_req_arb.io.out.bits, tlb_req_arb.io.out.fire)
  val s1_alloc_entry = Wire(new PrefetchFilterEntry())
  s1_valid := s1_valid_r
  s1_hit := s1_hit_r
  s1_replace_vec := s1_replace_vec_r
  s1_tlb_fire_vec := s1_tlb_fire_vec_r.asUInt
  s1_alloc_entry.region_tag := s1_gen_req.region_tag
  s1_alloc_entry.region_addr := s1_gen_req.region_addr
  s1_alloc_entry.region_bits := s1_gen_req.region_bits
  s1_alloc_entry.paddr_valid := s1_gen_req.paddr_valid
  s1_alloc_entry.decr_mode := s1_gen_req.decr_mode
  s1_alloc_entry.filter_bits := 0.U
  s1_alloc_entry.alias_bits := s1_gen_req.alias_bits
  s1_alloc_entry.debug_source_type := s1_gen_req.debug_source_type
  // Kill the outgoing TLB request if its entry is being replaced this cycle.
  io.tlb_req.req.valid := s1_tlb_req_valid && !((s1_tlb_fire_vec & s1_replace_vec).orR && s1_valid && !s1_hit)
  io.tlb_req.req.bits := s1_tlb_req_bits
  io.tlb_req.resp.ready := true.B
  io.tlb_req.req_kill := false.B
  tlb_req_arb.io.out.ready := true.B

  val s2_tlb_fire_vec_r = RegNext(s1_tlb_fire_vec, 0.U.asTypeOf(s1_tlb_fire_vec))
  s2_tlb_fire_vec := s2_tlb_fire_vec_r.asUInt

  // Per-entry state update: TLB fill, region-bit merge, filter-bit mark, allocate.
  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val alloc = s1_valid && !s1_hit && s1_replace_vec(i)
    val update = s1_valid && s1_hit && s1_update_vec(i)
    // for pf: use s0 data
    val pf_fired = s0_pf_fire_vec(i)
    val tlb_fired = s2_tlb_fire_vec(i) && !io.tlb_req.resp.bits.miss && io.tlb_req.resp.fire
    when(tlb_fired){
      // tlb_fired already requires !miss, so paddr_valid is simply set.
      ent.paddr_valid := true.B
      ent.region_addr := region_addr(io.tlb_req.resp.bits.paddr.head)
    }
    when(update){
      ent.region_bits := ent.region_bits | s1_gen_req.region_bits
    }
    when(pf_fired){
      val curr_bit = UIntToOH(block_addr(pf_req_arb.io.in(i).bits)(REGION_OFFSET - 1, 0))
      ent.filter_bits := ent.filter_bits | curr_bit
    }
    when(alloc){
      ent := s1_alloc_entry
      v := true.B
    }
  }
  when(s1_valid && s1_hit){
    assert(PopCount(s1_update_vec) === 1.U, "sms_pf_filter: multi-hit")
  }
  assert(!io.tlb_req.resp.fire || Cat(s2_tlb_fire_vec).orR, "sms_pf_filter: tlb resp fires, but no tlb req from tlb_req_arb 2 cycles ago")

  XSPerfAccumulate("sms_pf_filter_recv_req", io.gen_req.valid)
  XSPerfAccumulate("sms_pf_filter_hit", s1_valid && s1_hit)
  XSPerfAccumulate("sms_pf_filter_tlb_req", io.tlb_req.req.fire)
  XSPerfAccumulate("sms_pf_filter_tlb_resp_miss", io.tlb_req.resp.fire && io.tlb_req.resp.bits.miss)
  for(i <- 0 until smsParams.pf_filter_size){
    XSPerfAccumulate(s"sms_pf_filter_access_way_$i", s0_gen_req_valid && s0_access_way === i.U)
  }
XSPerfAccumulate("sms_pf_filter_l2_req", io.l2_pf_addr.valid)
}

/** Collects load/store training events into a small circular queue, dropping
  * duplicates (same block hash tag as an existing entry or an earlier same-cycle
  * request), and drains one training request per cycle to the prefetch engines.
  */
class SMSTrainFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper with HasTrainFilterHelper {
  val io = IO(new Bundle() {
    // train input
    // hybrid load store
    val ld_in = Flipped(Vec(exuParameters.LduCnt, ValidIO(new LdPrefetchTrainBundle())))
    val st_in = Flipped(Vec(exuParameters.StuCnt, ValidIO(new StPrefetchTrainBundle())))
    // filter out
    val train_req = ValidIO(new PrefetchReqBundle())
  })

  // Circular queue pointer sized to the train filter depth.
  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr](
    p => smsParams.train_filter_size
  ){
  }

  object Ptr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val ptr = Wire(new Ptr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  val entries = RegInit(VecInit(Seq.fill(smsParams.train_filter_size){ (0.U.asTypeOf(new PrefetchReqBundle())) }))
  val valids = RegInit(VecInit(Seq.fill(smsParams.train_filter_size){ (false.B) }))

  // Up to enqLen enqueues per cycle; one enqueue pointer per potential slot.
  val enqLen = exuParameters.LduCnt + exuParameters.StuCnt
  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new Ptr))

  val deqPtr = WireInit(deqPtrExt.value)

  require(smsParams.train_filter_size >= enqLen)

  val ld_reorder = reorder(io.ld_in)
  val st_reorder = reorder(io.st_in)
  val reqs_ls = ld_reorder.map(_.bits.asPrefetchReqBundle()) ++ st_reorder.map(_.bits.asPrefetchReqBundle())
  val reqs_vls = ld_reorder.map(_.valid) ++ st_reorder.map(_.valid)
  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))

  for(i <- (0 until enqLen)) {
    val req = reqs_ls(i)
    val req_v = reqs_vls(i)
    // Compact same-cycle requests: the i-th accepted request uses the
    // PopCount-th enqueue pointer.
    val index = PopCount(needAlloc.take(i))
    val allocPtr = enqPtrExt(index)
    // Duplicate of an entry already in the queue?
    val entry_match = Cat(entries.zip(valids).map {
      case(e, v) => v && block_hash_tag(e.vaddr) === block_hash_tag(req.vaddr)
    }).orR
    // Duplicate of an earlier request in the same cycle?
    val prev_enq_match = if(i == 0) false.B else Cat(reqs_ls.zip(reqs_vls).take(i).map {
      case(pre, pre_v) => pre_v && block_hash_tag(pre.vaddr) === block_hash_tag(req.vaddr)
    }).orR

    needAlloc(i) := req_v && !entry_match && !prev_enq_match
    // allocPtr >= deqPtrExt (circular comparison) means the slot is free.
    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt

    when(canAlloc(i)) {
      valids(allocPtr.value) := true.B
      entries(allocPtr.value) := req
    }
  }
  val allocNum = PopCount(canAlloc)

  enqPtrExt.foreach{case x => x := x + allocNum}

  // Dequeue: broadcast the entry at deqPtr; valid iff that slot holds data.
  io.train_req.valid := false.B
  io.train_req.bits := DontCare
  valids.zip(entries).zipWithIndex.foreach {
    case((valid, entry), i) => {
      when(deqPtr === i.U) {
        io.train_req.valid := valid
        io.train_req.bits := entry
      }
    }
  }

  // train_req has no back-pressure: the head entry is retired every cycle it is valid.
  when(io.train_req.valid) {
    valids(deqPtr) := false.B
    deqPtrExt := deqPtrExt + 1.U
  }

  XSPerfAccumulate("sms_train_filter_full", PopCount(valids) === (smsParams.train_filter_size).U)
  XSPerfAccumulate("sms_train_filter_half", PopCount(valids) >= (smsParams.train_filter_size / 2).U)
  XSPerfAccumulate("sms_train_filter_empty", PopCount(valids) === 0.U)

  val raw_enq_pattern = Cat(reqs_vls)
  val filtered_enq_pattern = Cat(needAlloc)
  val actual_enq_pattern = Cat(canAlloc)
  XSPerfAccumulate("sms_train_filter_enq", allocNum > 0.U)
  XSPerfAccumulate("sms_train_filter_deq", io.train_req.fire)
  // Elaboration-time helper: render i as a binary string for perf-counter names.
  def toBinary(n: Int): String = n match {
    case 0|1 => s"$n"
    case _ => s"${toBinary(n/2)}${n%2}"
  }
  for(i <- 0 until (1 << enqLen)) {
    XSPerfAccumulate(s"sms_train_filter_raw_enq_pattern_${toBinary(i)}", raw_enq_pattern === i.U)
    XSPerfAccumulate(s"sms_train_filter_filtered_enq_pattern_${toBinary(i)}", filtered_enq_pattern === i.U)
    XSPerfAccumulate(s"sms_train_filter_actual_enq_pattern_${toBinary(i)}", actual_enq_pattern === i.U)
  }
}

class
SMSPrefetcher()(implicit p: Parameters) extends BasePrefecher with HasSMSModuleHelper with HasL1PrefetchSourceParameter {

  // Top level of the SMS prefetcher: wires the train filter, AGT, stride
  // predictor, PHT and prefetch filter together and issues L2 prefetch requests.
  val maxLduCnt = LoadPipelineWidth
  require(exuParameters.LduCnt == maxLduCnt)

  // Runtime enables / tuning knobs for the sub-predictors.
  val io_agt_en = IO(Input(Bool()))
  val io_stride_en = IO(Input(Bool()))
  val io_pht_en = IO(Input(Bool()))
  val io_act_threshold = IO(Input(UInt(REGION_OFFSET.W)))
  val io_act_stride = IO(Input(UInt(6.W)))
  val io_dcache_evict = IO(Flipped(DecoupledIO(new AGTEvictReq)))

  val train_filter = Module(new SMSTrainFilter)

  train_filter.io.ld_in <> io.ld_in
  train_filter.io.st_in <> io.st_in

  val train_ld = train_filter.io.train_req.bits

  // Derive region-level tags/addresses for the training access and its two
  // neighbouring regions (+1 / -1).
  val train_block_tag = block_hash_tag(train_ld.vaddr)
  val train_region_tag = train_block_tag.head(REGION_TAG_WIDTH)

  val train_region_addr_raw = region_addr(train_ld.vaddr)(REGION_TAG_WIDTH + 2 * VADDR_HASH_WIDTH - 1, 0)
  val train_region_addr_p1 = Cat(0.U(1.W), train_region_addr_raw) + 1.U
  val train_region_addr_m1 = Cat(0.U(1.W), train_region_addr_raw) - 1.U
  // addr_p1 or addr_m1 is valid? (carry/borrow out of the raw width means the
  // neighbour wrapped and must not be used)
  val train_allow_cross_region_p1 = !train_region_addr_p1.head(1).asBool
  val train_allow_cross_region_m1 = !train_region_addr_m1.head(1).asBool

  val train_region_p1_tag = region_hash_tag(train_region_addr_p1.tail(1))
  val train_region_m1_tag = region_hash_tag(train_region_addr_m1.tail(1))

  val train_region_p1_cross_page = page_bit(train_region_addr_p1) ^ page_bit(train_region_addr_raw)
  val train_region_m1_cross_page = page_bit(train_region_addr_m1) ^ page_bit(train_region_addr_raw)

  val train_region_paddr = region_addr(train_ld.paddr)
  val train_region_vaddr = region_addr(train_ld.vaddr)
  val train_region_offset = train_block_tag(REGION_OFFSET - 1, 0)
  // val train_vld = RegNext(pending_vld || Cat(ld_curr_vld).orR, false.B)
  val train_vld = train_filter.io.train_req.valid


  // prefetch stage0
  val active_gen_table = Module(new ActiveGenerationTable())
  val stride = Module(new StridePF())
  val pht = Module(new PatternHistoryTable())
  val pf_filter = Module(new PrefetchFilter())

  // Register the training event one cycle before feeding the predictors.
  val train_vld_s0 = RegNext(train_vld, false.B)
  val train_s0 = RegEnable(train_ld, train_vld)
  val train_region_tag_s0 = RegEnable(train_region_tag, train_vld)
  val train_region_p1_tag_s0 = RegEnable(train_region_p1_tag, train_vld)
  val train_region_m1_tag_s0 = RegEnable(train_region_m1_tag, train_vld)
  val train_allow_cross_region_p1_s0 = RegEnable(train_allow_cross_region_p1, train_vld)
  val train_allow_cross_region_m1_s0 = RegEnable(train_allow_cross_region_m1, train_vld)
  val train_pht_tag_s0 = RegEnable(pht_tag(train_ld.pc), train_vld)
  val train_pht_index_s0 = RegEnable(pht_index(train_ld.pc), train_vld)
  val train_region_offset_s0 = RegEnable(train_region_offset, train_vld)
  val train_region_p1_cross_page_s0 = RegEnable(train_region_p1_cross_page, train_vld)
  val train_region_m1_cross_page_s0 = RegEnable(train_region_m1_cross_page, train_vld)
  val train_region_paddr_s0 = RegEnable(train_region_paddr, train_vld)
  val train_region_vaddr_s0 = RegEnable(train_region_vaddr, train_vld)

  active_gen_table.io.agt_en := io_agt_en
  active_gen_table.io.act_threshold := io_act_threshold
  active_gen_table.io.act_stride := io_act_stride
  active_gen_table.io.s0_lookup.valid := train_vld_s0
  active_gen_table.io.s0_lookup.bits.region_tag := train_region_tag_s0
  active_gen_table.io.s0_lookup.bits.region_p1_tag := train_region_p1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_m1_tag := train_region_m1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_offset := train_region_offset_s0
  active_gen_table.io.s0_lookup.bits.pht_index := train_pht_index_s0
  active_gen_table.io.s0_lookup.bits.pht_tag := train_pht_tag_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_p1 := train_allow_cross_region_p1_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_m1 := train_allow_cross_region_m1_s0
  active_gen_table.io.s0_lookup.bits.region_p1_cross_page := train_region_p1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_m1_cross_page := train_region_m1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_paddr := train_region_paddr_s0
  active_gen_table.io.s0_lookup.bits.region_vaddr := train_region_vaddr_s0
  active_gen_table.io.s2_stride_hit := stride.io.s2_gen_req.valid
  active_gen_table.io.s0_dcache_evict <> io_dcache_evict

  stride.io.stride_en := io_stride_en
  stride.io.s0_lookup.valid := train_vld_s0
  stride.io.s0_lookup.bits.pc := train_s0.pc(STRIDE_PC_BITS - 1, 0)
  stride.io.s0_lookup.bits.vaddr := Cat(
    train_region_vaddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s0_lookup.bits.paddr := Cat(
    train_region_paddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s1_valid := active_gen_table.io.s1_sel_stride

  pht.io.s2_agt_lookup := active_gen_table.io.s2_pht_lookup
  pht.io.agt_update := active_gen_table.io.s2_evict

  // AGT/stride requests (mutually exclusive, see assert) take priority over PHT.
  val pht_gen_valid = pht.io.pf_gen_req.valid && io_pht_en
  val agt_gen_valid = active_gen_table.io.s2_pf_gen_req.valid
  val stride_gen_valid = stride.io.s2_gen_req.valid
  val pf_gen_req = Mux(agt_gen_valid || stride_gen_valid,
    Mux1H(Seq(
      agt_gen_valid -> active_gen_table.io.s2_pf_gen_req.bits,
      stride_gen_valid -> stride.io.s2_gen_req.bits
    )),
    pht.io.pf_gen_req.bits
  )
  assert(!(agt_gen_valid && stride_gen_valid))
  pf_filter.io.gen_req.valid := pht_gen_valid || agt_gen_valid || stride_gen_valid
  pf_filter.io.gen_req.bits := pf_gen_req
  io.tlb_req <> pf_filter.io.tlb_req
  // Only addresses strictly above this bound are prefetched.
  // NOTE(review): hard-coded memory lower bound (presumably the DRAM base of the
  // platform memory map) — should ideally come from platform parameters; confirm.
  val prefetchableAddrLowerBound = 0x80000000L.U
  val is_valid_address = pf_filter.io.l2_pf_addr.bits > prefetchableAddrLowerBound

  io.l2_req.valid := pf_filter.io.l2_pf_addr.valid && io.enable && is_valid_address
  io.l2_req.bits.addr := pf_filter.io.l2_pf_addr.bits
  io.l2_req.bits.source := MemReqSource.Prefetch2L2SMS.id.U

  // for now, sms will not send l1 prefetch requests
  io.l1_req.bits.paddr := pf_filter.io.l2_pf_addr.bits
  io.l1_req.bits.alias := pf_filter.io.pf_alias_bits
  io.l1_req.bits.is_store := true.B
  io.l1_req.bits.confidence := 1.U
  io.l1_req.bits.pf_source.value := L1_HW_PREFETCH_NULL
  io.l1_req.valid := false.B

  for((train, i) <- io.ld_in.zipWithIndex){
    XSPerfAccumulate(s"pf_train_miss_${i}", train.valid && train.bits.miss)
    XSPerfAccumulate(s"pf_train_prefetched_${i}", train.valid && isFromL1Prefetch(train.bits.meta_prefetch))
  }
  // Debug trace of issued L2 prefetches (vaddr/pc intentionally zeroed).
  val trace = Wire(new L1MissTrace)
  trace.vaddr := 0.U
  trace.pc := 0.U
  trace.paddr := io.l2_req.bits.addr
  trace.source := pf_filter.io.debug_source_type
  val table = ChiselDB.createTable("L1SMSMissTrace_hart"+ p(XSCoreParamsKey).HartId.toString, new L1MissTrace)
  table.log(trace, io.l2_req.fire, "SMSPrefetcher", clock, reset)

  XSPerfAccumulate("sms_pf_gen_conflict",
    pht_gen_valid && agt_gen_valid
  )
  XSPerfAccumulate("sms_pht_disabled", pht.io.pf_gen_req.valid && !io_pht_en)
  XSPerfAccumulate("sms_agt_disabled", active_gen_table.io.s2_pf_gen_req.valid && !io_agt_en)
  XSPerfAccumulate("sms_pf_real_issued", io.l2_req.valid)
  XSPerfAccumulate("sms_l1_req_valid", io.l1_req.valid)
  XSPerfAccumulate("sms_l1_req_fire", io.l1_req.fire)
}