package xiangshan.mem.prefetch

import chipsalliance.rocketchip.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import utility._
import xiangshan.cache.HasDCacheParameters
import xiangshan.cache.mmu._
import xiangshan.mem.L1PrefetchReq
import xiangshan.mem.trace._

// Configuration of the SMS (Spatial Memory Streaming) prefetcher.
// Sizes are in bytes unless stated otherwise; table sizes are entry counts.
case class SMSParams
(
  region_size: Int = 1024,          // bytes covered by one spatial region
  vaddr_hash_width: Int = 5,        // width of the xor-folded vaddr hash used in tags
  block_addr_raw_width: Int = 10,   // low (un-hashed) bits kept from a block address
  stride_pc_bits: Int = 10,         // PC bits used to match entries of the stride table
  max_stride: Int = 1024,           // largest stride (in blocks) the stride table can record
  stride_entries: Int = 16,         // stride table entries
  active_gen_table_size: Int = 16,  // AGT entries
  pht_size: Int = 64,               // total PHT entries (sets * ways)
  pht_ways: Int = 2,
  pht_hist_bits: Int = 2,           // saturating-counter width per PHT history bit
  pht_tag_bits: Int = 13,
  pht_lookup_queue_size: Int = 4,
  pf_filter_size: Int = 16
) extends PrefetcherParams

// Derived widths and address-manipulation helpers shared by all SMS sub-modules
// (stride prefetcher, active generation table, pattern history table).
trait HasSMSModuleHelper extends HasCircularQueuePtrHelper with HasDCacheParameters
{ this: HasXSParameter =>
  val smsParams = coreParams.prefetcher.get.asInstanceOf[SMSParams]
  val BLK_ADDR_WIDTH = VAddrBits - log2Up(dcacheParameters.blockBytes)
  val REGION_SIZE = smsParams.region_size
  // number of cache blocks per spatial region
  val REGION_BLKS = smsParams.region_size / dcacheParameters.blockBytes
  val REGION_ADDR_BITS = VAddrBits - log2Up(REGION_SIZE)
  // bits needed to index one block inside a region
  val REGION_OFFSET = log2Up(REGION_BLKS)
  val VADDR_HASH_WIDTH = smsParams.vaddr_hash_width
  val BLK_ADDR_RAW_WIDTH = smsParams.block_addr_raw_width
  val REGION_ADDR_RAW_WIDTH = BLK_ADDR_RAW_WIDTH - REGION_OFFSET
  // tag layout: hash(high bits) ## raw low bits
  val BLK_TAG_WIDTH = BLK_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val REGION_TAG_WIDTH = REGION_ADDR_RAW_WIDTH + VADDR_HASH_WIDTH
  val PHT_INDEX_BITS = log2Up(smsParams.pht_size / smsParams.pht_ways)
  val PHT_TAG_BITS = smsParams.pht_tag_bits
  val PHT_HIST_BITS = smsParams.pht_hist_bits
  // page bit index in block addr
  val BLOCK_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / dcacheParameters.blockBytes)
  // page bit index in region addr
  val REGION_ADDR_PAGE_BIT = log2Up(dcacheParameters.pageSize / smsParams.region_size)
  val STRIDE_PC_BITS = smsParams.stride_pc_bits
  val STRIDE_BLK_ADDR_BITS =
log2Up(smsParams.max_stride)

  // Strip the block offset: full address -> block address.
  def block_addr(x: UInt): UInt = {
    val offset = log2Up(dcacheParameters.blockBytes)
    x(x.getWidth - 1, offset)
  }

  // Strip the region offset: full address -> region address.
  def region_addr(x: UInt): UInt = {
    val offset = log2Up(REGION_SIZE)
    x(x.getWidth - 1, offset)
  }

  // One-hot mask selecting a single block slot inside a region.
  def region_offset_to_bits(off: UInt): UInt = {
    (1.U << off).asUInt
  }

  // Compress a region address into a tag: keep REGION_ADDR_RAW_WIDTH raw low
  // bits, xor-fold the next 3*VADDR_HASH_WIDTH bits down to VADDR_HASH_WIDTH.
  def region_hash_tag(rg_addr: UInt): UInt = {
    val low = rg_addr(REGION_ADDR_RAW_WIDTH - 1, 0)
    val high = rg_addr(REGION_ADDR_RAW_WIDTH + 3 * VADDR_HASH_WIDTH - 1, REGION_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  // Lowest region-address bit that selects a page (used for cross-page checks).
  def page_bit(region_addr: UInt): UInt = {
    region_addr(log2Up(dcacheParameters.pageSize/REGION_SIZE))
  }

  // Same folding scheme as region_hash_tag, applied to a block address.
  // NOTE(review): the high slice here spans 3*VADDR_HASH_WIDTH + 1 bits
  // ((RAW-1+3W) downto RAW), one more than vaddr_hash consumes — the top bit
  // is silently dropped by vaddr_hash's slicing; confirm this is intended.
  def block_hash_tag(x: UInt): UInt = {
    val blk_addr = block_addr(x)
    val low = blk_addr(BLK_ADDR_RAW_WIDTH - 1, 0)
    val high = blk_addr(BLK_ADDR_RAW_WIDTH - 1 + 3 * VADDR_HASH_WIDTH, BLK_ADDR_RAW_WIDTH)
    val high_hash = vaddr_hash(high)
    Cat(high_hash, low)
  }

  // xor-fold 3*VADDR_HASH_WIDTH bits into VADDR_HASH_WIDTH bits.
  def vaddr_hash(x: UInt): UInt = {
    val width = VADDR_HASH_WIDTH
    val low = x(width - 1, 0)
    val mid = x(2 * width - 1, width)
    val high = x(3 * width - 1, 2 * width)
    low ^ mid ^ high
  }

  // PHT set index from the PC: word-address bits pc(PHT_INDEX_BITS, 2), with the
  // top index bit mixed with pc(1) — presumably to spread compressed-instruction
  // PCs across sets; confirm against the PC alignment used by the caller.
  def pht_index(pc: UInt): UInt = {
    val low_bits = pc(PHT_INDEX_BITS, 2)
    val hi_bit = pc(1) ^ pc(PHT_INDEX_BITS+1)
    Cat(hi_bit, low_bits)
  }

  // PHT tag: the PC bits directly above the index field.
  def pht_tag(pc: UInt): UInt = {
    pc(PHT_INDEX_BITS + 2 + PHT_TAG_BITS - 1, PHT_INDEX_BITS + 2)
  }

  // Cache-alias bits taken from the region vaddr.
  // NOTE(review): with the default REGION_SIZE these are vaddr bits above the
  // usual 4KB page — verify the mapping against the dcache alias scheme.
  def get_alias_bits(region_vaddr: UInt): UInt = region_vaddr(7, 6)
}

// PC-indexed stride prefetcher: learns a per-PC block stride with a 2-bit
// saturating confidence counter and, on a confident stride match, emits a
// region prefetch request one stride ahead of the current access.
class StridePF()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val stride_en = Input(Bool())
    // s0: training lookup, keyed by (truncated) PC
    val s0_lookup = Flipped(new ValidIO(new Bundle() {
      val pc = UInt(STRIDE_PC_BITS.W)
      val vaddr = UInt(VAddrBits.W)
      val paddr = UInt(PAddrBits.W)
    }))
    // s1: qualifies the lookup issued in the previous cycle
    val s1_valid = Input(Bool())
    // s2: generated prefetch request, if any
    val s2_gen_req = ValidIO(new PfGenReq())
  })

  val prev_valid =
RegNext(io.s0_lookup.valid, false.B)
  val prev_pc = RegEnable(io.s0_lookup.bits.pc, io.s0_lookup.valid)

  // Suppress back-to-back lookups from the same PC: they carry no new stride info.
  val s0_valid = io.s0_lookup.valid && !(prev_valid && prev_pc === io.s0_lookup.bits.pc)

  def entry_map[T](fn: Int => T) = (0 until smsParams.stride_entries).map(fn)

  val replacement = ReplacementPolicy.fromString("plru", smsParams.stride_entries)
  val valids = entry_map(_ => RegInit(false.B))
  val entries_pc = entry_map(_ => Reg(UInt(STRIDE_PC_BITS.W)) )
  // 2-bit saturating confidence; bit 1 set means the stride is considered stable
  val entries_conf = entry_map(_ => RegInit(1.U(2.W)))
  // last accessed block address (low STRIDE_BLK_ADDR_BITS bits of the block vaddr)
  val entries_last_addr = entry_map(_ => Reg(UInt(STRIDE_BLK_ADDR_BITS.W)) )
  // learned stride in blocks, signed
  val entries_stride = entry_map(_ => Reg(SInt((STRIDE_BLK_ADDR_BITS+1).W)))

  // s0: fully-associative match on PC
  val s0_match_vec = valids.zip(entries_pc).map({
    case (v, pc) => v && pc === io.s0_lookup.bits.pc
  })

  val s0_hit = s0_valid && Cat(s0_match_vec).orR
  val s0_miss = s0_valid && !s0_hit
  val s0_matched_conf = Mux1H(s0_match_vec, entries_conf)
  val s0_matched_last_addr = Mux1H(s0_match_vec, entries_last_addr)
  val s0_matched_last_stride = Mux1H(s0_match_vec, entries_stride)

  // s1: train the matched entry, or allocate on a miss
  val s1_vaddr = RegEnable(io.s0_lookup.bits.vaddr, s0_valid)
  val s1_paddr = RegEnable(io.s0_lookup.bits.paddr, s0_valid)
  val s1_hit = RegNext(s0_hit) && io.s1_valid
  val s1_alloc = RegNext(s0_miss) && io.s1_valid
  val s1_conf = RegNext(s0_matched_conf)
  val s1_last_addr = RegNext(s0_matched_last_addr)
  val s1_last_stride = RegNext(s0_matched_last_stride)
  val s1_match_vec = RegNext(VecInit(s0_match_vec))

  val BLOCK_OFFSET = log2Up(dcacheParameters.blockBytes)
  val s1_new_stride_vaddr = s1_vaddr(BLOCK_OFFSET + STRIDE_BLK_ADDR_BITS - 1, BLOCK_OFFSET)
  // current block addr minus last block addr, zero-extended then subtracted as signed
  val s1_new_stride = (0.U(1.W) ## s1_new_stride_vaddr).asSInt - (0.U(1.W) ## s1_last_addr).asSInt
  val s1_stride_non_zero = s1_last_stride =/= 0.S
  val s1_stride_match = s1_new_stride === s1_last_stride && s1_stride_non_zero
  val s1_replace_idx = replacement.way

  for(i <- 0 until smsParams.stride_entries){
    val alloc = s1_alloc && i.U === s1_replace_idx
    val update = s1_hit && s1_match_vec(i)
    when(update){
      assert(valids(i))
      // confidence saturates up on a stride match, down otherwise
      entries_conf(i) := Mux(s1_stride_match,
        Mux(s1_conf === 3.U, 3.U, s1_conf + 1.U),
        Mux(s1_conf === 0.U, 0.U, s1_conf - 1.U)
      )
      entries_last_addr(i) := s1_new_stride_vaddr
      // only re-learn the stride while confidence is still low (conf < 2)
      when(!s1_conf(1)){
        entries_stride(i) := s1_new_stride
      }
    }
    when(alloc){
      valids(i) := true.B
      entries_pc(i) := prev_pc
      entries_conf(i) := 0.U
      entries_last_addr(i) := s1_new_stride_vaddr
      entries_stride(i) := 0.S
    }
    assert(!(update && alloc))
  }
  when(s1_hit){
    replacement.access(OHToUInt(s1_match_vec.asUInt))
  }.elsewhen(s1_alloc){
    replacement.access(s1_replace_idx)
  }

  // prefetch target = current block + learned stride
  val s1_block_vaddr = block_addr(s1_vaddr)
  val s1_pf_block_vaddr = (s1_block_vaddr.asSInt + s1_last_stride).asUInt
  val s1_pf_cross_page = s1_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT) =/= s1_block_vaddr(BLOCK_ADDR_PAGE_BIT)

  // s2: assemble the prefetch request
  val s2_pf_gen_valid = RegNext(s1_hit && s1_stride_match, false.B)
  // the known paddr is only usable when the target stays within the same page
  val s2_pf_gen_paddr_valid = RegEnable(!s1_pf_cross_page, s1_hit && s1_stride_match)
  val s2_pf_block_vaddr = RegEnable(s1_pf_block_vaddr, s1_hit && s1_stride_match)
  val s2_block_paddr = RegEnable(block_addr(s1_paddr), s1_hit && s1_stride_match)

  // same page: splice the page frame from paddr with in-page vaddr bits;
  // cross page: fall back to the virtual address (needs translation downstream)
  val s2_pf_block_addr = Mux(s2_pf_gen_paddr_valid,
    Cat(
      s2_block_paddr(PAddrBits - BLOCK_OFFSET - 1, BLOCK_ADDR_PAGE_BIT),
      s2_pf_block_vaddr(BLOCK_ADDR_PAGE_BIT - 1, 0)
    ),
    s2_pf_block_vaddr
  )
  val s2_pf_full_addr = Wire(UInt(VAddrBits.W))
  s2_pf_full_addr := s2_pf_block_addr ## 0.U(BLOCK_OFFSET.W)

  val s2_pf_region_addr = region_addr(s2_pf_full_addr)
  val s2_pf_region_offset = s2_pf_block_addr(REGION_OFFSET - 1, 0)

  val s2_full_vaddr = Wire(UInt(VAddrBits.W))
  s2_full_vaddr := s2_pf_block_vaddr ## 0.U(BLOCK_OFFSET.W)

  val s2_region_tag =
region_hash_tag(region_addr(s2_full_vaddr))

  // Drive the generated request; gated by the global stride enable.
  io.s2_gen_req.valid := s2_pf_gen_valid && io.stride_en
  io.s2_gen_req.bits.region_tag := s2_region_tag
  io.s2_gen_req.bits.region_addr := s2_pf_region_addr
  io.s2_gen_req.bits.alias_bits := get_alias_bits(region_addr(s2_full_vaddr))
  io.s2_gen_req.bits.region_bits := region_offset_to_bits(s2_pf_region_offset)
  io.s2_gen_req.bits.paddr_valid := s2_pf_gen_paddr_valid
  io.s2_gen_req.bits.decr_mode := false.B
  io.s2_gen_req.bits.debug_source_type := HW_PREFETCH_STRIDE.U

}

// NOTE(review): a second, near-identical top-level `class StridePF` definition
// used to follow here. Two classes with the same name in one package do not
// compile, and the duplicate additionally left its `s2_gen_req.bits.alias_bits`
// and `debug_source_type` outputs unassigned. The duplicate has been removed;
// the complete definition above is the one kept.

// One entry of the Active Generation Table: the access bitmap of a spatial
// region currently being trained, plus the PHT slot it will be evicted into.
class AGTEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)
  val pht_tag = UInt(PHT_TAG_BITS.W)
  // one bit per block in the region: accessed or not
  val region_bits = UInt(REGION_BLKS.W)
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  // offset of the trigger (first) access within the region
  val region_offset = UInt(REGION_OFFSET.W)
  // distinct-block access count; saturates at REGION_BLKS - 1
  val access_cnt = UInt((REGION_BLKS-1).U.getWidth.W)
  // true when the stream walks the region downward
  val decr_mode = Bool()
}

// Prefetch-generation request shared by the stride / AGT / PHT sources.
class PfGenReq()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  val region_addr = UInt(REGION_ADDR_BITS.W)
  val region_bits =
UInt(REGION_BLKS.W)
  // when false, region_addr holds a vaddr that still needs translation
  val paddr_valid = Bool()
  val decr_mode = Bool()
  val alias_bits = UInt(2.W)
  val debug_source_type = UInt(log2Up(nSourceType).W)
}

// Active Generation Table: tracks regions with an in-flight access stream.
// 2-stage pipeline: s0 CAM-matches the lookup region (and its +1/-1 neighbors),
// s1 updates/allocates an entry and computes the prefetch target, s2 emits the
// pf request, the PHT lookup (on miss) and the evicted entry (on allocation).
class ActiveGenerationTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val agt_en = Input(Bool())
    val s0_lookup = Flipped(ValidIO(new Bundle() {
      val region_tag = UInt(REGION_TAG_WIDTH.W)
      // tags of the adjacent regions (region + 1 / region - 1)
      val region_p1_tag = UInt(REGION_TAG_WIDTH.W)
      val region_m1_tag = UInt(REGION_TAG_WIDTH.W)
      val region_offset = UInt(REGION_OFFSET.W)
      val pht_index = UInt(PHT_INDEX_BITS.W)
      val pht_tag = UInt(PHT_TAG_BITS.W)
      val allow_cross_region_p1 = Bool()
      val allow_cross_region_m1 = Bool()
      val region_p1_cross_page = Bool()
      val region_m1_cross_page = Bool()
      val region_paddr = UInt(REGION_ADDR_BITS.W)
      val region_vaddr = UInt(REGION_ADDR_BITS.W)
    }))
    val s1_sel_stride = Output(Bool())
    val s2_stride_hit = Input(Bool())
    // if agt/stride missed, try lookup pht
    val s2_pht_lookup = ValidIO(new PhtLookup())
    // evict entry to pht
    val s2_evict = ValidIO(new AGTEntry())
    val s2_pf_gen_req = ValidIO(new PfGenReq())
    // minimum access_cnt before a region is considered "active" enough to prefetch
    val act_threshold = Input(UInt(REGION_OFFSET.W))
    // distance (in blocks) the generated prefetch runs ahead of the access
    val act_stride = Input(UInt(6.W))
  })

  val entries = Seq.fill(smsParams.active_gen_table_size){ Reg(new AGTEntry()) }
  val valids = Seq.fill(smsParams.active_gen_table_size){ RegInit(false.B) }
  val replacement = ReplacementPolicy.fromString("plru", smsParams.active_gen_table_size)

  // mask of the entry being replaced in s1, fed back to s0 for conflict detection
  val s1_replace_mask_w = Wire(UInt(smsParams.active_gen_table_size.W))

  val s0_lookup = io.s0_lookup.bits
  val s0_lookup_valid = io.s0_lookup.valid

  val prev_lookup = RegEnable(s0_lookup, s0_lookup_valid)
  val prev_lookup_valid = RegNext(s0_lookup_valid, false.B)

  // same region looked up twice in a row
  val s0_match_prev = prev_lookup_valid && s0_lookup.region_tag === prev_lookup.region_tag

  // per-entry valid && tag-match vector for a given region tag
  def gen_match_vec(region_tag: UInt): Seq[Bool] = {
    entries.zip(valids).map({
      case (ent, v) => v && ent.region_tag === region_tag
    })
  }

  val region_match_vec_s0 = gen_match_vec(s0_lookup.region_tag)
  val region_p1_match_vec_s0 = gen_match_vec(s0_lookup.region_p1_tag)
  val region_m1_match_vec_s0 = gen_match_vec(s0_lookup.region_m1_tag)

  val any_region_match = Cat(region_match_vec_s0).orR
  val any_region_p1_match = Cat(region_p1_match_vec_s0).orR && s0_lookup.allow_cross_region_p1
  val any_region_m1_match = Cat(region_m1_match_vec_s0).orR && s0_lookup.allow_cross_region_m1

  val s0_region_hit = any_region_match
  val s0_cross_region_hit = any_region_m1_match || any_region_p1_match
  // allocate only on a real miss that is not a repeat of the previous lookup
  val s0_alloc = s0_lookup_valid && !s0_region_hit && !s0_match_prev
  // entry used for pf generation: exact match first, then -1, then +1 neighbor
  val s0_pf_gen_match_vec = valids.indices.map(i => {
    Mux(any_region_match,
      region_match_vec_s0(i),
      Mux(any_region_m1_match,
        region_m1_match_vec_s0(i), region_p1_match_vec_s0(i)
      )
    )
  })
  val s0_agt_entry = Wire(new AGTEntry())

  s0_agt_entry.pht_index := s0_lookup.pht_index
  s0_agt_entry.pht_tag := s0_lookup.pht_tag
  s0_agt_entry.region_bits := region_offset_to_bits(s0_lookup.region_offset)
  s0_agt_entry.region_tag := s0_lookup.region_tag
  s0_agt_entry.region_offset := s0_lookup.region_offset
  s0_agt_entry.access_cnt := 1.U
  // lookup_region + 1 == entry_region
  // lookup_region = entry_region - 1 => decr mode
  s0_agt_entry.decr_mode := !s0_region_hit && !any_region_m1_match && any_region_p1_match
  val s0_replace_way = replacement.way
  val s0_replace_mask = UIntToOH(s0_replace_way)
  // s0 hit a entry that may be replaced in s1
  val s0_update_conflict = Cat(VecInit(region_match_vec_s0).asUInt & s1_replace_mask_w).orR
  val s0_update = s0_lookup_valid && s0_region_hit && !s0_update_conflict

  val s0_access_way = Mux1H(
    Seq(s0_update, s0_alloc),
    Seq(OHToUInt(region_match_vec_s0), s0_replace_way)
  )
  when(s0_update || s0_alloc) {
    replacement.access(s0_access_way)
  }

  // stage1: update/alloc
  // region hit, update entry
  val s1_update = RegNext(s0_update, false.B)
  val s1_update_mask = RegEnable(VecInit(region_match_vec_s0), s0_lookup_valid)
  val s1_agt_entry = RegEnable(s0_agt_entry, s0_lookup_valid)
  val s1_cross_region_match = RegNext(s0_lookup_valid && s0_cross_region_hit, false.B)
  val s1_alloc = RegNext(s0_alloc, false.B)
  val s1_alloc_entry = s1_agt_entry
  val s1_replace_mask = RegEnable(s0_replace_mask, s0_lookup_valid)
  s1_replace_mask_w := s1_replace_mask & Fill(smsParams.active_gen_table_size, s1_alloc)
  // entry displaced by the allocation; forwarded to the PHT in s2
  val s1_evict_entry = Mux1H(s1_replace_mask, entries)
  val s1_evict_valid = Mux1H(s1_replace_mask, valids)
  // pf gen
  val s1_pf_gen_match_vec = RegEnable(VecInit(s0_pf_gen_match_vec), s0_lookup_valid)
  val s1_region_paddr = RegEnable(s0_lookup.region_paddr, s0_lookup_valid)
  val s1_region_vaddr = RegEnable(s0_lookup.region_vaddr, s0_lookup_valid)
  val s1_region_offset = RegEnable(s0_lookup.region_offset, s0_lookup_valid)
  for(i <- entries.indices){
    val alloc = s1_replace_mask(i) && s1_alloc
    val update = s1_update_mask(i) && s1_update
    val update_entry = WireInit(entries(i))
    update_entry.region_bits := entries(i).region_bits | s1_agt_entry.region_bits
    // count only first-time block accesses; saturate at REGION_BLKS - 1
    update_entry.access_cnt := Mux(entries(i).access_cnt === (REGION_BLKS - 1).U,
      entries(i).access_cnt,
      entries(i).access_cnt + (s1_agt_entry.region_bits & (~entries(i).region_bits).asUInt).orR
    )
    valids(i) := valids(i) || alloc
    entries(i) := Mux(alloc, s1_alloc_entry, Mux(update, update_entry, entries(i)))
  }

  when(s1_update){
    assert(PopCount(s1_update_mask) === 1.U, "multi-agt-update")
  }
  when(s1_alloc){
    assert(PopCount(s1_replace_mask) === 1.U, "multi-agt-alloc")
  }

  // pf_addr
  // 1.hit => pf_addr = lookup_addr + (decr ? -1 : 1)
  // 2.lookup region - 1 hit => lookup_addr + 1 (incr mode)
  // 3.lookup region + 1 hit => lookup_addr - 1 (decr mode)
  val s1_hited_entry_decr = Mux1H(s1_update_mask, entries.map(_.decr_mode))
  val s1_pf_gen_decr_mode = Mux(s1_update,
    s1_hited_entry_decr,
    s1_agt_entry.decr_mode
  )

  // block-granularity target = {region vaddr, offset} +/- act_stride,
  // with one extra MSB to detect address-space overflow
  val s1_pf_gen_vaddr_inc = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) + io.act_stride
  val s1_pf_gen_vaddr_dec = Cat(0.U, s1_region_vaddr(REGION_TAG_WIDTH - 1, 0), s1_region_offset) - io.act_stride
  val s1_vaddr_inc_cross_page = s1_pf_gen_vaddr_inc(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s1_vaddr_dec_cross_page = s1_pf_gen_vaddr_dec(BLOCK_ADDR_PAGE_BIT) =/= s1_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s1_vaddr_inc_cross_max_lim = s1_pf_gen_vaddr_inc.head(1).asBool
  val s1_vaddr_dec_cross_max_lim = s1_pf_gen_vaddr_dec.head(1).asBool

  //val s1_pf_gen_vaddr_p1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) + 1.U
  //val s1_pf_gen_vaddr_m1 = s1_region_vaddr(REGION_TAG_WIDTH - 1, 0) - 1.U
  val s1_pf_gen_vaddr = Cat(
    s1_region_vaddr(REGION_ADDR_BITS - 1, REGION_TAG_WIDTH),
    Mux(s1_pf_gen_decr_mode,
      s1_pf_gen_vaddr_dec.tail(1).head(REGION_TAG_WIDTH),
      s1_pf_gen_vaddr_inc.tail(1).head(REGION_TAG_WIDTH)
    )
  )
  val s1_pf_gen_offset = Mux(s1_pf_gen_decr_mode,
    s1_pf_gen_vaddr_dec(REGION_OFFSET - 1, 0),
    s1_pf_gen_vaddr_inc(REGION_OFFSET - 1, 0)
  )
  val s1_pf_gen_offset_mask = UIntToOH(s1_pf_gen_offset)
  val s1_pf_gen_access_cnt = Mux1H(s1_pf_gen_match_vec, entries.map(_.access_cnt))
  val s1_in_active_page = s1_pf_gen_access_cnt > io.act_threshold
  // generate a pf only for an active-enough region whose target does not
  // overflow the address space
  val s1_pf_gen_valid = prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && Mux(s1_pf_gen_decr_mode,
    !s1_vaddr_dec_cross_max_lim,
    !s1_vaddr_inc_cross_max_lim
  ) && s1_in_active_page && io.agt_en
  val s1_pf_gen_paddr_valid = Mux(s1_pf_gen_decr_mode, !s1_vaddr_dec_cross_page, !s1_vaddr_inc_cross_page)
  // same page: splice page frame from paddr; cross page: pass the vaddr through
  val s1_pf_gen_region_addr = Mux(s1_pf_gen_paddr_valid,
    Cat(s1_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT), s1_pf_gen_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)),
    s1_pf_gen_vaddr
  )
  val s1_pf_gen_region_tag = region_hash_tag(s1_pf_gen_vaddr)
  // incr mode: prefetch the target block and everything below it;
  // decr mode: the target block and everything above it
  val s1_pf_gen_incr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == 0) true.B else !s1_pf_gen_offset_mask(i - 1, 0).orR
  })).asUInt
  val s1_pf_gen_decr_region_bits = VecInit((0 until REGION_BLKS).map(i => {
    if(i == REGION_BLKS - 1) true.B
    else !s1_pf_gen_offset_mask(REGION_BLKS - 1, i + 1).orR
  })).asUInt
  val s1_pf_gen_region_bits = Mux(s1_pf_gen_decr_mode,
    s1_pf_gen_decr_region_bits,
    s1_pf_gen_incr_region_bits
  )
  val s1_pht_lookup_valid = Wire(Bool())
  val s1_pht_lookup = Wire(new PhtLookup())

  // fall back to the PHT whenever the AGT could not generate a prefetch
  s1_pht_lookup_valid := !s1_pf_gen_valid && prev_lookup_valid
  s1_pht_lookup.pht_index := s1_agt_entry.pht_index
  s1_pht_lookup.pht_tag := s1_agt_entry.pht_tag
  s1_pht_lookup.region_vaddr := s1_region_vaddr
  s1_pht_lookup.region_paddr := s1_region_paddr
  s1_pht_lookup.region_offset := s1_region_offset

  // region matched but not yet active: let the stride prefetcher take over
  io.s1_sel_stride := prev_lookup_valid && (s1_alloc && s1_cross_region_match || s1_update) && !s1_in_active_page

  // stage2: gen pf reg / evict entry to pht
  val s2_evict_entry = RegEnable(s1_evict_entry, s1_alloc)
  val s2_evict_valid = RegNext(s1_alloc && s1_evict_valid, false.B)
  val s2_paddr_valid = RegEnable(s1_pf_gen_paddr_valid, s1_pf_gen_valid)
  val s2_pf_gen_region_tag = RegEnable(s1_pf_gen_region_tag, s1_pf_gen_valid)
  val s2_pf_gen_decr_mode = RegEnable(s1_pf_gen_decr_mode, s1_pf_gen_valid)
  val s2_pf_gen_region_paddr = RegEnable(s1_pf_gen_region_addr, s1_pf_gen_valid)
  val s2_pf_gen_alias_bits = RegEnable(get_alias_bits(s1_pf_gen_vaddr), s1_pf_gen_valid)
  val s2_pf_gen_region_bits = RegEnable(s1_pf_gen_region_bits, s1_pf_gen_valid)
  val s2_pf_gen_valid = RegNext(s1_pf_gen_valid, false.B)
  // a stride hit in s2 cancels the PHT lookup
  val s2_pht_lookup_valid = RegNext(s1_pht_lookup_valid, false.B) && !io.s2_stride_hit
  val s2_pht_lookup = RegEnable(s1_pht_lookup, s1_pht_lookup_valid)

  io.s2_evict.valid := s2_evict_valid
  io.s2_evict.bits := s2_evict_entry

  io.s2_pf_gen_req.bits.region_tag := s2_pf_gen_region_tag
  io.s2_pf_gen_req.bits.region_addr := s2_pf_gen_region_paddr
  io.s2_pf_gen_req.bits.alias_bits := s2_pf_gen_alias_bits
  io.s2_pf_gen_req.bits.region_bits := s2_pf_gen_region_bits
  io.s2_pf_gen_req.bits.paddr_valid := s2_paddr_valid
  io.s2_pf_gen_req.bits.decr_mode := s2_pf_gen_decr_mode
  io.s2_pf_gen_req.valid := s2_pf_gen_valid
  io.s2_pf_gen_req.bits.debug_source_type := HW_PREFETCH_AGT.U

  io.s2_pht_lookup.valid := s2_pht_lookup_valid
  io.s2_pht_lookup.bits := s2_pht_lookup

  XSPerfAccumulate("sms_agt_in", io.s0_lookup.valid)
  XSPerfAccumulate("sms_agt_alloc", s1_alloc) // cross region match or filter evict
  XSPerfAccumulate("sms_agt_update", s1_update) // entry hit
  XSPerfAccumulate("sms_agt_pf_gen", io.s2_pf_gen_req.valid)
  XSPerfAccumulate("sms_agt_pf_gen_paddr_valid",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.paddr_valid
  )
  XSPerfAccumulate("sms_agt_pf_gen_decr_mode",
    io.s2_pf_gen_req.valid && io.s2_pf_gen_req.bits.decr_mode
  )
  for(i <- 0 until smsParams.active_gen_table_size){
    XSPerfAccumulate(s"sms_agt_access_entry_$i",
      s1_alloc && s1_replace_mask(i) || s1_update && s1_update_mask(i)
    )
  }

}

// Request sent from the AGT to the PHT when the AGT could not generate a prefetch.
class PhtLookup()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val pht_index = UInt(PHT_INDEX_BITS.W)
  val pht_tag = UInt(PHT_TAG_BITS.W)
  val region_paddr = UInt(REGION_ADDR_BITS.W)
  val region_vaddr = UInt(REGION_ADDR_BITS.W)
  val region_offset = UInt(REGION_OFFSET.W)
}

// One PHT way: a saturating-counter history for each block slot around the
// trigger offset (2 * (REGION_BLKS - 1) positions), plus tag and direction.
class PhtEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val
hist = Vec(2 * (REGION_BLKS - 1), UInt(PHT_HIST_BITS.W)) 618 val tag = UInt(PHT_TAG_BITS.W) 619 val decr_mode = Bool() 620} 621 622class PatternHistoryTable()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper { 623 val io = IO(new Bundle() { 624 // receive agt evicted entry 625 val agt_update = Flipped(ValidIO(new AGTEntry())) 626 // at stage2, if we know agt missed, lookup pht 627 val s2_agt_lookup = Flipped(ValidIO(new PhtLookup())) 628 // pht-generated prefetch req 629 val pf_gen_req = ValidIO(new PfGenReq()) 630 }) 631 632 val pht_ram = Module(new SRAMTemplate[PhtEntry](new PhtEntry, 633 set = smsParams.pht_size / smsParams.pht_ways, 634 way =smsParams.pht_ways, 635 singlePort = true 636 )) 637 def PHT_SETS = smsParams.pht_size / smsParams.pht_ways 638 val pht_valids = Seq.fill(smsParams.pht_ways){ 639 RegInit(VecInit(Seq.fill(PHT_SETS){false.B})) 640 } 641 val replacement = Seq.fill(PHT_SETS) { ReplacementPolicy.fromString("plru", smsParams.pht_ways) } 642 643 val lookup_queue = Module(new OverrideableQueue(new PhtLookup, smsParams.pht_lookup_queue_size)) 644 lookup_queue.io.in := io.s2_agt_lookup 645 val lookup = lookup_queue.io.out 646 647 val evict_queue = Module(new OverrideableQueue(new AGTEntry, smsParams.pht_lookup_queue_size)) 648 evict_queue.io.in := io.agt_update 649 val evict = evict_queue.io.out 650 651 XSPerfAccumulate("sms_pht_lookup_in", lookup_queue.io.in.fire) 652 XSPerfAccumulate("sms_pht_lookup_out", lookup_queue.io.out.fire) 653 XSPerfAccumulate("sms_pht_evict_in", evict_queue.io.in.fire) 654 XSPerfAccumulate("sms_pht_evict_out", evict_queue.io.out.fire) 655 656 val s3_ram_en = Wire(Bool()) 657 val s1_valid = Wire(Bool()) 658 // if s1.raddr == s2.waddr or s3 is using ram port, block s1 659 val s1_wait = Wire(Bool()) 660 // pipe s0: select an op from [lookup, update], generate ram read addr 661 val s0_valid = lookup.valid || evict.valid 662 663 evict.ready := !s1_valid || !s1_wait 664 lookup.ready := evict.ready && 
!evict.valid 665 666 val s0_ram_raddr = Mux(evict.valid, 667 evict.bits.pht_index, 668 lookup.bits.pht_index 669 ) 670 val s0_tag = Mux(evict.valid, evict.bits.pht_tag, lookup.bits.pht_tag) 671 val s0_region_offset = Mux(evict.valid, evict.bits.region_offset, lookup.bits.region_offset) 672 val s0_region_paddr = lookup.bits.region_paddr 673 val s0_region_vaddr = lookup.bits.region_vaddr 674 val s0_region_bits = evict.bits.region_bits 675 val s0_decr_mode = evict.bits.decr_mode 676 val s0_evict = evict.valid 677 678 // pipe s1: send addr to ram 679 val s1_valid_r = RegInit(false.B) 680 s1_valid_r := Mux(s1_valid && s1_wait, true.B, s0_valid) 681 s1_valid := s1_valid_r 682 val s1_reg_en = s0_valid && (!s1_wait || !s1_valid) 683 val s1_ram_raddr = RegEnable(s0_ram_raddr, s1_reg_en) 684 val s1_tag = RegEnable(s0_tag, s1_reg_en) 685 val s1_region_bits = RegEnable(s0_region_bits, s1_reg_en) 686 val s1_decr_mode = RegEnable(s0_decr_mode, s1_reg_en) 687 val s1_region_paddr = RegEnable(s0_region_paddr, s1_reg_en) 688 val s1_region_vaddr = RegEnable(s0_region_vaddr, s1_reg_en) 689 val s1_region_offset = RegEnable(s0_region_offset, s1_reg_en) 690 val s1_pht_valids = pht_valids.map(way => Mux1H( 691 (0 until PHT_SETS).map(i => i.U === s1_ram_raddr), 692 way 693 )) 694 val s1_evict = RegEnable(s0_evict, s1_reg_en) 695 val s1_replace_way = Mux1H( 696 (0 until PHT_SETS).map(i => i.U === s1_ram_raddr), 697 replacement.map(_.way) 698 ) 699 val s1_hist_update_mask = Cat( 700 Fill(REGION_BLKS - 1, true.B), 0.U((REGION_BLKS - 1).W) 701 ) >> s1_region_offset 702 val s1_hist_bits = Cat( 703 s1_region_bits.head(REGION_BLKS - 1) >> s1_region_offset, 704 (Cat( 705 s1_region_bits.tail(1), 0.U((REGION_BLKS - 1).W) 706 ) >> s1_region_offset)(REGION_BLKS - 2, 0) 707 ) 708 709 // pipe s2: generate ram write addr/data 710 val s2_valid = RegNext(s1_valid && !s1_wait, false.B) 711 val s2_reg_en = s1_valid && !s1_wait 712 val s2_hist_update_mask = RegEnable(s1_hist_update_mask, s2_reg_en) 713 val 
s2_hist_bits = RegEnable(s1_hist_bits, s2_reg_en)
  // ---- pipe s2: PHT ram read-out, tag compare, and history-counter update ----
  // (s1_* signals, s2_valid, s2_hist_update_mask, pht_ram, pht_valids, replacement,
  //  PHT_SETS, s1_wait and s3_ram_en are declared earlier in this module, outside
  //  this view.)
  val s2_tag = RegEnable(s1_tag, s2_reg_en)
  val s2_region_bits = RegEnable(s1_region_bits, s2_reg_en)
  val s2_decr_mode = RegEnable(s1_decr_mode, s2_reg_en)
  val s2_region_paddr = RegEnable(s1_region_paddr, s2_reg_en)
  val s2_region_vaddr = RegEnable(s1_region_vaddr, s2_reg_en)
  val s2_region_offset = RegEnable(s1_region_offset, s2_reg_en)
  val s2_region_offset_mask = region_offset_to_bits(s2_region_offset)
  val s2_evict = RegEnable(s1_evict, s2_reg_en)
  val s2_pht_valids = s1_pht_valids.map(v => RegEnable(v, s2_reg_en))
  val s2_replace_way = RegEnable(s1_replace_way, s2_reg_en)
  val s2_ram_waddr = RegEnable(s1_ram_raddr, s2_reg_en)
  // Synchronous ram read issued in s1 returns data in s2.
  val s2_ram_rdata = pht_ram.io.r.resp.data
  val s2_ram_rtags = s2_ram_rdata.map(_.tag)
  val s2_tag_match_vec = s2_ram_rtags.map(t => t === s2_tag)
  // A way hits only if its tag matches AND its valid bit is set.
  val s2_hit_vec = s2_tag_match_vec.zip(s2_pht_valids).map({
    case (tag_match, v) => v && tag_match
  })
  // Per-way, per-block saturating history counters:
  // increment (saturate at all-ones via andR) when the block was accessed this
  // generation, decrement (floor at 0) otherwise; untouched blocks keep their value.
  val s2_hist_update = s2_ram_rdata.map(way => VecInit(way.hist.zipWithIndex.map({
    case (h, i) =>
      val do_update = s2_hist_update_mask(i)
      val hist_updated = Mux(s2_hist_bits(i),
        Mux(h.andR, h, h + 1.U),
        Mux(h === 0.U, 0.U, h - 1.U)
      )
      Mux(do_update, hist_updated, h)
  })))
  // MSB of each counter of the hitting way = "predict this block" bit-vector.
  val s2_hist_pf_gen = Mux1H(s2_hit_vec, s2_ram_rdata.map(way => VecInit(way.hist.map(_.head(1))).asUInt))
  // On a miss, seed fresh counters with 0 or 1 from the observed access bits.
  val s2_new_hist = VecInit(s2_hist_bits.asBools.map(b => Cat(0.U((PHT_HIST_BITS - 1).W), b)))
  val s2_pht_hit = Cat(s2_hit_vec).orR
  val s2_hist = Mux(s2_pht_hit, Mux1H(s2_hit_vec, s2_hist_update), s2_new_hist)
  val s2_repl_way_mask = UIntToOH(s2_replace_way)

  // pipe s3: send addr/data to ram, gen pf_req
  val s3_valid = RegNext(s2_valid, false.B)
  val s3_evict = RegEnable(s2_evict, s2_valid)
  val s3_hist = RegEnable(s2_hist, s2_valid)
  val s3_hist_pf_gen = RegEnable(s2_hist_pf_gen, s2_valid)
  val s3_hist_update_mask = RegEnable(s2_hist_update_mask.asUInt, s2_valid)
  val s3_region_offset = RegEnable(s2_region_offset, s2_valid)
  val s3_region_offset_mask = RegEnable(s2_region_offset_mask, s2_valid)
  val s3_decr_mode = RegEnable(s2_decr_mode, s2_valid)
  val s3_region_paddr = RegEnable(s2_region_paddr, s2_valid)
  val s3_region_vaddr = RegEnable(s2_region_vaddr, s2_valid)
  val s3_pht_tag = RegEnable(s2_tag, s2_valid)
  val s3_hit_vec = s2_hit_vec.map(h => RegEnable(h, s2_valid))
  val s3_hit = Cat(s3_hit_vec).orR
  val s3_hit_way = OHToUInt(s3_hit_vec)
  val s3_repl_way = RegEnable(s2_replace_way, s2_valid)
  val s3_repl_way_mask = RegEnable(s2_repl_way_mask, s2_valid)
  // One-hot set-select mask for valid-bit / replacement-state updates below.
  val s3_repl_update_mask = RegEnable(VecInit((0 until PHT_SETS).map(i => i.U === s2_ram_waddr)), s2_valid)
  val s3_ram_waddr = RegEnable(s2_ram_waddr, s2_valid)
  // Ram is written only on an AGT eviction (training update), never on pure lookup.
  s3_ram_en := s3_valid && s3_evict
  val s3_ram_wdata = Wire(new PhtEntry())
  s3_ram_wdata.hist := s3_hist
  s3_ram_wdata.tag := s3_pht_tag
  s3_ram_wdata.decr_mode := s3_decr_mode

  // Stall s1 when the single-port ram will be written (s3), or when s2 is about to
  // write the same set that s1 wants to read (read-after-write hazard).
  s1_wait := (s2_valid && s2_evict && s2_ram_waddr === s1_ram_raddr) || s3_ram_en

  // On an evicting miss, mark the replaced (set, way) valid.
  for((valids, way_idx) <- pht_valids.zipWithIndex){
    val update_way = s3_repl_way_mask(way_idx)
    for((v, set_idx) <- valids.zipWithIndex){
      val update_set = s3_repl_update_mask(set_idx)
      when(s3_valid && s3_evict && !s3_hit && update_set && update_way){
        v := true.B
      }
    }
  }
  // Touch PLRU state: the hit way on a hit, else the victim way on an eviction.
  for((r, i) <- replacement.zipWithIndex){
    when(s3_valid && s3_repl_update_mask(i)){
      when(s3_hit){
        r.access(s3_hit_way)
      }.elsewhen(s3_evict){
        r.access(s3_repl_way)
      }
    }
  }

  // Write to the hit way when present, otherwise to the replacement victim.
  val s3_way_mask = Mux(s3_hit,
    VecInit(s3_hit_vec).asUInt,
    s3_repl_way_mask,
  ).asUInt

  pht_ram.io.r(
    s1_valid, s1_ram_raddr
  )
  pht_ram.io.w(
    s3_ram_en, s3_ram_wdata, s3_ram_waddr, s3_way_mask
  )

  when(s3_valid && s3_hit){
    assert(!Cat(s3_hit_vec).andR, "sms_pht: multi-hit!")
  }

  // generate pf req if hit
  // The stored pattern is anchored at the trigger offset; rotate it to the current
  // trigger position. hi = blocks above the trigger, lo = blocks below it.
  val s3_hist_hi = s3_hist_pf_gen.head(REGION_BLKS - 1)
  val s3_hist_lo = s3_hist_pf_gen.tail(REGION_BLKS - 1)
  val s3_hist_hi_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_hi) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  val s3_hist_lo_shifted = (Cat(0.U((REGION_BLKS - 1).W), s3_hist_lo) << s3_region_offset)(2 * (REGION_BLKS - 1) - 1, 0)
  // Portions that land inside the current region, plus spill-over into the
  // adjacent incremented/decremented regions.
  val s3_cur_region_bits = Cat(s3_hist_hi_shifted.tail(REGION_BLKS - 1), 0.U(1.W)) |
    Cat(0.U(1.W), s3_hist_lo_shifted.head(REGION_BLKS - 1))
  val s3_incr_region_bits = Cat(0.U(1.W), s3_hist_hi_shifted.head(REGION_BLKS - 1))
  val s3_decr_region_bits = Cat(s3_hist_lo_shifted.tail(REGION_BLKS - 1), 0.U(1.W))
  // Prefetch only on a lookup hit (not on the eviction/update path).
  val s3_pf_gen_valid = s3_valid && s3_hit && !s3_evict
  val s3_cur_region_valid = s3_pf_gen_valid && (s3_hist_pf_gen & s3_hist_update_mask).orR
  val s3_incr_region_valid = s3_pf_gen_valid && (s3_hist_hi & (~s3_hist_update_mask.head(REGION_BLKS - 1)).asUInt).orR
  val s3_decr_region_valid = s3_pf_gen_valid && (s3_hist_lo & (~s3_hist_update_mask.tail(REGION_BLKS - 1)).asUInt).orR
  val s3_incr_region_vaddr = s3_region_vaddr + 1.U
  val s3_incr_alias_bits = get_alias_bits(s3_incr_region_vaddr)
  val s3_decr_region_vaddr = s3_region_vaddr - 1.U
  val s3_decr_alias_bits = get_alias_bits(s3_decr_region_vaddr)
  // Adjacent-region paddr reuses the current translation's page-number bits and is
  // only trusted when the neighbour stays inside the same page (see crosspage below).
  val s3_incr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_decr_region_paddr = Cat(
    s3_region_paddr(REGION_ADDR_BITS - 1, REGION_ADDR_PAGE_BIT),
    s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT - 1, 0)
  )
  val s3_incr_crosspage = s3_incr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_decr_crosspage = s3_decr_region_vaddr(REGION_ADDR_PAGE_BIT) =/= s3_region_vaddr(REGION_ADDR_PAGE_BIT)
  val s3_cur_region_tag = region_hash_tag(s3_region_vaddr)
  val s3_incr_region_tag = region_hash_tag(s3_incr_region_vaddr)
  val s3_decr_region_tag = region_hash_tag(s3_decr_region_vaddr)

  // s4: latch up to three region requests (cur / incr / decr) and arbitrate.
  val pf_gen_req_arb = Module(new Arbiter(new PfGenReq, 3))
  val s4_pf_gen_cur_region_valid = RegInit(false.B)
  val s4_pf_gen_cur_region = Reg(new PfGenReq)
  val s4_pf_gen_incr_region_valid = RegInit(false.B)
  val s4_pf_gen_incr_region = Reg(new PfGenReq)
  val s4_pf_gen_decr_region_valid = RegInit(false.B)
  val s4_pf_gen_decr_region = Reg(new PfGenReq)

  // NOTE(review): cur-region valid does NOT hold when the arbiter port is not
  // ready (unlike incr/decr below) — presumably because port 0 has highest
  // priority and is always granted; confirm against Arbiter priority semantics.
  s4_pf_gen_cur_region_valid := s3_cur_region_valid
  when(s3_cur_region_valid){
    s4_pf_gen_cur_region.region_addr := s3_region_paddr
    s4_pf_gen_cur_region.alias_bits := get_alias_bits(s3_region_vaddr)
    s4_pf_gen_cur_region.region_tag := s3_cur_region_tag
    s4_pf_gen_cur_region.region_bits := s3_cur_region_bits
    s4_pf_gen_cur_region.paddr_valid := true.B
    s4_pf_gen_cur_region.decr_mode := false.B
  }
  // incr/decr requests persist until the arbiter accepts them (may be overwritten
  // by a newer s3 request before being granted).
  s4_pf_gen_incr_region_valid := s3_incr_region_valid ||
    (!pf_gen_req_arb.io.in(1).ready && s4_pf_gen_incr_region_valid)
  when(s3_incr_region_valid){
    // On a page cross the reused translation is invalid: pass the vaddr instead
    // and let the downstream filter translate it (paddr_valid := false).
    s4_pf_gen_incr_region.region_addr := Mux(s3_incr_crosspage, s3_incr_region_vaddr, s3_incr_region_paddr)
    s4_pf_gen_incr_region.alias_bits := s3_incr_alias_bits
    s4_pf_gen_incr_region.region_tag := s3_incr_region_tag
    s4_pf_gen_incr_region.region_bits := s3_incr_region_bits
    s4_pf_gen_incr_region.paddr_valid := !s3_incr_crosspage
    s4_pf_gen_incr_region.decr_mode := false.B
  }
  s4_pf_gen_decr_region_valid := s3_decr_region_valid ||
    (!pf_gen_req_arb.io.in(2).ready && s4_pf_gen_decr_region_valid)
  when(s3_decr_region_valid){
    s4_pf_gen_decr_region.region_addr := Mux(s3_decr_crosspage, s3_decr_region_vaddr, s3_decr_region_paddr)
    s4_pf_gen_decr_region.alias_bits := s3_decr_alias_bits
    s4_pf_gen_decr_region.region_tag := s3_decr_region_tag
    s4_pf_gen_decr_region.region_bits := s3_decr_region_bits
    s4_pf_gen_decr_region.paddr_valid := !s3_decr_crosspage
    s4_pf_gen_decr_region.decr_mode := true.B
  }

  pf_gen_req_arb.io.in.head.valid := s4_pf_gen_cur_region_valid
  pf_gen_req_arb.io.in.head.bits := s4_pf_gen_cur_region
  pf_gen_req_arb.io.in.head.bits.debug_source_type := HW_PREFETCH_PHT_CUR.U
  pf_gen_req_arb.io.in(1).valid := s4_pf_gen_incr_region_valid
  pf_gen_req_arb.io.in(1).bits := s4_pf_gen_incr_region
  pf_gen_req_arb.io.in(1).bits.debug_source_type := HW_PREFETCH_PHT_INC.U
  pf_gen_req_arb.io.in(2).valid := s4_pf_gen_decr_region_valid
  pf_gen_req_arb.io.in(2).bits := s4_pf_gen_decr_region
  pf_gen_req_arb.io.in(2).bits.debug_source_type := HW_PREFETCH_PHT_DEC.U
  pf_gen_req_arb.io.out.ready := true.B

  io.pf_gen_req.valid := pf_gen_req_arb.io.out.valid
  io.pf_gen_req.bits := pf_gen_req_arb.io.out.bits

  XSPerfAccumulate("sms_pht_update", io.agt_update.valid)
  XSPerfAccumulate("sms_pht_update_hit", s2_valid && s2_evict && s2_pht_hit)
  XSPerfAccumulate("sms_pht_lookup", io.s2_agt_lookup.valid)
  XSPerfAccumulate("sms_pht_lookup_hit", s2_valid && !s2_evict && s2_pht_hit)
  for(i <- 0 until smsParams.pht_ways){
    XSPerfAccumulate(s"sms_pht_write_way_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.waymask.get(i))
  }
  for(i <- 0 until PHT_SETS){
    XSPerfAccumulate(s"sms_pht_write_set_$i", pht_ram.io.w.req.fire && pht_ram.io.w.req.bits.setIdx === i.U)
  }
  XSPerfAccumulate(s"sms_pht_pf_gen", io.pf_gen_req.valid)
}

/**
  * One entry of the prefetch filter.
  *
  * Tracks a region currently being prefetched: which blocks are requested
  * (region_bits), which have already been issued (filter_bits), and the
  * region's translated address once the TLB responds (paddr_valid).
  */
class PrefetchFilterEntry()(implicit p: Parameters) extends XSBundle with HasSMSModuleHelper {
  val region_tag = UInt(REGION_TAG_WIDTH.W)
  val region_addr = UInt(REGION_ADDR_BITS.W)
  // Blocks within the region to prefetch (one bit per cache block).
  val region_bits = UInt(REGION_BLKS.W)
  // Blocks already issued; a block is pending while region_bits & ~filter_bits.
  val filter_bits = UInt(REGION_BLKS.W)
  val alias_bits = UInt(2.W)
  // region_addr holds a physical address iff paddr_valid; otherwise a vaddr
  // awaiting translation.
  val paddr_valid = Bool()
  // In decr mode, issue blocks from the highest pending offset downwards.
  val decr_mode = Bool()
  val debug_source_type = UInt(log2Up(nSourceType).W)
}

/**
  * Prefetch filter: deduplicates region-level prefetch-generate requests,
  * translates vaddr-only requests via the TLB, and drains pending blocks
  * one cache line at a time to the L2 prefetch port.
  *
  * Pipeline: s0 looks the incoming request up in all entries; s1 either
  * merges it into a matching entry or allocates a victim entry (PLRU).
  */
class PrefetchFilter()(implicit p: Parameters) extends XSModule with HasSMSModuleHelper {
  val io = IO(new Bundle() {
    val gen_req = Flipped(ValidIO(new PfGenReq()))
    val tlb_req = new TlbRequestIO(2)
    val l2_pf_addr = ValidIO(UInt(PAddrBits.W))
    val pf_alias_bits = Output(UInt(2.W))
    val debug_source_type = Output(UInt(log2Up(nSourceType).W))
  })
  val entries = Seq.fill(smsParams.pf_filter_size){ Reg(new PrefetchFilterEntry()) }
  val valids = Seq.fill(smsParams.pf_filter_size){ RegInit(false.B) }
  val replacement = ReplacementPolicy.fromString("plru", smsParams.pf_filter_size)

  // Remember last cycle's request to drop immediate same-region repeats.
  val prev_valid = RegNext(io.gen_req.valid, false.B)
  val prev_gen_req = RegEnable(io.gen_req.bits, io.gen_req.valid)

  // Round-robin over entries for both TLB translation and L2 issue.
  val tlb_req_arb = Module(new RRArbiterInit(new TlbReq, smsParams.pf_filter_size))
  val pf_req_arb = Module(new RRArbiterInit(UInt(PAddrBits.W), smsParams.pf_filter_size))

  io.tlb_req.req <> tlb_req_arb.io.out
  io.tlb_req.resp.ready := true.B
  io.tlb_req.req_kill := false.B
  io.l2_pf_addr.valid := pf_req_arb.io.out.valid
  io.l2_pf_addr.bits := pf_req_arb.io.out.bits
  // Side-band the chosen entry's alias bits alongside the issued address.
  io.pf_alias_bits := Mux1H(entries.zipWithIndex.map({
    case (entry, i) => (i.U === pf_req_arb.io.chosen) -> entry.alias_bits
  }))
  pf_req_arb.io.out.ready := true.B

  io.debug_source_type := VecInit(entries.map(_.debug_source_type))(pf_req_arb.io.chosen)

  val s1_valid = Wire(Bool())
  val s1_hit = Wire(Bool())
  val s1_replace_vec = Wire(UInt(smsParams.pf_filter_size.W))
  val s1_tlb_fire_vec = Wire(UInt(smsParams.pf_filter_size.W))

  // s0: entries lookup
  val s0_gen_req = io.gen_req.bits
  val s0_match_prev = prev_valid && (s0_gen_req.region_tag === prev_gen_req.region_tag)
  val s0_gen_req_valid = io.gen_req.valid && !s0_match_prev
  // An entry being replaced by s1 this cycle must not count as a match.
  val s0_match_vec = valids.indices.map(i => {
    valids(i) && entries(i).region_tag === s0_gen_req.region_tag && !(s1_valid && !s1_hit && s1_replace_vec(i))
  })
  val s0_any_matched = Cat(s0_match_vec).orR
  val s0_replace_vec = UIntToOH(replacement.way)
  val s0_hit = s0_gen_req_valid && s0_any_matched

  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val is_evicted = s1_valid && s1_replace_vec(i)
    // Request translation for entries that still hold an untranslated vaddr,
    // unless a request for this entry already fired last cycle or it is being
    // evicted.
    tlb_req_arb.io.in(i).valid := v && !s1_tlb_fire_vec(i) && !ent.paddr_valid && !is_evicted
    tlb_req_arb.io.in(i).bits.vaddr := Cat(ent.region_addr, 0.U(log2Up(REGION_SIZE).W))
    tlb_req_arb.io.in(i).bits.cmd := TlbCmd.read
    tlb_req_arb.io.in(i).bits.size := 3.U
    // NOTE(review): the robIdx assignment below is subsumed by the full-bundle
    // `debug := DontCare` two lines later (Chisel last-connect wins); it is
    // redundant but harmless.
    tlb_req_arb.io.in(i).bits.debug.robIdx := DontCare
    tlb_req_arb.io.in(i).bits.no_translate := false.B
    tlb_req_arb.io.in(i).bits.debug := DontCare

    // Blocks requested but not yet issued.
    val pending_req_vec = ent.region_bits & (~ent.filter_bits).asUInt
    // NOTE(review): the offset tables below iterate over pf_filter_size, but the
    // vector being scanned is REGION_BLKS bits wide; the two are equal (16) under
    // the default SMSParams, yet REGION_BLKS looks like the semantically correct
    // bound — confirm before changing either parameter independently.
    val first_one_offset = PriorityMux(
      pending_req_vec.asBools,
      (0 until smsParams.pf_filter_size).map(_.U(REGION_OFFSET.W))
    )
    val last_one_offset = PriorityMux(
      pending_req_vec.asBools.reverse,
      (0 until smsParams.pf_filter_size).reverse.map(_.U(REGION_OFFSET.W))
    )
    // decr_mode drains from the top of the region downwards, else bottom-up.
    val pf_addr = Cat(
      ent.region_addr,
      Mux(ent.decr_mode, last_one_offset, first_one_offset),
      0.U(log2Up(dcacheParameters.blockBytes).W)
    )
    // Only issue once the region address is physical (translation done).
    pf_req_arb.io.in(i).valid := v && Cat(pending_req_vec).orR && ent.paddr_valid && !is_evicted
    pf_req_arb.io.in(i).bits := pf_addr
  }

  val s0_tlb_fire_vec = VecInit(tlb_req_arb.io.in.map(_.fire))
  val s0_pf_fire_vec = VecInit(pf_req_arb.io.in.map(_.fire))

  val s0_update_way = OHToUInt(s0_match_vec)
  val s0_replace_way = replacement.way
  val s0_access_way = Mux(s0_any_matched, s0_update_way, s0_replace_way)
  when(s0_gen_req_valid){
    replacement.access(s0_access_way)
  }

  // s1: update or alloc
  val s1_valid_r = RegNext(s0_gen_req_valid, false.B)
  val s1_hit_r = RegEnable(s0_hit, false.B, s0_gen_req_valid)
  val s1_gen_req = RegEnable(s0_gen_req, s0_gen_req_valid)
  val s1_replace_vec_r = RegEnable(s0_replace_vec, s0_gen_req_valid && !s0_hit)
  val s1_update_vec = RegEnable(VecInit(s0_match_vec).asUInt, s0_gen_req_valid && s0_hit)
  val s1_tlb_fire_vec_r = RegNext(s0_tlb_fire_vec, 0.U.asTypeOf(s0_tlb_fire_vec))
  val s1_alloc_entry = Wire(new PrefetchFilterEntry())
  s1_valid := s1_valid_r
  s1_hit := s1_hit_r
  s1_replace_vec := s1_replace_vec_r
  s1_tlb_fire_vec := s1_tlb_fire_vec_r.asUInt
  s1_alloc_entry.region_tag := s1_gen_req.region_tag
  s1_alloc_entry.region_addr := s1_gen_req.region_addr
  s1_alloc_entry.region_bits := s1_gen_req.region_bits
  s1_alloc_entry.paddr_valid := s1_gen_req.paddr_valid
  s1_alloc_entry.decr_mode := s1_gen_req.decr_mode
  // Fresh entry: nothing issued yet.
  s1_alloc_entry.filter_bits := 0.U
  s1_alloc_entry.alias_bits := s1_gen_req.alias_bits
  s1_alloc_entry.debug_source_type := s1_gen_req.debug_source_type
  for(((v, ent), i) <- valids.zip(entries).zipWithIndex){
    val alloc = s1_valid && !s1_hit && s1_replace_vec(i)
    val update = s1_valid && s1_hit && s1_update_vec(i)
    // for pf: use s0 data
    val pf_fired = s0_pf_fire_vec(i)
    // TLB response arrives the cycle after the request fired.
    val tlb_fired = s1_tlb_fire_vec(i) && !io.tlb_req.resp.bits.miss
    when(tlb_fired){
      // NOTE(review): tlb_fired already implies !miss, so this always sets
      // paddr_valid to true here; the expression is redundant but harmless.
      ent.paddr_valid := !io.tlb_req.resp.bits.miss
      ent.region_addr := region_addr(io.tlb_req.resp.bits.paddr.head)
    }
    when(update){
      // Merge new block requests into the existing region entry.
      ent.region_bits := ent.region_bits | s1_gen_req.region_bits
    }
    when(pf_fired){
      // Mark the issued block so it is not re-issued.
      val curr_bit = UIntToOH(block_addr(pf_req_arb.io.in(i).bits)(REGION_OFFSET - 1, 0))
      ent.filter_bits := ent.filter_bits | curr_bit
    }
    when(alloc){
      ent := s1_alloc_entry
      v := true.B
    }
  }
  when(s1_valid && s1_hit){
    assert(PopCount(s1_update_vec) === 1.U, "sms_pf_filter: multi-hit")
  }

  XSPerfAccumulate("sms_pf_filter_recv_req", io.gen_req.valid)
  XSPerfAccumulate("sms_pf_filter_hit", s1_valid && s1_hit)
  XSPerfAccumulate("sms_pf_filter_tlb_req", io.tlb_req.req.fire)
  XSPerfAccumulate("sms_pf_filter_tlb_resp_miss", io.tlb_req.resp.fire && io.tlb_req.resp.bits.miss)
  for(i <- 0 until smsParams.pf_filter_size){
    XSPerfAccumulate(s"sms_pf_filter_access_way_$i", s0_gen_req_valid && s0_access_way === i.U)
  }
  XSPerfAccumulate("sms_pf_filter_l2_req", io.l2_pf_addr.valid)
}

/**
  * Top-level SMS (Spatial Memory Streaming) prefetcher.
  *
  * Selects one training load per cycle from the two load pipes (deduplicated
  * against recent loads, oldest first), trains the active generation table
  * (AGT), a stride predictor, and the pattern history table (PHT), and funnels
  * the resulting region prefetch requests through the prefetch filter to L2.
  */
class SMSPrefetcher()(implicit p: Parameters) extends BasePrefecher with HasSMSModuleHelper {

  // The train-source select logic below is hard-wired for exactly two load pipes.
  require(exuParameters.LduCnt == 2)

  val io_agt_en = IO(Input(Bool()))
  val io_stride_en = IO(Input(Bool()))
  val io_pht_en = IO(Input(Bool()))
  val io_act_threshold = IO(Input(UInt(REGION_OFFSET.W)))
  val io_act_stride = IO(Input(UInt(6.W)))

  val ld_curr = io.ld_in.map(_.bits)
  val ld_curr_block_tag = ld_curr.map(x => block_hash_tag(x.vaddr))

  // block filter
  // Drop a load whose block matches either of last cycle's trained loads.
  val ld_prev = io.ld_in.map(ld => RegEnable(ld.bits, ld.valid))
  val ld_prev_block_tag = ld_curr_block_tag.zip(io.ld_in.map(_.valid)).map({
    case (tag, v) => RegEnable(tag, v)
  })
  val ld_prev_vld = io.ld_in.map(ld => RegNext(ld.valid, false.B))

  val ld_curr_match_prev = ld_curr_block_tag.map(cur_tag =>
    Cat(ld_prev_block_tag.zip(ld_prev_vld).map({
      case (prev_tag, prev_vld) => prev_vld && prev_tag === cur_tag
    })).orR
  )
  // If both pipes touch the same block this cycle, keep only pipe 0.
  val ld0_match_ld1 = io.ld_in.head.valid && io.ld_in.last.valid && ld_curr_block_tag.head === ld_curr_block_tag.last
  val ld_curr_vld = Seq(
    io.ld_in.head.valid && !ld_curr_match_prev.head,
    io.ld_in.last.valid && !ld_curr_match_prev.last && !ld0_match_ld1
  )
  // When both survive, train the older one now and buffer the younger for the
  // next cycle (pending_*).
  val ld0_older_than_ld1 = Cat(ld_curr_vld).andR && isBefore(ld_curr.head.uop.robIdx, ld_curr.last.uop.robIdx)
  val pending_vld = RegNext(Cat(ld_curr_vld).andR, false.B)
  val pending_sel_ld0 = RegNext(Mux(pending_vld, ld0_older_than_ld1, !ld0_older_than_ld1))
  val pending_ld = Mux(pending_sel_ld0, ld_prev.head, ld_prev.last)
  val pending_ld_block_tag = Mux(pending_sel_ld0, ld_prev_block_tag.head, ld_prev_block_tag.last)
  val oldest_ld = Mux(pending_vld,
    pending_ld,
    Mux(ld0_older_than_ld1 || !ld_curr_vld.last, ld_curr.head, ld_curr.last)
  )

  val train_ld = RegEnable(oldest_ld, pending_vld || Cat(ld_curr_vld).orR)

  val train_block_tag = block_hash_tag(train_ld.vaddr)
  val train_region_tag = train_block_tag.head(REGION_TAG_WIDTH)

  // Neighbouring (+1/-1) region addresses, with an extra carry bit to detect
  // wrap-around at the ends of the address space.
  val train_region_addr_raw = region_addr(train_ld.vaddr)(REGION_TAG_WIDTH + 2 * VADDR_HASH_WIDTH - 1, 0)
  val train_region_addr_p1 = Cat(0.U(1.W), train_region_addr_raw) + 1.U
  val train_region_addr_m1 = Cat(0.U(1.W), train_region_addr_raw) - 1.U
  // addr_p1 or addr_m1 is valid?
  val train_allow_cross_region_p1 = !train_region_addr_p1.head(1).asBool
  val train_allow_cross_region_m1 = !train_region_addr_m1.head(1).asBool

  val train_region_p1_tag = region_hash_tag(train_region_addr_p1.tail(1))
  val train_region_m1_tag = region_hash_tag(train_region_addr_m1.tail(1))

  val train_region_p1_cross_page = page_bit(train_region_addr_p1) ^ page_bit(train_region_addr_raw)
  val train_region_m1_cross_page = page_bit(train_region_addr_m1) ^ page_bit(train_region_addr_raw)

  val train_region_paddr = region_addr(train_ld.paddr)
  val train_region_vaddr = region_addr(train_ld.vaddr)
  val train_region_offset = train_block_tag(REGION_OFFSET - 1, 0)
  val train_vld = RegNext(pending_vld || Cat(ld_curr_vld).orR, false.B)


  // prefetch stage0
  val active_gen_table = Module(new ActiveGenerationTable())
  val stride = Module(new StridePF())
  val pht = Module(new PatternHistoryTable())
  val pf_filter = Module(new PrefetchFilter())

  // Extra pipeline stage between train selection and the sub-predictors.
  val train_vld_s0 = RegNext(train_vld, false.B)
  val train_s0 = RegEnable(train_ld, train_vld)
  val train_region_tag_s0 = RegEnable(train_region_tag, train_vld)
  val train_region_p1_tag_s0 = RegEnable(train_region_p1_tag, train_vld)
  val train_region_m1_tag_s0 = RegEnable(train_region_m1_tag, train_vld)
  val train_allow_cross_region_p1_s0 = RegEnable(train_allow_cross_region_p1, train_vld)
  val train_allow_cross_region_m1_s0 = RegEnable(train_allow_cross_region_m1, train_vld)
  val train_pht_tag_s0 = RegEnable(pht_tag(train_ld.uop.cf.pc), train_vld)
  val train_pht_index_s0 = RegEnable(pht_index(train_ld.uop.cf.pc), train_vld)
  val train_region_offset_s0 = RegEnable(train_region_offset, train_vld)
  val train_region_p1_cross_page_s0 = RegEnable(train_region_p1_cross_page, train_vld)
  val train_region_m1_cross_page_s0 = RegEnable(train_region_m1_cross_page, train_vld)
  val train_region_paddr_s0 = RegEnable(train_region_paddr, train_vld)
  val train_region_vaddr_s0 = RegEnable(train_region_vaddr, train_vld)

  active_gen_table.io.agt_en := io_agt_en
  active_gen_table.io.act_threshold := io_act_threshold
  active_gen_table.io.act_stride := io_act_stride
  active_gen_table.io.s0_lookup.valid := train_vld_s0
  active_gen_table.io.s0_lookup.bits.region_tag := train_region_tag_s0
  active_gen_table.io.s0_lookup.bits.region_p1_tag := train_region_p1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_m1_tag := train_region_m1_tag_s0
  active_gen_table.io.s0_lookup.bits.region_offset := train_region_offset_s0
  active_gen_table.io.s0_lookup.bits.pht_index := train_pht_index_s0
  active_gen_table.io.s0_lookup.bits.pht_tag := train_pht_tag_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_p1 := train_allow_cross_region_p1_s0
  active_gen_table.io.s0_lookup.bits.allow_cross_region_m1 := train_allow_cross_region_m1_s0
  active_gen_table.io.s0_lookup.bits.region_p1_cross_page := train_region_p1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_m1_cross_page := train_region_m1_cross_page_s0
  active_gen_table.io.s0_lookup.bits.region_paddr := train_region_paddr_s0
  active_gen_table.io.s0_lookup.bits.region_vaddr := train_region_vaddr_s0
  active_gen_table.io.s2_stride_hit := stride.io.s2_gen_req.valid

  stride.io.stride_en := io_stride_en
  stride.io.s0_lookup.valid := train_vld_s0
  stride.io.s0_lookup.bits.pc := train_s0.uop.cf.pc(STRIDE_PC_BITS - 1, 0)
  stride.io.s0_lookup.bits.vaddr := Cat(
    train_region_vaddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s0_lookup.bits.paddr := Cat(
    train_region_paddr_s0, train_region_offset_s0, 0.U(log2Up(dcacheParameters.blockBytes).W)
  )
  stride.io.s1_valid := active_gen_table.io.s1_sel_stride

  pht.io.s2_agt_lookup := active_gen_table.io.s2_pht_lookup
  pht.io.agt_update := active_gen_table.io.s2_evict

  // AGT/stride requests (mutually exclusive, asserted below) take priority over
  // PHT-generated requests.
  val pht_gen_valid = pht.io.pf_gen_req.valid && io_pht_en
  val agt_gen_valid = active_gen_table.io.s2_pf_gen_req.valid
  val stride_gen_valid = stride.io.s2_gen_req.valid
  val pf_gen_req = Mux(agt_gen_valid || stride_gen_valid,
    Mux1H(Seq(
      agt_gen_valid -> active_gen_table.io.s2_pf_gen_req.bits,
      stride_gen_valid -> stride.io.s2_gen_req.bits
    )),
    pht.io.pf_gen_req.bits
  )
  assert(!(agt_gen_valid && stride_gen_valid))
  pf_filter.io.gen_req.valid := pht_gen_valid || agt_gen_valid || stride_gen_valid
  pf_filter.io.gen_req.bits := pf_gen_req
  io.tlb_req <> pf_filter.io.tlb_req
  // NOTE(review): magic constant — presumably the DRAM base address, so only
  // addresses in main memory are prefetched; confirm against the platform
  // memory map.
  val is_valid_address = pf_filter.io.l2_pf_addr.bits > 0x80000000L.U
  io.pf_addr.valid := pf_filter.io.l2_pf_addr.valid && io.enable && is_valid_address
  io.pf_addr.bits := pf_filter.io.l2_pf_addr.bits
  // L1 prefetch port is wired up but permanently disabled (valid := false.B).
  io.l1_req.bits.paddr := pf_filter.io.l2_pf_addr.bits
  io.l1_req.bits.alias := pf_filter.io.pf_alias_bits
  io.l1_req.bits.is_store := true.B
  io.l1_req.bits.confidence := 1.U
  io.l1_req.valid := false.B

  for((train, i) <- io.ld_in.zipWithIndex){
    XSPerfAccumulate(s"pf_train_miss_${i}", train.valid && train.bits.miss)
    XSPerfAccumulate(s"pf_train_prefetched_${i}", train.valid && train.bits.meta_prefetch)
  }
  // Log issued prefetch addresses to ChiselDB for offline analysis
  // (vaddr/pc are not tracked here, hence tied to zero).
  val trace = Wire(new L1MissTrace)
  trace.vaddr := 0.U
  trace.pc := 0.U
  trace.paddr := io.pf_addr.bits
  trace.source := pf_filter.io.debug_source_type
  val table = ChiselDB.createTable("L1MissTrace", new L1MissTrace)
  table.log(trace, io.pf_addr.fire, "SMSPrefetcher", clock, reset)

  XSPerfAccumulate("sms_pf_gen_conflict",
    pht_gen_valid && agt_gen_valid
  )
  XSPerfAccumulate("sms_pht_disabled", pht.io.pf_gen_req.valid && !io_pht_en)
  XSPerfAccumulate("sms_agt_disabled", active_gen_table.io.s2_pf_gen_req.valid && !io_agt_en)
  XSPerfAccumulate("sms_pf_real_issued", io.pf_addr.valid)
  XSPerfAccumulate("sms_l1_req_valid", io.l1_req.valid)
  XSPerfAccumulate("sms_l1_req_fire", io.l1_req.fire)
}