/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem.prefetch

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.ClientStates._
import freechips.rocketchip.tilelink.MemoryOpCategories._
import freechips.rocketchip.tilelink.TLPermissions._
import freechips.rocketchip.tilelink.{ClientMetadata, ClientStates, TLPermissions}
import xiangshan.backend.rob.RobDebugRollingIO
import utils._
import utility._
import xiangshan.{L1CacheErrorInfo, XSCoreParamsKey}
import xiangshan.mem.HasL1PrefetchSourceParameter
import utility.{CircularQueuePtr}
import xiangshan.cache._
import xiangshan.{XSBundle, XSModule}

//----------------------------------------
// Feedback Direct Prefetching
class CounterFilterDataBundle(implicit p: Parameters) extends DCacheBundle {
  val idx = UInt(idxBits.W)
  val way = UInt(wayBits.W)
}

class CounterFilterQueryBundle(implicit p: Parameters) extends DCacheBundle {
  val req = ValidIO(new CounterFilterDataBundle())
  val resp = Input(Bool())
}

// There is no set blocking in LoadPipe, so duplicate results occur when counting useful prefetches.
//     s0    s1    s2    s3
//     r                 w
// If 3 load instructions access the same cache line (set0, way0) in s0, s1 and s2,
// each of them sees a prefetch hit and the useful prefetch counter is incremented 3 times.
// So when a load arrives at s3, its set & way are saved into a FIFO; all loads search this FIFO to avoid this case.
class CounterFilter()(implicit p: Parameters) extends DCacheModule {
  private val LduCnt = backendParams.LduCnt

  val io = IO(new Bundle() {
    // input, only from load for now
    val ld_in = Flipped(Vec(LduCnt, ValidIO(new CounterFilterDataBundle())))
    val query = Flipped(Vec(LduCnt, new CounterFilterQueryBundle()))
  })

  val LduStages = 4
  val SIZE = LduStages * LduCnt
  class Ptr(implicit p: Parameters) extends CircularQueuePtr[Ptr]( p => SIZE ){}
  object Ptr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): Ptr = {
      val ptr = Wire(new Ptr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  val entries = RegInit(VecInit(Seq.fill(SIZE){ (0.U.asTypeOf(new CounterFilterDataBundle())) }))
  val valids = RegInit(VecInit(Seq.fill(SIZE){ (false.B) }))

  // enq
  val enqLen = LduCnt
  val deqLen = LduCnt
  val enqPtrExt = RegInit(VecInit((0 until enqLen).map(_.U.asTypeOf(new Ptr))))
  val deqPtrExt = RegInit(VecInit((0 until deqLen).map(_.U.asTypeOf(new Ptr))))

  val deqPtr = WireInit(deqPtrExt(0).value)

  val reqs_l = io.ld_in.map(_.bits)
  val reqs_vl = io.ld_in.map(_.valid)
  val needAlloc = Wire(Vec(enqLen, Bool()))
  val canAlloc = Wire(Vec(enqLen, Bool()))
  val last3CycleAlloc = RegInit(0.U(log2Ceil(LduCnt + 1).W))
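
  // last3CycleAlloc holds the number of entries allocated 3 cycles earlier (allocNum
  // delayed through two RegNext stages into this register). The deq logic below frees
  // exactly that many of the oldest entries every cycle, so an entry written by a load
  // in s3 stays queryable while the younger loads behind it (s0/s1/s2) catch up.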
  for(i <- (0 until enqLen)) {
    val req = reqs_l(i)
    val req_v = reqs_vl(i)
    val index = PopCount(needAlloc.take(i))
    val allocPtr = enqPtrExt(index)

    needAlloc(i) := req_v
    canAlloc(i) := needAlloc(i) && allocPtr >= deqPtrExt(0)

    when(canAlloc(i)) {
      valids(allocPtr.value) := true.B
      entries(allocPtr.value) := req
    }

    assert(!needAlloc(i) || canAlloc(i), s"port${i} can not accept CounterFilter enq request, check if SIZE >= (Ldu stages - 2) * LduCnt")
  }
  val allocNum = PopCount(canAlloc)

  enqPtrExt.foreach{case x => x := x + allocNum}
  last3CycleAlloc := RegNext(RegNext(allocNum))

  // deq
  for(i <- (0 until deqLen)) {
    when(i.U < last3CycleAlloc) {
      valids(deqPtrExt(i).value) := false.B
    }
  }

  deqPtrExt.foreach{case x => x := x + last3CycleAlloc}

  // query
  val querys_l = io.query.map(_.req.bits)
  val querys_vl = io.query.map(_.req.valid)
  for(i <- (0 until LduCnt)) {
    val q = querys_l(i)
    val q_v = querys_vl(i)

    val entry_match = Cat(entries.zip(valids).map {
      case(e, v) => v && (q.idx === e.idx) && (q.way === e.way)
    }).orR

    io.query(i).resp := q_v && entry_match
  }

  XSPerfAccumulate("req_nums", PopCount(io.query.map(_.req.valid)))
  XSPerfAccumulate("req_set_way_match", PopCount(io.query.map(_.resp)))
}

class BloomQueryBundle(n: Int)(implicit p: Parameters) extends DCacheBundle {
  val addr = UInt(BLOOMADDRWIDTH.W)

  def BLOOMADDRWIDTH = log2Ceil(n)

  // fold the block address into BLOOMADDRWIDTH bits by XOR-ing its low and high slices
  // (a plain-Scala model of this hash is sketched after BloomFilter below)
  def get_addr(paddr: UInt): UInt = {
    assert(paddr.getWidth == PAddrBits)
    assert(paddr.getWidth >= (blockOffBits + 2 * BLOOMADDRWIDTH))
    val block_paddr = paddr(paddr.getWidth - 1, blockOffBits)
    val low_part = block_paddr(BLOOMADDRWIDTH - 1, 0)
    val high_part = block_paddr(2 * BLOOMADDRWIDTH - 1, BLOOMADDRWIDTH)
    low_part ^ high_part
  }
}

class BloomRespBundle(implicit p: Parameters) extends DCacheBundle {
  val res = Bool()
}
class BloomFilter(n: Int, bypass: Boolean = true)(implicit p: Parameters) extends DCacheModule {
  val io = IO(new DCacheBundle {
    val set = Flipped(ValidIO(new BloomQueryBundle(n)))
    val clr = Flipped(ValidIO(new BloomQueryBundle(n)))
    val query = Vec(LoadPipelineWidth, Flipped(ValidIO(new BloomQueryBundle(n))))
    val resp = Vec(LoadPipelineWidth, ValidIO(new BloomRespBundle))
  })

  val data = RegInit(0.U(n.W))
  val data_next = Wire(Vec(n, Bool()))

  for (i <- 0 until n) {
    when(io.clr.valid && i.U === io.clr.bits.addr) {
      data_next(i) := false.B
    }.elsewhen(io.set.valid && i.U === io.set.bits.addr) {
      data_next(i) := true.B
    }.otherwise {
      data_next(i) := data(i).asBool
    }
  }

  // resp will be valid in the next cycle
  for(i <- 0 until LoadPipelineWidth) {
    io.resp(i).valid := RegNext(io.query(i).valid)
    if(bypass) {
      io.resp(i).bits.res := RegEnable(data_next(io.query(i).bits.addr), io.query(i).valid)
    } else {
      io.resp(i).bits.res := RegEnable(data(io.query(i).bits.addr), io.query(i).valid)
    }
  }

  data := data_next.asUInt

  assert(PopCount(data ^ data_next.asUInt) <= 2.U)

  XSPerfHistogram("valid_nums", PopCount(data), true.B, 0, n + 1, 20)
}
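
// Documentation-only sketch (the object name is ours; nothing above instantiates it):
// BloomQueryBundle.get_addr picks one of the n filter bits by dropping the block offset
// from paddr and XOR-folding the low and high BLOOMADDRWIDTH-bit slices of the block
// address. A plain-Scala model of that hash, assuming blockAddr is already
// paddr >> blockOffBits:
object BloomFoldHashSketch {
  def fold(blockAddr: BigInt, bloomAddrWidth: Int): BigInt = {
    val mask = (BigInt(1) << bloomAddrWidth) - 1
    val low  = blockAddr & mask                      // block_paddr(W - 1, 0)
    val high = (blockAddr >> bloomAddrWidth) & mask  // block_paddr(2W - 1, W)
    low ^ high                                       // bit index into the n-bit filter
  }
}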
class FDPrefetcherMonitorBundle()(implicit p: Parameters) extends XSBundle {
  val refill = Input(Bool()) // from refill pipe, fire
  val accuracy = new XSBundle {
    val total_prefetch = Input(Bool()) // from mshr enq, fire, alloc, prefetch
    val useful_prefetch = Vec(LoadPipelineWidth, Input(Bool())) // from load pipeline, prefetch hit
  }

  val timely = new XSBundle {
    val late_prefetch = Input(Bool()) // from mshr enq, a load matches an MSHR caused by a prefetch
  }

  val pollution = new XSBundle {
    val demand_miss = Vec(LoadPipelineWidth, Input(Bool())) // from load pipeline, first miss
    val cache_pollution = Vec(LoadPipelineWidth, Input(Bool())) // from load pipeline, first miss that is caused by pollution
  }

  val pf_ctrl = Output(new PrefetchControlBundle)
  val debugRolling = Flipped(new RobDebugRollingIO)
}

class FDPrefetcherMonitor()(implicit p: Parameters) extends XSModule {
  val io = IO(new FDPrefetcherMonitorBundle)

  val INTERVAL = 8192
  val CNTWIDTH = log2Up(INTERVAL) + 1

  io.pf_ctrl := DontCare

  val refill_cnt = RegInit(0.U(CNTWIDTH.W))

  val total_prefetch_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val useful_prefetch_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val late_prefetch_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val demand_miss_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val pollution_prev_cnt = RegInit(0.U(CNTWIDTH.W))
  val prev_cnts = Seq(total_prefetch_prev_cnt, useful_prefetch_prev_cnt, late_prefetch_prev_cnt, demand_miss_prev_cnt, pollution_prev_cnt)

  val total_prefetch_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val useful_prefetch_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val late_prefetch_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val demand_miss_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val pollution_interval_cnt = RegInit(0.U(CNTWIDTH.W))
  val interval_cnts = Seq(total_prefetch_interval_cnt, useful_prefetch_interval_cnt, late_prefetch_interval_cnt, demand_miss_interval_cnt, pollution_interval_cnt)

  val interval_trigger = refill_cnt === INTERVAL.U

  val io_ens = Seq(io.accuracy.total_prefetch, io.accuracy.useful_prefetch, io.timely.late_prefetch, io.pollution.demand_miss, io.pollution.cache_pollution)

  for((interval, en) <- interval_cnts.zip(io_ens)) {
    interval := interval + PopCount(en.asUInt)
  }

  when(io.refill) {
    refill_cnt := refill_cnt + 1.U
  }

  when(interval_trigger) {
    refill_cnt := 0.U
    // halve both the running score and the finished interval count:
    // prev := prev / 2 + interval / 2 (a software model of this update is sketched after this module)
    for((prev, interval) <- prev_cnts.zip(interval_cnts)) {
      prev := Cat(0.U(1.W), prev(prev.getWidth - 1, 1)) + Cat(0.U(1.W), interval(interval.getWidth - 1, 1))
      interval := 0.U
    }
  }

  // rolling statistics, windowed by committed instructions
  XSPerfRolling(
    "L1PrefetchAccuracyIns",
    PopCount(io.accuracy.useful_prefetch), PopCount(io.accuracy.total_prefetch),
    1000, io.debugRolling.robTrueCommit, clock, reset
  )

  XSPerfRolling(
    "L1PrefetchLatenessIns",
    PopCount(io.timely.late_prefetch), PopCount(io.accuracy.total_prefetch),
    1000, io.debugRolling.robTrueCommit, clock, reset
  )

  XSPerfRolling(
    "L1PrefetchPollutionIns",
    PopCount(io.pollution.cache_pollution), PopCount(io.pollution.demand_miss),
    1000, io.debugRolling.robTrueCommit, clock, reset
  )

  XSPerfRolling(
    "IPCIns",
    io.debugRolling.robTrueCommit, 1.U,
    1000, io.debugRolling.robTrueCommit, clock, reset
  )

  XSPerfAccumulate("io_refill", io.refill)
  XSPerfAccumulate("total_prefetch_en", io.accuracy.total_prefetch)
  XSPerfAccumulate("useful_prefetch_en", PopCount(io.accuracy.useful_prefetch) + io.timely.late_prefetch)
  XSPerfAccumulate("late_prefetch_en", io.timely.late_prefetch)
  XSPerfAccumulate("demand_miss_en", PopCount(io.pollution.demand_miss))
  XSPerfAccumulate("cache_pollution_en", PopCount(io.pollution.cache_pollution))
}
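
// Documentation-only sketch (the object name is ours; the hardware above does not use
// it): at the end of each INTERVAL (8192 refills), FDPrefetcherMonitor folds the
// finished interval count into its running score as prev / 2 + interval / 2, i.e. an
// exponential moving average with weight 1/2, implemented above with the two
// Cat(0.U, x(W - 1, 1)) right-shifts.
object FDPIntervalAverageSketch {
  def update(prev: Long, interval: Long): Long = {
    require(prev >= 0 && interval >= 0)
    (prev >> 1) + (interval >> 1) // same halving as the hardware update
  }
}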