/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.mem

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import utils._
import utility._
import xiangshan.cache._

/** Shared constants and pure combinational helpers for the store prefetch
  * components below (SPB burst engine and the L2 train serializer).
  */
trait HasStorePrefetchHelper extends HasCircularQueuePtrHelper with HasDCacheParameters {
  // common
  val PAGEOFFSET = 12 // page offset 4096 Bytes
  val BLOCKOFFSET = log2Up(dcacheParameters.blockBytes) // cache block offset 64 Bytes

  // spb (Store Prefetch Bursts) parameters
  val ENABLE_SPB = EnableStorePrefetchSPB
  val ONLY_ON_MEMSET = false           // if true, only burst when a memset pattern is detected
  val SATURATE_COUNTER_BITS = 7        // width of the signed stride accumulator
  val BURST_ENGINE_SIZE = 2            // entries in the burst request generator
  val SPB_N = 48                       // store-count threshold before a trigger check

  // serializer parameters
  val SERIALIZER_SIZE = 12

  // Drop the cache-block offset bits, leaving the block-aligned address.
  def block_addr(x: UInt): UInt = {
    val offset = log2Up(dcacheParameters.blockBytes)
    x(x.getWidth - 1, offset)
  }

  // filter logic (granularity: a page)
  def same_page_addr(addr0: UInt, addr1: UInt): Bool = {
    addr0(addr0.getWidth - 1, PAGEOFFSET) === addr1(addr1.getWidth - 1, PAGEOFFSET)
  }

  // True when any valid entry in data_vec already covers the incoming vaddr's page.
  def filter_by_page_addr(valid_vec: Vec[Bool], data_vec: Vec[UInt], incoming_vaddr: UInt) : Bool = {
    val match_vec = (valid_vec zip data_vec).map{
      case(v, e_vaddr) => v && same_page_addr(e_vaddr, incoming_vaddr)
    }
    VecInit(match_vec).asUInt.orR
  }

  // Signed block-granularity stride between two addresses, truncated to the
  // saturate-counter width.
  def cache_block_addr_difference(req_addr: UInt, last_addr: UInt): UInt = {
    (block_addr(req_addr).asSInt - block_addr(last_addr).asSInt)(SATURATE_COUNTER_BITS - 1, 0)
  }

  // st_count / 8 (drop the low 3 bits).
  def get_store_count_divided_by_8(st_count: UInt): UInt = {
    st_count(st_count.getWidth - 1, 3)
  }

  def trigger_check(st_count: UInt, N: UInt): Bool = {
    st_count > N
  }

  // Burst only when the observed stores look like a dense, monotonically
  // increasing stream:
  // 1. counter overflows (store count passed the threshold N)
  // 2. counter / 8 == saturate counter (average stride of one block per store)
  // 3. saturate counter is not negative (sign bit clear)
  def can_burst(st_count: UInt, N: UInt, sa_count: SInt): Bool = {
    trigger_check(st_count, N) && get_store_count_divided_by_8(st_count) === sa_count.asUInt && sa_count(sa_count.getWidth - 1) === false.B
  }
}

// L1 Store prefetch component

// a prefetch request generator used by spb to burst some prefetch requests to L1 Dcache
class PrefetchBurstGenerator(is_store: Boolean)(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val alloc = Input(Bool())                // allocate a new burst entry at io.vaddr
    val vaddr = Input(UInt(VAddrBits.W))
    val prefetch_req = Vec(StorePipelineWidth, DecoupledIO(new StorePrefetchReq))
  })

  // This generator hard-codes two requests per entry per cycle, so it only
  // supports the configurations checked here.
  val maxStorePipelineWidth = (if (Enable3Load3Store) 3 else 2)
  require(StorePipelineWidth == maxStorePipelineWidth)

  val SIZE = BURST_ENGINE_SIZE

  val valids = RegInit(VecInit(List.tabulate(SIZE){_ => false.B}))
  val datas = RegInit(VecInit(List.tabulate(SIZE){_ => 0.U.asTypeOf(io.vaddr)}))
  // Page-offset bit of the vaddr at allocation time; used to detect when the
  // advancing prefetch address crosses out of the allocated page.
  val pagebits = RegInit(VecInit(List.tabulate(SIZE){_ => 0.U(1.W)}))

  // enq: claim the first free entry, unless another valid entry already
  // covers this page (enq_filter).
  val enq_valids = ~(valids.asUInt)
  val full = !(enq_valids.orR)
  val enq_idx = PriorityEncoder(enq_valids)
  val enq_filter = filter_by_page_addr(valids, datas, io.vaddr)

  when(io.alloc && !full && !enq_filter) {
    valids(enq_idx) := true.B
    datas(enq_idx) := io.vaddr
    pagebits(enq_idx) := io.vaddr(PAGEOFFSET)
  }

  XSPerfAccumulate("burst_generator_alloc_success", io.alloc && !full && !enq_filter)
  XSPerfAccumulate("burst_generator_alloc_fail", io.alloc && full && !enq_filter)
  XSPerfAccumulate("burst_generator_full", full)

  // next prefetch address (current + 1 cache block)
  val datas_next = Wire(Vec(SIZE, chiselTypeOf(datas(0))))
  datas_next := datas.map(_ + Cat(1.U(1.W), 0.U(BLOCKOFFSET.W)))
  // double next prefetch address (current + 2 cache blocks)
  val datas_next_next = Wire(Vec(SIZE, chiselTypeOf(datas(0))))
  datas_next_next := datas.map(_ + Cat(2.U(2.W), 0.U(BLOCKOFFSET.W)))

  // deq: each entry can issue up to 2 requests per cycle (ports 0 and 1);
  // port 1 only fires in the same page and only together with port 0.
  // val deq_valids = (valids zip datas zip pagebits).map{case (v, vaddr, pgbit) => v && vaddr(PAGEOFFSET) === pagebits}
  val deq_valids = valids
  val deq_decoupled = Wire(Vec(SIZE, Vec(StorePipelineWidth, Decoupled(new StorePrefetchReq))))

  (deq_valids zip deq_decoupled zip datas zip datas_next zip datas_next_next zip pagebits zip valids).foreach{case ((((((deq_valid, out_decouple), data), data_next), data_next_next), pg_bit), v) => {
    out_decouple(0).valid := deq_valid
    out_decouple(0).bits := DontCare
    out_decouple(0).bits.vaddr := data
    out_decouple(1).valid := deq_valid && data_next(PAGEOFFSET) === pg_bit && out_decouple(0).fire
    out_decouple(1).bits := DontCare
    out_decouple(1).bits.vaddr := data_next
    if (Enable3Load3Store) {
      // third store pipeline port is never used by the burst generator
      out_decouple(2).valid := false.B
      out_decouple(2).bits := DontCare
    }
    when(out_decouple(1).fire) {
      // fired 2 prefetch reqs: advance by two blocks
      data := data_next_next
      when(data_next_next(PAGEOFFSET) =/= pg_bit) {
        // cross page, invalid this entry
        v := false.B
      }
    }.elsewhen(out_decouple(0).fire) {
      // fired 1 prefetch req: advance by one block
      data := data_next
      when(data_next(PAGEOFFSET) =/= pg_bit) {
        // cross page, invalid this entry
        v := false.B
      }
    }
  }}
  // Arbitrate the per-entry ports onto the shared per-pipeline outputs.
  for (i <- 0 until StorePipelineWidth) {
    arbiter(deq_decoupled.map(_(i)), io.prefetch_req(i), Some(s"spb_deq_arb${i}"))
  }

  XSPerfAccumulate("burst_valid_num", PopCount(valids))
  XSPerfAccumulate("prefetch_req_fire_by_generator", PopCount(VecInit(io.prefetch_req.map(_.fire))))
}

/** Store Prefetch Bursts (SPB): watches the sbuffer-enqueue stream for a
  * dense sequential store pattern and, when detected, kicks the burst
  * generator to prefetch ahead into L1 DCache.
  */
class StorePrefetchBursts(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val enable = Input(Bool())
    val memSetPattenDetected = Input(Bool())
    val sbuffer_enq = Flipped(Valid(new DCacheWordReqWithVaddr))
    val prefetch_req = Vec(StorePipelineWidth, DecoupledIO(new StorePrefetchReq))
  })
  require(EnsbufferWidth == 2)

  // meta for SPB
  val N = SPB_N
  val last_st_block_addr = RegInit(0.U(VAddrBits.W))
  // signed accumulator of block-granularity strides between consecutive stores
  val saturate_counter = RegInit(0.S(SATURATE_COUNTER_BITS.W))
  // number of stores observed since the last trigger check
  val store_count = RegInit(0.U((log2Up(N) + 1).W))
  val burst_engine = Module(new PrefetchBurstGenerator(is_store = true))

  val sbuffer_fire = io.sbuffer_enq.valid
  val sbuffer_vaddr = io.sbuffer_enq.bits.vaddr

  val next_store_count = store_count + Mux(sbuffer_fire, 1.U, 0.U)
  val next_saturate_count = (saturate_counter + Mux(sbuffer_fire, cache_block_addr_difference(sbuffer_vaddr, last_st_block_addr).asSInt, 0.S)).asSInt

  when(sbuffer_fire) {
    last_st_block_addr := sbuffer_vaddr
  }

  val check = trigger_check(next_store_count, N.U)
  val burst = can_burst(next_store_count, N.U, next_saturate_count)

  // restart the observation window after every trigger check (hit or miss)
  store_count := Mux(burst || check, 0.U, next_store_count)
  saturate_counter := Mux(burst || check, 0.S, next_saturate_count)

  if(ONLY_ON_MEMSET) {
    // very strict: only burst on memset
    burst_engine.io.alloc := burst && io.enable && io.memSetPattenDetected
  }else {
    burst_engine.io.alloc := burst && io.enable
  }
  burst_engine.io.vaddr := get_block_addr(io.sbuffer_enq.bits.vaddr)
  burst_engine.io.prefetch_req <> io.prefetch_req

  // perf
  XSPerfAccumulate("trigger_burst", burst && io.enable)
  XSPerfAccumulate("trigger_check", check && io.enable)
}
// L2 Store prefetch component

// Serializer: FIFO queue, receives EnsbufferWidth requests sent from sq to sbuffer,
// saves them to a FIFO queue, pops them in order (one per cycle) as train input.
class Serializer(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val sbuffer_enq = Vec(EnsbufferWidth, Flipped(Valid(new DCacheWordReqWithVaddr)))
    val prefetch_train = DecoupledIO(new DCacheWordReqWithVaddr)
  })
  val QueueSize = SERIALIZER_SIZE

  class SerializerPtr(implicit p: Parameters) extends CircularQueuePtr[SerializerPtr](p => QueueSize){}

  object SerializerPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): SerializerPtr = {
      val ptr = Wire(new SerializerPtr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  // One enqueue pointer per enqueue port, pre-offset 0..EnsbufferWidth-1
  val enqPtrExt = RegInit(VecInit((0 until EnsbufferWidth).map(_.U.asTypeOf(new SerializerPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new SerializerPtr))

  val deqPtr = deqPtrExt.value

  // NOTE(review): each entry's reset value is its index cast onto the bundle
  // (`_.U.asTypeOf(...)`), not zero — the valid flags still reset to 0 here
  // because QueueSize is small, but `0.U.asTypeOf` was likely intended; confirm.
  val reqs = RegInit(VecInit((0 until QueueSize).map(_.U.asTypeOf(Valid(new DCacheWordReqWithVaddr)))))

  // deq: present the head entry; invalidate it when the consumer takes it
  io.prefetch_train.valid := reqs(deqPtr).valid
  io.prefetch_train.bits := reqs(deqPtr).bits

  when(io.prefetch_train.fire) {
    deqPtrExt := deqPtrExt + 1.U
    reqs(deqPtr).valid := false.B
  }

  // enq: accept this cycle's requests only if ALL of them fit
  val count_vsreq = PopCount(io.sbuffer_enq.map(_.valid))
  val canEnqueue = (distanceBetween(enqPtrExt(0), deqPtrExt) + count_vsreq) <= QueueSize.U

  when(canEnqueue) {
    for(i <- 0 until EnsbufferWidth) {
      when(io.sbuffer_enq(i).valid) {
        reqs(enqPtrExt(i).value) := io.sbuffer_enq(i)
      }
    }
    // advance every enqueue pointer by the number of accepted requests
    // (foreach, not map: this is a pure side effect — the Seq[Unit] a map
    // would build was silently discarded before)
    enqPtrExt.foreach(ptr => ptr := ptr + count_vsreq)
  }

  XSPerfAccumulate("canNotEnqueue", !canEnqueue)
  XSPerfAccumulate("prefetch_train_fire", io.prefetch_train.fire)
  XSPerfAccumulate("full", PopCount(reqs.map(_.valid)) === QueueSize.U)
}

/** Top-level wrapper: funnels sbuffer-enqueue traffic through the Serializer
  * into the SPB trainer and exposes the resulting prefetch requests.
  */
class StorePfWrapper()(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val sbuffer_enq = Vec(EnsbufferWidth, Flipped(Valid(new DCacheWordReqWithVaddr)))
    val prefetch_req = Vec(StorePipelineWidth, DecoupledIO(new StorePrefetchReq))
    val memSetPattenDetected = Input(Bool())
  })

  // TODO: remove serializer, use a ptr in sq
  val serializer = Module(new Serializer())
  val spb = Module(new StorePrefetchBursts())

  // give multiple reqs to serializer, serializer will give out one req per cycle
  for(i <- 0 until EnsbufferWidth) {
    serializer.io.sbuffer_enq(i).valid := io.sbuffer_enq(i).valid && ENABLE_SPB.B
    serializer.io.sbuffer_enq(i).bits := io.sbuffer_enq(i).bits
  }

  // train spb
  spb.io.enable := ENABLE_SPB.B
  spb.io.memSetPattenDetected := io.memSetPattenDetected
  spb.io.sbuffer_enq.valid := serializer.io.prefetch_train.valid
  spb.io.sbuffer_enq.bits := serializer.io.prefetch_train.bits
  // spb will always receive train req
  serializer.io.prefetch_train.ready := true.B

  // fire a prefetch req
  io.prefetch_req <> spb.io.prefetch_req
}