xref: /XiangShan/src/main/scala/xiangshan/mem/sbuffer/StorePrefetchBursts.scala (revision 45f43e6e5f88874a7573ff096d1e5c2855bd16c7)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.mem
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import xiangshan._
23import utils._
24import utility._
25import xiangshan.cache._
26
// Shared constants and combinational helpers for the store prefetcher (SPB)
// and its serializer front-end.
trait HasStorePrefetchHelper extends HasCircularQueuePtrHelper with HasDCacheParameters {
  // common
  val PAGEOFFSET = 12 // page offset 4096 Bytes
  val BLOCKOFFSET = log2Up(dcacheParameters.blockBytes) // cache block offset 64 Bytes

  // spb parameters
  val ENABLE_SPB = EnableStorePrefetchSPB
  val ONLY_ON_MEMSET = false
  val SATURATE_COUNTER_BITS = 7
  val BURST_ENGINE_SIZE = 2
  val SPB_N = 48

  // serializer parameters
  val SERIALIZER_SIZE = 12

  /** Drop the cache-block offset bits, leaving the cache-block address. */
  def block_addr(x: UInt): UInt = x(x.getWidth - 1, BLOCKOFFSET)

  // filter logic (granularity: a page)
  /** True when the two virtual addresses fall inside the same 4KB page. */
  def same_page_addr(addr0: UInt, addr1: UInt): Bool =
    addr0(addr0.getWidth - 1, PAGEOFFSET) === addr1(addr1.getWidth - 1, PAGEOFFSET)

  /** True when any valid entry already covers the page of `incoming_vaddr`. */
  def filter_by_page_addr(valid_vec: Vec[Bool], data_vec: Vec[UInt], incoming_vaddr: UInt) : Bool =
    valid_vec.zip(data_vec).map { case (v, e_vaddr) =>
      v && same_page_addr(e_vaddr, incoming_vaddr)
    }.reduce(_ || _)

  /** Signed distance between the two cache-block addresses, truncated to the
    * saturate-counter width. */
  def cache_block_addr_difference(req_addr: UInt, last_addr: UInt): UInt =
    (block_addr(req_addr).asSInt - block_addr(last_addr).asSInt)(SATURATE_COUNTER_BITS - 1, 0)

  /** st_count / 8 (drop the low 3 bits). */
  def get_store_count_divided_by_8(st_count: UInt): UInt =
    st_count(st_count.getWidth - 1, 3)

  /** The store counter has passed the trigger threshold N. */
  def trigger_check(st_count: UInt, N: UInt): Bool = st_count > N

  /** Burst condition:
    *  1. the store counter overflows the threshold N, and
    *  2. counter / 8 equals the saturate counter, and
    *  3. the saturate counter is not negative (sign bit clear).
    */
  def can_burst(st_count: UInt, N: UInt, sa_count: SInt): Bool = {
    val saNonNegative = !sa_count(sa_count.getWidth - 1)
    trigger_check(st_count, N) && get_store_count_divided_by_8(st_count) === sa_count.asUInt && saNonNegative
  }
}
78
79// L1 Store prefetch component
80
// a prefetch request generator used by the SPB to burst prefetch requests to the L1 DCache
// Burst engine: holds up to BURST_ENGINE_SIZE independent prefetch streams.
// Each stream walks forward one or two cache blocks per cycle and is retired
// once the next address would cross its 4KB page.
class PrefetchBurstGenerator(is_store: Boolean)(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val alloc = Input(Bool())            // request to start a new stream this cycle
    val vaddr = Input(UInt(VAddrBits.W)) // starting virtual address of the stream
    val prefetch_req = Vec(StorePipelineWidth, DecoupledIO(new StorePrefetchReq))
  })

  // this generator drives exactly one request port per store pipeline
  val maxStorePipelineWidth = (if (Enable3Load3Store) 3 else 2)
  require(StorePipelineWidth == maxStorePipelineWidth)

  val SIZE = BURST_ENGINE_SIZE

  // per-stream state: valid bit, current virtual address, and the lowest page-number
  // bit (vaddr(PAGEOFFSET)) of the starting address, used to detect page crossing —
  // advancing one or two blocks at a time flips that bit exactly at the page boundary
  val valids = RegInit(VecInit(List.tabulate(SIZE){_ => false.B}))
  val datas  = RegInit(VecInit(List.tabulate(SIZE){_ => 0.U.asTypeOf(io.vaddr)}))
  val pagebits = RegInit(VecInit(List.tabulate(SIZE){_ => 0.U(1.W)}))

  // enq: pick a free entry; drop the request if a valid stream is already
  // working on the same page
  val enq_valids = ~(valids.asUInt)
  val full = !(enq_valids.orR)
  val enq_idx = PriorityEncoder(enq_valids)
  val enq_filter = filter_by_page_addr(valids, datas, io.vaddr)

  when(io.alloc && !full && !enq_filter) {
    valids(enq_idx) := true.B
    datas(enq_idx) := io.vaddr
    pagebits(enq_idx) := io.vaddr(PAGEOFFSET)
  }

  XSPerfAccumulate("burst_generator_alloc_success", io.alloc && !full && !enq_filter)
  XSPerfAccumulate("burst_generator_alloc_fail", io.alloc && full && !enq_filter)
  XSPerfAccumulate("burst_generator_full", full)

  // next prefetch address (current + 1 cache block)
  val datas_next = Wire(Vec(SIZE, chiselTypeOf(datas(0))))
  datas_next := datas.map(_ + Cat(1.U(1.W), 0.U(BLOCKOFFSET.W)))
  // double next prefetch address (current + 2 cache blocks)
  val datas_next_next = Wire(Vec(SIZE, chiselTypeOf(datas(0))))
  datas_next_next := datas.map(_ + Cat(2.U(2.W), 0.U(BLOCKOFFSET.W)))

  // deq: each stream offers up to two requests per cycle; port 1 is only valid
  // when port 0 fires in the same cycle, so requests always leave in address order
  // val deq_valids = (valids zip datas zip pagebits).map{case (v, vaddr, pgbit) => v && vaddr(PAGEOFFSET) === pagebits}
  val deq_valids = valids
  val deq_decoupled = Wire(Vec(SIZE, Vec(StorePipelineWidth, Decoupled(new StorePrefetchReq))))

  (deq_valids zip deq_decoupled zip datas zip datas_next zip datas_next_next zip pagebits zip valids).foreach{case ((((((deq_valid, out_decouple), data), data_next), data_next_next), pg_bit), v) => {
    out_decouple(0).valid := deq_valid
    out_decouple(0).bits := DontCare
    out_decouple(0).bits.vaddr := data
    // port 1 additionally requires data_next to still be inside the stream's page
    out_decouple(1).valid := deq_valid && data_next(PAGEOFFSET) === pg_bit && out_decouple(0).fire
    out_decouple(1).bits := DontCare
    out_decouple(1).bits.vaddr := data_next
    if (Enable3Load3Store) {
      // the third store pipeline port is never used by the burst engine
      out_decouple(2).valid := false.B
      out_decouple(2).bits := DontCare
    }
    when(out_decouple(1).fire) {
      // fired 2 prefetch reqs
      data := data_next_next
      when(data_next_next(PAGEOFFSET) =/= pg_bit) {
        // cross page, invalid this entry
        v := false.B
      }
    }.elsewhen(out_decouple(0).fire) {
      // fired 1 prefetch req
      data := data_next
      when(data_next(PAGEOFFSET) =/= pg_bit) {
        // cross page, invalid this entry
        v := false.B
      }
    }
  }}
  // arbitrate the per-stream ports onto the shared per-pipeline output ports
  for (i <- 0 until StorePipelineWidth) {
    arbiter(deq_decoupled.map(_(i)), io.prefetch_req(i), Some(s"spb_deq_arb${i}"))
  }

  XSPerfAccumulate("burst_valid_num", PopCount(valids))
  XSPerfAccumulate("prefetch_req_fire_by_generator", PopCount(VecInit(io.prefetch_req.map(_.fire))))
}
160
// Store Prefetch Bursts (SPB): watches the serialized sbuffer-enqueue stream,
// counts stores and accumulates the signed cache-block stride; when N stores
// look sequential enough (can_burst), it kicks the burst generator to prefetch
// ahead within the current page.
class StorePrefetchBursts(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val enable = Input(Bool())               // global SPB enable
    val memSetPattenDetected = Input(Bool()) // external memset-pattern hint
    val sbuffer_enq  = Flipped(Valid(new DCacheWordReqWithVaddr)) // serialized store train input
    val prefetch_req = Vec(StorePipelineWidth, DecoupledIO(new StorePrefetchReq))
  })
  require(EnsbufferWidth == 2)

  // meta for SPB
  val N = SPB_N
  val last_st_block_addr = RegInit(0.U(VAddrBits.W))               // vaddr of the previous observed store
  val saturate_counter = RegInit(0.S(SATURATE_COUNTER_BITS.W))     // accumulated signed block-stride
  val store_count = RegInit(0.U((log2Up(N) + 1).W))                // stores seen in the current window
  val burst_engine = Module(new PrefetchBurstGenerator(is_store = true))

  val sbuffer_fire = io.sbuffer_enq.valid
  val sbuffer_vaddr = io.sbuffer_enq.bits.vaddr

  val next_store_count = store_count + Mux(sbuffer_fire, 1.U, 0.U)
  val next_saturate_count = (saturate_counter + Mux(sbuffer_fire, cache_block_addr_difference(sbuffer_vaddr, last_st_block_addr).asSInt, 0.S)).asSInt

  when(sbuffer_fire) {
    last_st_block_addr := sbuffer_vaddr
  }

  val check = trigger_check(next_store_count, N.U)
  val burst = can_burst(next_store_count, N.U, next_saturate_count)

  // both counters restart whenever a window completes (burst fired or threshold passed)
  store_count := Mux(burst || check, 0.U, next_store_count)
  saturate_counter := Mux(burst || check, 0.S, next_saturate_count)

  if(ONLY_ON_MEMSET) {
    // very strict: only burst on memset
    burst_engine.io.alloc := burst && io.enable && io.memSetPattenDetected
  }else {
    burst_engine.io.alloc := burst && io.enable
  }
  // NOTE(review): get_block_addr is not defined in this file — presumably a dcache
  // helper that block-aligns the vaddr; confirm against HasDCacheParameters
  burst_engine.io.vaddr := get_block_addr(io.sbuffer_enq.bits.vaddr)
  burst_engine.io.prefetch_req <> io.prefetch_req

  // perf
  XSPerfAccumulate("trigger_burst", burst && io.enable)
  XSPerfAccumulate("trigger_check", check && io.enable)
}
206
207// L2 Store prefetch component
208
// Serializer: FIFO queue, receives EnsbufferWidth requests sent from sq to sbuffer,
//             saves them to a FIFO queue, and pops them in order
// FIFO that turns the EnsbufferWidth-wide sq->sbuffer enqueue stream into a
// one-request-per-cycle training stream for the store prefetcher.
class Serializer(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val sbuffer_enq  = Vec(EnsbufferWidth, Flipped(Valid(new DCacheWordReqWithVaddr)))
    val prefetch_train = DecoupledIO(new DCacheWordReqWithVaddr) // one serialized request per cycle
  })
  val QueueSize = SERIALIZER_SIZE

  // circular-queue pointer sized to the serializer depth
  class SerializerPtr(implicit p: Parameters) extends CircularQueuePtr[SerializerPtr](p => QueueSize){}

  object SerializerPtr {
    def apply(f: Bool, v: UInt)(implicit p: Parameters): SerializerPtr = {
      val ptr = Wire(new SerializerPtr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  // one enqueue pointer per enqueue port (pre-offset by the port index); single deq pointer
  val enqPtrExt = RegInit(VecInit((0 until EnsbufferWidth).map(_.U.asTypeOf(new SerializerPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new SerializerPtr))

  val deqPtr = deqPtrExt.value

  val reqs = RegInit(VecInit((0 until QueueSize).map(_.U.asTypeOf(Valid(new DCacheWordReqWithVaddr)))))

  // deq: expose the head entry; invalidate it once consumed
  io.prefetch_train.valid := reqs(deqPtr).valid
  io.prefetch_train.bits  := reqs(deqPtr).bits

  when(io.prefetch_train.fire) {
    deqPtrExt := deqPtrExt + 1.U
    reqs(deqPtr).valid := false.B
  }

  // enq: accept the incoming requests all-or-nothing when there is room for all of them
  val count_vsreq = PopCount(io.sbuffer_enq.map(_.valid))
  val canEnqueue = (distanceBetween(enqPtrExt(0), deqPtrExt) + count_vsreq) <= QueueSize.U

  when(canEnqueue) {
    for(i <- 0 until EnsbufferWidth) {
      when(io.sbuffer_enq(i).valid) {
        reqs(enqPtrExt(i).value) := io.sbuffer_enq(i)
      }
    }
    // NOTE(review): advancing every pointer by count_vsreq assumes valid inputs are
    // compacted to the low ports (valid(1) implies valid(0)); a lone valid(1) would
    // leave a never-valid hole at slot enqPtrExt(0) that stalls deq — confirm the
    // sq->sbuffer interface guarantees this
    enqPtrExt.map(ptr => ptr := ptr + count_vsreq)
  }

  XSPerfAccumulate("canNotEnqueue", !canEnqueue)
  XSPerfAccumulate("prefetch_train_fire", io.prefetch_train.fire)
  XSPerfAccumulate("full", PopCount(reqs.map(_.valid)) === QueueSize.U)
}
262
// Glue module: serializes the EnsbufferWidth-wide sbuffer-enqueue stream and
// feeds it, one request per cycle, into the store prefetch burst engine.
class StorePfWrapper()(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val sbuffer_enq  = Vec(EnsbufferWidth, Flipped(Valid(new DCacheWordReqWithVaddr)))
    val prefetch_req = Vec(StorePipelineWidth, DecoupledIO(new StorePrefetchReq))
    val memSetPattenDetected = Input(Bool())
  })

  // TODO: remove serializer, use a ptr in sq
  val serializer = Module(new Serializer())
  val spb = Module(new StorePrefetchBursts())

  // hand the multi-wide enqueue stream to the serializer, gated by the SPB enable;
  // the serializer emits at most one training request per cycle
  serializer.io.sbuffer_enq.zip(io.sbuffer_enq).foreach { case (ser_in, enq) =>
    ser_in.valid := enq.valid && ENABLE_SPB.B
    ser_in.bits  := enq.bits
  }

  // train the burst engine with the serialized stream; spb never back-pressures,
  // so the serializer's output is always ready
  spb.io.enable := ENABLE_SPB.B
  spb.io.memSetPattenDetected := io.memSetPattenDetected
  spb.io.sbuffer_enq.valid := serializer.io.prefetch_train.valid
  spb.io.sbuffer_enq.bits  := serializer.io.prefetch_train.bits
  serializer.io.prefetch_train.ready := true.B

  // forward generated prefetch requests to the store pipelines
  io.prefetch_req <> spb.io.prefetch_req
}
290}