xref: /XiangShan/src/main/scala/xiangshan/mem/sbuffer/StorePrefetchBursts.scala (revision b9ef0a42375afe7feb66a8e3dbb64a3bd25ccf07)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/
160d32f713Shappy-lx
170d32f713Shappy-lxpackage xiangshan.mem
180d32f713Shappy-lx
198891a219SYinan Xuimport org.chipsalliance.cde.config.Parameters
200d32f713Shappy-lximport chisel3._
210d32f713Shappy-lximport chisel3.util._
220d32f713Shappy-lximport xiangshan._
230d32f713Shappy-lximport utils._
240d32f713Shappy-lximport utility._
250d32f713Shappy-lximport xiangshan.cache._
260d32f713Shappy-lx
/** Constants and small combinational helpers shared by the store prefetch components. */
trait HasStorePrefetchHelper extends HasCircularQueuePtrHelper with HasDCacheParameters {
  // ---- common ----
  // index of the first bit above the page offset (4096-byte pages)
  val PAGEOFFSET = 12
  // index of the first bit above the cache block offset (64-byte blocks per the original note)
  val BLOCKOFFSET = log2Up(dcacheParameters.blockBytes)

  // ---- spb parameters ----
  val ENABLE_SPB = EnableStorePrefetchSPB
  val ONLY_ON_MEMSET = false
  val SATURATE_COUNTER_BITS = 7
  val BURST_ENGINE_SIZE = 2
  val SPB_N = 48

  // ---- serializer parameters ----
  val SERIALIZER_SIZE = 12

  /** Drops the cache block offset bits of `x`, leaving the block number. */
  def block_addr(x: UInt): UInt =
    x(x.getWidth - 1, log2Up(dcacheParameters.blockBytes))

  // filter logic (granularity: a page)

  /** True when both addresses carry the same bits from PAGEOFFSET upwards. */
  def same_page_addr(addr0: UInt, addr1: UInt): Bool = {
    val page0 = addr0(addr0.getWidth - 1, PAGEOFFSET)
    val page1 = addr1(addr1.getWidth - 1, PAGEOFFSET)
    page0 === page1
  }

  /** True when any valid entry of `data_vec` lies in the same page as `incoming_vaddr`. */
  def filter_by_page_addr(valid_vec: Vec[Bool], data_vec: Vec[UInt], incoming_vaddr: UInt) : Bool = {
    val hits = for ((entry_valid, entry_vaddr) <- valid_vec zip data_vec)
      yield entry_valid && same_page_addr(entry_vaddr, incoming_vaddr)
    VecInit(hits).asUInt.orR
  }

  /**
   * Signed difference between the block numbers of `req_addr` and `last_addr`,
   * truncated to the low SATURATE_COUNTER_BITS bits and returned as raw bits.
   */
  def cache_block_addr_difference(req_addr: UInt, last_addr: UInt): UInt = {
    val delta = block_addr(req_addr).asSInt - block_addr(last_addr).asSInt
    delta(SATURATE_COUNTER_BITS - 1, 0)
  }

  /** Discards the three low bits of `st_count`, i.e. an unsigned divide by 8. */
  def get_store_count_divided_by_8(st_count: UInt): UInt =
    st_count(st_count.getWidth - 1, 3)

  /** True once `st_count` is strictly greater than `N`. */
  def trigger_check(st_count: UInt, N: UInt): Bool =
    N < st_count

  /**
   * Burst condition; all three terms must hold:
   *   1. the store counter has passed N,
   *   2. store counter / 8 equals the saturate counter,
   *   3. the saturate counter is not negative (sign bit clear).
   */
  def can_burst(st_count: UInt, N: UInt, sa_count: SInt): Bool = {
    val count_exceeded = trigger_check(st_count, N)
    val stride_matches = get_store_count_divided_by_8(st_count) === sa_count.asUInt
    val non_negative   = sa_count(sa_count.getWidth - 1) === false.B
    count_exceeded && stride_matches && non_negative
  }
}
780d32f713Shappy-lx
// L1 Store prefetch component
800d32f713Shappy-lx
// a prefetch request generator used by spb to burst some prefetch requests to L1 Dcache
//
// Holds BURST_ENGINE_SIZE entries. Each valid entry offers its current address on port 0
// and the following cache block on port 1; the stored address advances by the number of
// requests that fired, and the entry is dropped once the advanced address flips the page
// bit recorded at allocation.
// NOTE: `is_store` is not referenced anywhere in this class body.
class PrefetchBurstGenerator(is_store: Boolean)(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val alloc = Input(Bool())
    val vaddr = Input(UInt(VAddrBits.W))
    val prefetch_req = Vec(StorePipelineWidth, DecoupledIO(new StorePrefetchReq))
  })

  // ports 0 and 1 are driven explicitly below, so at least this many ports must exist
  val maxStorePipelineWidth = if (Enable3Load3Store) 3 else 2
  require(StorePipelineWidth >= maxStorePipelineWidth)

  val SIZE = BURST_ENGINE_SIZE

  // per-entry state: valid flag, next address to prefetch, page bit captured at allocation
  val valids   = RegInit(VecInit(Seq.fill(SIZE)(false.B)))
  val datas    = RegInit(VecInit(Seq.fill(SIZE)(0.U.asTypeOf(io.vaddr))))
  val pagebits = RegInit(VecInit(Seq.fill(SIZE)(0.U(1.W))))

  // enq: take the lowest-indexed free entry, unless a valid entry already covers this page
  val enq_valids = ~(valids.asUInt)
  val full = !enq_valids.orR
  val enq_idx = PriorityEncoder(enq_valids)
  val enq_filter = filter_by_page_addr(valids, datas, io.vaddr)

  when(io.alloc && !full && !enq_filter) {
    pagebits(enq_idx) := io.vaddr(PAGEOFFSET)
    datas(enq_idx) := io.vaddr
    valids(enq_idx) := true.B
  }

  XSPerfAccumulate("burst_generator_alloc_success", io.alloc && !full && !enq_filter)
  XSPerfAccumulate("burst_generator_alloc_fail", io.alloc && full && !enq_filter)
  XSPerfAccumulate("burst_generator_full", full)

  // next prefetch address (+1 cache block) and double next prefetch address (+2 cache blocks)
  val datas_next = Wire(Vec(SIZE, chiselTypeOf(datas(0))))
  val datas_next_next = Wire(Vec(SIZE, chiselTypeOf(datas(0))))
  for (i <- 0 until SIZE) {
    datas_next(i) := datas(i) + Cat(1.U(1.W), 0.U(BLOCKOFFSET.W))
    datas_next_next(i) := datas(i) + Cat(2.U(2.W), 0.U(BLOCKOFFSET.W))
  }

  // deq
  val deq_valids = valids
  val deq_decoupled = Wire(Vec(SIZE, Vec(StorePipelineWidth, Decoupled(new StorePrefetchReq))))

  for (i <- 0 until SIZE) {
    val ports = deq_decoupled(i)
    val alloc_page_bit = pagebits(i)

    // port 0: the entry's current address
    ports(0).valid := deq_valids(i)
    ports(0).bits := DontCare
    ports(0).bits.vaddr := datas(i)

    // port 1: the following block, only if it stays in the page and port 0 fires this cycle
    ports(1).valid := deq_valids(i) && (datas_next(i)(PAGEOFFSET) === alloc_page_bit) && ports(0).fire
    ports(1).bits := DontCare
    ports(1).bits.vaddr := datas_next(i)

    // any further ports are never driven valid by this generator
    ports.drop(2).foreach { unused =>
      unused.valid := false.B
      unused.bits := DontCare
    }

    when(ports(1).fire) {
      // fired 2 prefetch reqs
      datas(i) := datas_next_next(i)
      when(datas_next_next(i)(PAGEOFFSET) =/= alloc_page_bit) {
        // cross page, invalidate this entry
        valids(i) := false.B
      }
    }.elsewhen(ports(0).fire) {
      // fired 1 prefetch req
      datas(i) := datas_next(i)
      when(datas_next(i)(PAGEOFFSET) =/= alloc_page_bit) {
        // cross page, invalidate this entry
        valids(i) := false.B
      }
    }
  }

  // one arbiter per output port, choosing among the entries' requests for that port
  io.prefetch_req.zipWithIndex.foreach { case (out, i) =>
    arbiter(deq_decoupled.map(_(i)), out, Some(s"spb_deq_arb${i}"))
  }

  XSPerfAccumulate("burst_valid_num", PopCount(valids))
  XSPerfAccumulate("prefetch_req_fire_by_generator", PopCount(VecInit(io.prefetch_req.map(_.fire))))
}
1570d32f713Shappy-lx
/**
 * Store prefetch burst (SPB) trainer.
 *
 * Counts incoming stores and accumulates the cache-block delta between consecutive
 * store addresses. Once the store count passes SPB_N both counters are cleared, and
 * if `can_burst` also holds an entry is requested from the burst generator.
 */
class StorePrefetchBursts(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val enable = Input(Bool())
    val memSetPattenDetected = Input(Bool())
    val sbuffer_enq  = Flipped(Valid(new DCacheWordReqWithVaddr))
    val prefetch_req = Vec(StorePipelineWidth, DecoupledIO(new StorePrefetchReq))
  })
  require(EnsbufferWidth == 2)

  // meta for SPB
  val N = SPB_N
  // holds the full vaddr of the previous store; the helpers strip the block offset on use
  val last_st_block_addr = RegInit(0.U(VAddrBits.W))
  val saturate_counter = RegInit(0.S(SATURATE_COUNTER_BITS.W))
  val store_count = RegInit(0.U((log2Up(N) + 1).W))
  val burst_engine = Module(new PrefetchBurstGenerator(is_store = true))

  val sbuffer_fire = io.sbuffer_enq.valid
  val sbuffer_vaddr = io.sbuffer_enq.bits.vaddr

  // both counters only move in cycles where a store arrives
  val next_store_count = store_count + sbuffer_fire.asUInt
  val next_saturate_count = saturate_counter +
    Mux(sbuffer_fire, cache_block_addr_difference(sbuffer_vaddr, last_st_block_addr).asSInt, 0.S)

  when(sbuffer_fire) {
    last_st_block_addr := sbuffer_vaddr
  }

  val check = trigger_check(next_store_count, N.U)
  val burst = can_burst(next_store_count, N.U, next_saturate_count)

  // restart the training window whenever the check (or a burst) triggers
  when(burst || check) {
    store_count := 0.U
    saturate_counter := 0.S
  }.otherwise {
    store_count := next_store_count
    saturate_counter := next_saturate_count
  }

  // with ONLY_ON_MEMSET set, allocation is additionally gated by the memset detector
  burst_engine.io.alloc := (
    if (ONLY_ON_MEMSET) burst && io.enable && io.memSetPattenDetected
    else burst && io.enable
  )
  burst_engine.io.vaddr := get_block_addr(io.sbuffer_enq.bits.vaddr)
  burst_engine.io.prefetch_req <> io.prefetch_req

  // perf
  XSPerfAccumulate("trigger_burst", burst && io.enable)
  XSPerfAccumulate("trigger_check", check && io.enable)
}
2030d32f713Shappy-lx
// L2 Store prefetch component
2050d32f713Shappy-lx
// Serializer: FIFO queue, receives EnsbufferWidth requests sent from sq to sbuffer,
//             saves them to a FIFO queue and pops them in order, one per cycle
class Serializer(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val sbuffer_enq  = Vec(EnsbufferWidth, Flipped(Valid(new DCacheWordReqWithVaddr)))
    val prefetch_train = DecoupledIO(new DCacheWordReqWithVaddr)
  })
  val QueueSize = SERIALIZER_SIZE

  // circular pointer type sized for this queue
  class SerializerPtr(implicit p: Parameters) extends CircularQueuePtr[SerializerPtr](p => QueueSize)

  object SerializerPtr {
    /** Builds a pointer wire from an explicit wrap flag and index value. */
    def apply(f: Bool, v: UInt)(implicit p: Parameters): SerializerPtr = {
      val ptr = Wire(new SerializerPtr)
      ptr.flag := f
      ptr.value := v
      ptr
    }
  }

  // one enqueue pointer per incoming port, starting at slots 0, 1, ...
  val enqPtrExt = RegInit(VecInit((0 until EnsbufferWidth).map(_.U.asTypeOf(new SerializerPtr))))
  val deqPtrExt = RegInit(0.U.asTypeOf(new SerializerPtr))

  val deqPtr = deqPtrExt.value

  // NOTE(review): each slot's reset value is its index literal reinterpreted as the bundle;
  // presumably this leaves `valid` low at reset -- confirm against the Valid field layout.
  val reqs = RegInit(VecInit((0 until QueueSize).map(_.U.asTypeOf(Valid(new DCacheWordReqWithVaddr)))))

  // deq: expose the slot at the dequeue pointer, release it when the consumer takes it
  io.prefetch_train.valid := reqs(deqPtr).valid
  io.prefetch_train.bits  := reqs(deqPtr).bits

  when(io.prefetch_train.fire) {
    reqs(deqPtr).valid := false.B
    deqPtrExt := deqPtrExt + 1.U
  }

  // enq: accept the incoming group only if all of its valid requests fit
  val count_vsreq = PopCount(io.sbuffer_enq.map(_.valid))
  val canEnqueue = (distanceBetween(enqPtrExt(0), deqPtrExt) + count_vsreq) <= QueueSize.U

  when(canEnqueue) {
    // port i writes the slot at enqPtrExt(i) while every pointer advances by the number of
    // valid requests; presumably valid bits are asserted contiguously from port 0 -- verify
    // against the sbuffer enqueue logic.
    io.sbuffer_enq.zip(enqPtrExt).foreach { case (req, ptr) =>
      when(req.valid) {
        reqs(ptr.value) := req
      }
    }
    enqPtrExt.foreach(ptr => ptr := ptr + count_vsreq)
  }

  XSPerfAccumulate("canNotEnqueue", !canEnqueue)
  XSPerfAccumulate("prefetch_train_fire", io.prefetch_train.fire)
  XSPerfAccumulate("full", PopCount(reqs.map(_.valid)) === QueueSize.U)
}
2590d32f713Shappy-lx
/**
 * Top-level store prefetch wrapper: feeds sbuffer enqueue requests through a Serializer
 * into the StorePrefetchBursts trainer and forwards the resulting prefetch requests.
 */
class StorePfWrapper()(implicit p: Parameters) extends DCacheModule with HasStorePrefetchHelper {
  val io = IO(new DCacheBundle {
    val sbuffer_enq  = Vec(EnsbufferWidth, Flipped(Valid(new DCacheWordReqWithVaddr)))
    val prefetch_req = Vec(StorePipelineWidth, DecoupledIO(new StorePrefetchReq))
    val memSetPattenDetected = Input(Bool())
  })

  // TODO: remove serializer, use a ptr in sq
  val serializer = Module(new Serializer())
  val spb = Module(new StorePrefetchBursts())

  // give multiple reqs to serializer, serializer will give out one req per cycle;
  // requests are masked off entirely when SPB is disabled
  serializer.io.sbuffer_enq.zip(io.sbuffer_enq).foreach { case (to_serializer, from_sbuffer) =>
    to_serializer.valid := from_sbuffer.valid && ENABLE_SPB.B
    to_serializer.bits := from_sbuffer.bits
  }

  // train spb
  spb.io.enable := ENABLE_SPB.B
  spb.io.memSetPattenDetected := io.memSetPattenDetected
  spb.io.sbuffer_enq.valid := serializer.io.prefetch_train.valid
  spb.io.sbuffer_enq.bits  := serializer.io.prefetch_train.bits
  // spb will always receive the train req, so the serializer is never back-pressured
  serializer.io.prefetch_train.ready := true.B

  // fire a prefetch req
  io.prefetch_req <> spb.io.prefetch_req
}