xref: /XiangShan/src/main/scala/xiangshan/mem/prefetch/L1StridePrefetcher.scala (revision 0d32f7132f120ac0b32ab552fe0da4934208dd01)
1*0d32f713Shappy-lxpackage xiangshan.mem.prefetch
2*0d32f713Shappy-lx
3*0d32f713Shappy-lximport chipsalliance.rocketchip.config.Parameters
4*0d32f713Shappy-lximport chisel3._
5*0d32f713Shappy-lximport chisel3.util._
6*0d32f713Shappy-lximport xiangshan._
7*0d32f713Shappy-lximport utils._
8*0d32f713Shappy-lximport utility._
9*0d32f713Shappy-lximport xiangshan.cache.HasDCacheParameters
10*0d32f713Shappy-lximport xiangshan.cache.mmu._
11*0d32f713Shappy-lximport xiangshan.mem.{L1PrefetchReq, LdPrefetchTrainBundle}
12*0d32f713Shappy-lximport xiangshan.mem.trace._
13*0d32f713Shappy-lximport scala.collection.SeqLike
14*0d32f713Shappy-lx
/**
  * Elaboration-time parameters shared by the L1 stride prefetcher components.
  *
  * Everything here is a plain Scala value that is fixed when the hardware is
  * generated; none of these are hardware signals.
  */
trait HasStridePrefetchHelper extends HasL1PrefetchHelper {
  // ---- table geometry -------------------------------------------------------
  // Not referenced by the stride meta array itself; presumably sizes a
  // training filter defined elsewhere -- TODO confirm.
  val STRIDE_FILTER_SIZE = 6
  // Number of entries in the stride meta array.
  val STRIDE_ENTRY_NUM = 10
  // Width of a recorded stride: BLOCK_OFFSET low bits plus 10 more bits.
  val STRIDE_BITS = BLOCK_OFFSET + 10
  // Width of the truncated virtual address stored per entry (same sizing as the stride).
  val STRIDE_VADDR_BITS = BLOCK_OFFSET + 10
  // Width of the per-entry saturating confidence counter.
  val STRIDE_CONF_BITS = 2

  // ---- detail control -------------------------------------------------------
  // 1 for true, 0 for false. Kept as an Int because it is used as the initial
  // value of a hardware constant record.
  val ALWAYS_UPDATE_PRE_VADDR = 1
  // If true, prefetch degree is greater than 1; otherwise it is exactly 1.
  val AGGRESIVE_POLICY = false
  // Prefetch degree (in blocks) used when the aggressive policy is enabled.
  val STRIDE_LOOK_AHEAD_BLOCKS = 2
  // If true, avoid collision with the stream prefetcher.
  val LOOK_UP_STREAM = false

  // Effective prefetch width in blocks, derived from the policy switch above.
  val STRIDE_WIDTH_BLOCKS = if (AGGRESIVE_POLICY) STRIDE_LOOK_AHEAD_BLOCKS else 1

  // Largest value representable by the confidence counter (saturation point).
  def MAX_CONF = (1 << STRIDE_CONF_BITS) - 1
}
32*0d32f713Shappy-lx
/**
  * One entry of the stride meta array.
  *
  * An entry is identified by a hashed PC and remembers the (truncated) address
  * of the previous access, the last learned stride and a saturating confidence
  * counter for that stride.
  */
class StrideMetaBundle(implicit p: Parameters) extends XSBundle with HasStridePrefetchHelper {
  // Low STRIDE_VADDR_BITS bits of the previous training address.
  val pre_vaddr = UInt(STRIDE_VADDR_BITS.W)
  // Currently learned stride (byte granularity, modulo 2^STRIDE_BITS).
  val stride = UInt(STRIDE_BITS.W)
  // Saturating confidence counter in the range [0, MAX_CONF].
  val confidence = UInt(STRIDE_CONF_BITS.W)
  // Tag: hash of the PC that owns this entry.
  val hash_pc = UInt(HASH_TAG_WIDTH.W)

  /**
    * Puts the entry into its reset state.
    *
    * The tag is set to the entry's own index rather than zero, which keeps the
    * tags of freshly reset entries pairwise distinct (StrideMetaArray asserts
    * that at most one entry matches a lookup).
    *
    * @param index position of this entry inside the array
    */
  def reset(index: Int) = {
    hash_pc := index.U
    confidence := 0.U
    stride := 0.U
    pre_vaddr := 0.U
  }

  /**
    * Claims the entry for a new PC: records the tag and the first address and
    * starts with no stride and zero confidence.
    *
    * @param vaddr         address of the access that triggered the allocation
    * @param alloc_hash_pc hashed PC to use as the entry's tag
    */
  def alloc(vaddr: UInt, alloc_hash_pc: UInt) = {
    hash_pc := alloc_hash_pc
    pre_vaddr := vaddr(STRIDE_VADDR_BITS - 1, 0)
    confidence := 0.U
    stride := 0.U
  }

  /**
    * Trains the entry with a new access from the owning PC.
    *
    * @param vaddr                   address of the new access
    * @param always_update_pre_vaddr when asserted, pre_vaddr is refreshed even
    *                                if the observed stride is not a valid one
    * @return a pair (can_send_pf, observed_stride): can_send_pf is asserted when
    *         the observed stride is valid, equals the learned stride and the
    *         confidence (before this update) is already saturated;
    *         observed_stride is the raw difference to the previous address
    */
  def update(vaddr: UInt, always_update_pre_vaddr: Bool) = {
    val cur_vaddr = vaddr(STRIDE_VADDR_BITS - 1, 0)
    val observed_stride = cur_vaddr - pre_vaddr
    val observed_stride_blk = block_addr(observed_stride)

    // NOTE: for now, negative strides are disabled (top bit of the difference
    // must be clear). Strides whose block part is 0 or 1 are ignored as well.
    val is_positive = !observed_stride(STRIDE_VADDR_BITS - 1)
    val stride_valid = observed_stride_blk > 1.U && is_positive
    val stride_match = observed_stride === stride

    val conf_is_max = confidence === MAX_CONF.U
    val conf_is_min = confidence === 0.U
    val low_confidence = confidence <= 1.U
    val can_send_pf = stride_valid && stride_match && conf_is_max

    when(stride_valid) {
      // Saturating increment on a match, saturating decrement on a mismatch.
      confidence := Mux(
        stride_match,
        Mux(conf_is_max, confidence, confidence + 1.U),
        Mux(conf_is_min, confidence, confidence - 1.U)
      )
      // Only replace the learned stride once confidence in the old one is low.
      when(!stride_match && low_confidence) {
        stride := observed_stride
      }
    }

    // The previous address follows every valid stride, and optionally every access.
    when(stride_valid || always_update_pre_vaddr) {
      pre_vaddr := cur_vaddr
    }

    (can_send_pf, observed_stride)
  }

}
82*0d32f713Shappy-lx
/**
  * PC-indexed stride training table with a small prefetch-generation pipeline.
  *
  * Pipeline overview:
  *  - s0: hash the training PC and CAM-match it against every entry's tag
  *  - s1: allocate a new entry on a miss, or train the matching entry on a hit
  *  - s2: compute the L1 and L2 prefetch addresses from the learned stride
  *  - s3: send the L1 prefetch request
  *  - s4: send the L2 prefetch request (wins over an s3 request in the same cycle)
  *
  * Note: `io.enable` and `io.dynamic_depth` are part of the interface but are
  * not read anywhere inside this module.
  */
class StrideMetaArray(implicit p: Parameters) extends XSModule with HasStridePrefetchHelper {
  val io = IO(new XSBundle {
    val enable = Input(Bool())
    // TODO: flush all entry when process changing happens, or disable stream prefetch for a while
    val flush = Input(Bool())
    val dynamic_depth = Input(UInt(32.W)) // TODO: enable dynamic stride depth
    val train_req = Flipped(DecoupledIO(new PrefetchReqBundle))
    val prefetch_req = ValidIO(new StreamPrefetchReqBundle)
    // query Stream component to see if a stream pattern has already been detected
    val stream_lookup_req  = ValidIO(new PrefetchReqBundle)
    val stream_lookup_resp = Input(Bool())
  })

  val array = Reg(Vec(STRIDE_ENTRY_NUM, new StrideMetaBundle))
  val replacement = ReplacementPolicy.fromString("plru", STRIDE_ENTRY_NUM)

  // Suffix appended to every Constantin record name created by this module.
  private val hartIdSuffix = p(XSCoreParamsKey).HartId.toString

  // s0: hash pc -> cam all entries
  val s0_can_accept = Wire(Bool())
  val s0_valid = io.train_req.fire
  val s0_vaddr = io.train_req.bits.vaddr
  val s0_pc = io.train_req.bits.pc
  val s0_pc_hash = pc_hash_tag(s0_pc)
  val s0_pc_match_vec = VecInit(array.map(_.hash_pc === s0_pc_hash)).asUInt
  val s0_hit = s0_pc_match_vec.orR
  // On a hit use the matching entry, otherwise the victim chosen by the replacer.
  val s0_index = Mux(s0_hit, OHToUInt(s0_pc_match_vec), replacement.way)
  io.train_req.ready := s0_can_accept
  io.stream_lookup_req.valid := s0_valid
  io.stream_lookup_req.bits  := io.train_req.bits

  when(s0_valid) {
    replacement.access(s0_index)
  }

  // Tags must be unique, otherwise OHToUInt above would be fed a non-one-hot vector.
  assert(PopCount(s0_pc_match_vec) <= 1.U)
  XSPerfAccumulate("s0_valid", s0_valid)
  XSPerfAccumulate("s0_hit", s0_valid && s0_hit)
  XSPerfAccumulate("s0_miss", s0_valid && !s0_hit)

  // s1: alloc or update
  val s1_valid = RegNext(s0_valid)
  val s1_index = RegEnable(s0_index, s0_valid)
  val s1_pc_hash = RegEnable(s0_pc_hash, s0_valid)
  val s1_vaddr = RegEnable(s0_vaddr, s0_valid)
  val s1_hit = RegEnable(s0_hit, s0_valid)
  val s1_alloc = s1_valid && !s1_hit
  val s1_update = s1_valid && s1_hit
  // Stride as stored in the entry before this cycle's training takes effect.
  val s1_stride = array(s1_index).stride
  val s1_new_stride = WireInit(0.U(STRIDE_BITS.W))
  val s1_can_send_pf = WireInit(false.B)
  // Single enable for the s1 -> s2 hand-over (previously repeated at each use).
  val s1_pf_fire = s1_valid && s1_can_send_pf
  // Refuse a request whose hashed PC equals the one currently in s1: its entry
  // is being written this cycle, so the s0 lookup would observe stale state.
  s0_can_accept := !(s1_valid && s1_pc_hash === s0_pc_hash)

  val always_update = WireInit(Constantin.createRecord("always_update" + hartIdSuffix, initValue = ALWAYS_UPDATE_PRE_VADDR.U)) === 1.U

  when(s1_alloc) {
    array(s1_index).alloc(
      vaddr = s1_vaddr,
      alloc_hash_pc = s1_pc_hash
    )
  }.elsewhen(s1_update) {
    val (can_send_pf, new_stride) = array(s1_index).update(s1_vaddr, always_update)
    s1_can_send_pf := can_send_pf
    s1_new_stride := new_stride
  }

  // Look-ahead distance is stride << ratio; only the low 4 bits of each record are used.
  val l1_stride_ratio_const = WireInit(Constantin.createRecord("l1_stride_ratio" + hartIdSuffix, initValue = 2.U))
  val l1_stride_ratio = l1_stride_ratio_const(3, 0)
  val l2_stride_ratio_const = WireInit(Constantin.createRecord("l2_stride_ratio" + hartIdSuffix, initValue = 5.U))
  val l2_stride_ratio = l2_stride_ratio_const(3, 0)

  // s2: calculate L1 & L2 pf addr
  val s2_valid = RegNext(s1_pf_fire)
  val s2_vaddr = RegEnable(s1_vaddr, s1_pf_fire)
  val s2_stride = RegEnable(s1_stride, s1_pf_fire)
  val s2_l1_depth = s2_stride << l1_stride_ratio
  val s2_l1_pf_vaddr = (s2_vaddr + s2_l1_depth)(VAddrBits - 1, 0)
  val s2_l2_depth = s2_stride << l2_stride_ratio
  val s2_l2_pf_vaddr = (s2_vaddr + s2_l2_depth)(VAddrBits - 1, 0)
  val s2_l1_pf_req_bits = (new StreamPrefetchReqBundle).getStreamPrefetchReqBundle(
    vaddr = s2_l1_pf_vaddr,
    width = STRIDE_WIDTH_BLOCKS,
    decr_mode = false.B,
    sink = SINK_L1,
    source = L1_HW_PREFETCH_STRIDE)
  val s2_l2_pf_req_bits = (new StreamPrefetchReqBundle).getStreamPrefetchReqBundle(
    vaddr = s2_l2_pf_vaddr,
    width = STRIDE_WIDTH_BLOCKS,
    decr_mode = false.B,
    sink = SINK_L2,
    source = L1_HW_PREFETCH_STRIDE)

  // s3: send l1 pf out
  val s3_valid = if (LOOK_UP_STREAM) RegNext(s2_valid) && !io.stream_lookup_resp else RegNext(s2_valid)
  val s3_l1_pf_req_bits = RegEnable(s2_l1_pf_req_bits, s2_valid)
  val s3_l2_pf_req_bits = RegEnable(s2_l2_pf_req_bits, s2_valid)

  // s4: send l2 pf out
  val s4_valid = RegNext(s3_valid)
  val s4_l2_pf_req_bits = RegEnable(s3_l2_pf_req_bits, s3_valid)

  // l2 has higher priority than l1 ?
  // When s3 and s4 are valid together, only the s4 (L2) request is driven out;
  // the "l1_pf_block" counter below records those cycles.
  io.prefetch_req.valid := s3_valid || s4_valid
  io.prefetch_req.bits := Mux(s4_valid, s4_l2_pf_req_bits, s3_l1_pf_req_bits)

  XSPerfAccumulate("pf_valid", io.prefetch_req.valid)
  XSPerfAccumulate("l1_pf_valid", s3_valid && !s4_valid)
  XSPerfAccumulate("l1_pf_block", s3_valid && s4_valid)
  XSPerfAccumulate("l2_pf_valid", s4_valid)
  XSPerfAccumulate("detect_stream", io.stream_lookup_resp)
  XSPerfHistogram("high_conf_num", PopCount(VecInit(array.map(_.confidence === MAX_CONF.U))).asUInt, true.B, 0, STRIDE_ENTRY_NUM, 1)
  for (i <- 0 until STRIDE_ENTRY_NUM) {
    val entry_updated = i.U === s1_index && s1_update
    XSPerfAccumulate(s"entry_${i}_update", entry_updated)
    // A "disturb" is a mismatching small stride (0..3) hitting a fully confident entry.
    for (j <- 0 until 4) {
      XSPerfAccumulate(s"entry_${i}_disturb_${j}", entry_updated &&
                                                   j.U === s1_new_stride &&
                                                   array(s1_index).confidence === MAX_CONF.U &&
                                                   array(s1_index).stride =/= s1_new_stride
      )
    }
  }

  // Reset / flush. This must stay the last connection to `array` so that it
  // overrides any s1 alloc/update happening in the same cycle. The delayed
  // flush is registered once here instead of once per entry.
  val flush_array = reset.asBool || RegNext(io.flush)
  when(flush_array) {
    array.zipWithIndex.foreach { case (entry, i) => entry.reset(i) }
  }
}