xref: /XiangShan/src/main/scala/xiangshan/frontend/icache/IPrefetch.scala (revision 399766d91deff47155ba413cb78039ffb1cd66ee)
/***************************************************************************************
  * Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
  * Copyright (c) 2020-2021 Peng Cheng Laboratory
  *
  * XiangShan is licensed under Mulan PSL v2.
  * You can use this software according to the terms and conditions of the Mulan PSL v2.
  * You may obtain a copy of Mulan PSL v2 at:
  *          http://license.coscl.org.cn/MulanPSL2
  *
  * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
  * EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
  * MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
  *
  * See the Mulan PSL v2 for more details.
  ***************************************************************************************/

package xiangshan.frontend.icache

import chisel3._
import chisel3.util._
import difftest._
import freechips.rocketchip.tilelink._
import huancun.PreferCacheKey
import org.chipsalliance.cde.config.Parameters
import utility._
import utils._
import xiangshan.SoftIfetchPrefetchBundle
import xiangshan.XSCoreParamsKey
import xiangshan.backend.fu.PMPReqBundle
import xiangshan.backend.fu.PMPRespBundle
import xiangshan.cache.mmu._
import xiangshan.frontend._

abstract class IPrefetchBundle(implicit p: Parameters) extends ICacheBundle
abstract class IPrefetchModule(implicit p: Parameters) extends ICacheModule

class IPrefetchReq(implicit p: Parameters) extends IPrefetchBundle {
  val startAddr:        UInt   = UInt(VAddrBits.W)
  val nextlineStart:    UInt   = UInt(VAddrBits.W)
  val ftqIdx:           FtqPtr = new FtqPtr
  val isSoftPrefetch:   Bool   = Bool()
  val backendException: UInt   = UInt(ExceptionType.width.W)
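  // startAddr(blockOffBits - 1) set means the start address falls in the upper half of a cacheline;
  // assuming a fetch block no wider than half a cacheline, such a request may spill into the next
  // line, which is why both startAddr and nextlineStart are carried.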
  def crossCacheline:   Bool   = startAddr(blockOffBits - 1) === 1.U

  def fromFtqICacheInfo(info: FtqICacheInfo): IPrefetchReq = {
    this.startAddr      := info.startAddr
    this.nextlineStart  := info.nextlineStart
    this.ftqIdx         := info.ftqIdx
    this.isSoftPrefetch := false.B
    this
  }

  def fromSoftPrefetch(req: SoftIfetchPrefetchBundle): IPrefetchReq = {
    this.startAddr      := req.vaddr
    this.nextlineStart  := req.vaddr + (1 << blockOffBits).U
    this.ftqIdx         := DontCare
    this.isSoftPrefetch := true.B
    this
  }
}
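// Both helpers connect the fields and return `this`, so a caller can build a request inline.
// A minimal usage sketch (assumed caller code, not part of this file):
//   prefetchPipe.io.req.bits := Mux(
//     softPrefetchValid,
//     Wire(new IPrefetchReq).fromSoftPrefetch(softPrefetchReq),
//     Wire(new IPrefetchReq).fromFtqICacheInfo(ftqReq)
//   )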

class IPrefetchIO(implicit p: Parameters) extends IPrefetchBundle {
  // control
  val csr_pf_enable     = Input(Bool())
  val csr_parity_enable = Input(Bool())
  val flush             = Input(Bool())

  val req            = Flipped(Decoupled(new IPrefetchReq))
  val flushFromBpu   = Flipped(new BpuFlushInfo)
  val itlb           = Vec(PortNumber, new TlbRequestIO)
  val pmp            = Vec(PortNumber, new ICachePMPBundle)
  val metaRead       = new ICacheMetaReqBundle
  val MSHRReq        = DecoupledIO(new ICacheMissReq)
  val MSHRResp       = Flipped(ValidIO(new ICacheMissResp))
  val wayLookupWrite = DecoupledIO(new WayLookupInfo)
}

class IPrefetchPipe(implicit p: Parameters) extends IPrefetchModule {
  val io: IPrefetchIO = IO(new IPrefetchIO)

  val (toITLB, fromITLB) = (io.itlb.map(_.req), io.itlb.map(_.resp))
  val (toPMP, fromPMP)   = (io.pmp.map(_.req), io.pmp.map(_.resp))
  val (toMeta, fromMeta) = (io.metaRead.toIMeta, io.metaRead.fromIMeta)
  val (toMSHR, fromMSHR) = (io.MSHRReq, io.MSHRResp)
  val toWayLookup        = io.wayLookupWrite

  val s0_fire, s1_fire, s2_fire            = WireInit(false.B)
  val s0_discard, s2_discard               = WireInit(false.B)
  val s0_ready, s1_ready, s2_ready         = WireInit(false.B)
  val s0_flush, s1_flush, s2_flush         = WireInit(false.B)
  val from_bpu_s0_flush, from_bpu_s1_flush = WireInit(false.B)
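  // Per-stage handshake: s*_fire means a request leaves that stage this cycle, s*_ready means the
  // stage can accept a new request, and s*_flush kills whatever is in it. from_bpu_s*_flush is
  // raised when a BPU stage-2/3 redirect invalidates the FTQ entry currently being prefetched.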

  /**
    ******************************************************************************
    * IPrefetch Stage 0
    * - 1. receive ftq req
    * - 2. send req to ITLB
    * - 3. send req to Meta SRAM
    ******************************************************************************
    */
  val s0_valid = io.req.valid

  /**
    ******************************************************************************
    * receive ftq req
    ******************************************************************************
    */
  val s0_req_vaddr        = VecInit(Seq(io.req.bits.startAddr, io.req.bits.nextlineStart))
  val s0_req_ftqIdx       = io.req.bits.ftqIdx
  val s0_isSoftPrefetch   = io.req.bits.isSoftPrefetch
  val s0_doubleline       = io.req.bits.crossCacheline
  val s0_req_vSetIdx      = s0_req_vaddr.map(get_idx)
  val s0_backendException = VecInit(Seq.fill(PortNumber)(io.req.bits.backendException))

  from_bpu_s0_flush := !s0_isSoftPrefetch && (io.flushFromBpu.shouldFlushByStage2(s0_req_ftqIdx) ||
    io.flushFromBpu.shouldFlushByStage3(s0_req_ftqIdx))
  s0_flush := io.flush || from_bpu_s0_flush || s1_flush

  val s0_can_go = s1_ready && toITLB(0).ready && toITLB(1).ready && toMeta.ready
  io.req.ready := s0_can_go

  s0_fire := s0_valid && s0_can_go && !s0_flush

  /**
    ******************************************************************************
    * IPrefetch Stage 1
    * - 1. Receive resp from ITLB
    * - 2. Receive resp from IMeta and check
    * - 3. Monitor the requests from missUnit to write to SRAM.
    * - 4. Write wayLookup
    ******************************************************************************
    */
  val s1_valid = generatePipeControl(lastFire = s0_fire, thisFire = s1_fire, thisFlush = s1_flush, lastFlush = false.B)

  val s1_req_vaddr        = RegEnable(s0_req_vaddr, 0.U.asTypeOf(s0_req_vaddr), s0_fire)
  val s1_isSoftPrefetch   = RegEnable(s0_isSoftPrefetch, 0.U.asTypeOf(s0_isSoftPrefetch), s0_fire)
  val s1_doubleline       = RegEnable(s0_doubleline, 0.U.asTypeOf(s0_doubleline), s0_fire)
  val s1_req_ftqIdx       = RegEnable(s0_req_ftqIdx, 0.U.asTypeOf(s0_req_ftqIdx), s0_fire)
  val s1_req_vSetIdx      = VecInit(s1_req_vaddr.map(get_idx))
  val s1_backendException = RegEnable(s0_backendException, 0.U.asTypeOf(s0_backendException), s0_fire)

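  // s1 resend/enqueue state machine (roughly, from the switch() below):
  //   m_idle:       nothing pending; a request can flow through in a single beat
  //   m_itlbResend: the ITLB missed; keep resending until the translation comes back
  //   m_metaResend: translation done, but the meta SRAM port was busy; retry the meta read
  //   m_enqWay:     waiting to enqueue the way-lookup information into WayLookup
  //   m_enterS2:    everything done in s1; waiting for stage 2 to become ready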
  val m_idle :: m_itlbResend :: m_metaResend :: m_enqWay :: m_enterS2 :: Nil = Enum(5)
  val state                                                                  = RegInit(m_idle)
  val next_state                                                             = WireDefault(state)
  val s0_fire_r                                                              = RegNext(s0_fire)
  dontTouch(state)
  dontTouch(next_state)
  state := next_state

  /**
    ******************************************************************************
    * resend itlb req if miss
    ******************************************************************************
    */
  val s1_wait_itlb = RegInit(VecInit(Seq.fill(PortNumber)(false.B)))
  (0 until PortNumber).foreach { i =>
    when(s1_flush) {
      s1_wait_itlb(i) := false.B
    }.elsewhen(RegNext(s0_fire) && fromITLB(i).bits.miss) {
      s1_wait_itlb(i) := true.B
    }.elsewhen(s1_wait_itlb(i) && !fromITLB(i).bits.miss) {
      s1_wait_itlb(i) := false.B
    }
  }
  val s1_need_itlb = VecInit(Seq(
    (RegNext(s0_fire) || s1_wait_itlb(0)) && fromITLB(0).bits.miss,
    (RegNext(s0_fire) || s1_wait_itlb(1)) && fromITLB(1).bits.miss && s1_doubleline
  ))
  val tlb_valid_pulse = VecInit(Seq(
    (RegNext(s0_fire) || s1_wait_itlb(0)) && !fromITLB(0).bits.miss,
    (RegNext(s0_fire) || s1_wait_itlb(1)) && !fromITLB(1).bits.miss && s1_doubleline
  ))
  val tlb_valid_latch =
    VecInit((0 until PortNumber).map(i => ValidHoldBypass(tlb_valid_pulse(i), s1_fire, flush = s1_flush)))
  val itlb_finish = tlb_valid_latch(0) && (!s1_doubleline || tlb_valid_latch(1))
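  // tlb_valid_pulse is high only in the cycle the ITLB returns a non-miss response;
  // tlb_valid_latch (ValidHoldBypass) keeps it asserted until s1 fires or is flushed,
  // so itlb_finish stays valid while s1 is stalled waiting for WayLookup or stage 2.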

  for (i <- 0 until PortNumber) {
    toITLB(i).valid             := s1_need_itlb(i) || (s0_valid && (if (i == 0) true.B else s0_doubleline))
    toITLB(i).bits              := DontCare
    toITLB(i).bits.size         := 3.U
    toITLB(i).bits.vaddr        := Mux(s1_need_itlb(i), s1_req_vaddr(i), s0_req_vaddr(i))
    toITLB(i).bits.debug.pc     := Mux(s1_need_itlb(i), s1_req_vaddr(i), s0_req_vaddr(i))
    toITLB(i).bits.cmd          := TlbCmd.exec
    toITLB(i).bits.no_translate := false.B
  }
  fromITLB.foreach(_.ready := true.B)
  io.itlb.foreach(_.req_kill := false.B)
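  // The ITLB request is issued either for a fresh s0 request or as a resend using the s1 vaddr;
  // responses are always accepted and never killed here, since misses are simply retried by the
  // m_itlbResend state above.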

  /**
    ******************************************************************************
    * Receive resp from ITLB
    ******************************************************************************
    */
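  // The ITLB response is only presented for one cycle, so each field below is captured on
  // tlb_valid_pulse and held (ResultHoldBypass / RegEnable) until s1 is allowed to move on.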
  val s1_req_paddr_wire = VecInit(fromITLB.map(_.bits.paddr(0)))
  val s1_req_paddr_reg = VecInit((0 until PortNumber).map(i =>
    RegEnable(s1_req_paddr_wire(i), 0.U(PAddrBits.W), tlb_valid_pulse(i))
  ))
  val s1_req_paddr = VecInit((0 until PortNumber).map(i =>
    Mux(tlb_valid_pulse(i), s1_req_paddr_wire(i), s1_req_paddr_reg(i))
  ))
  val s1_req_gpaddr_tmp = VecInit((0 until PortNumber).map(i =>
    ResultHoldBypass(
      valid = tlb_valid_pulse(i),
      // NOTE: we don't use GPAddrBits or XLEN here, refer to ICacheMainPipe.scala L43-48 and PR#3795
      init = 0.U(PAddrBitsMax.W),
      data = fromITLB(i).bits.gpaddr(0)
    )
  ))
  val s1_req_isForVSnonLeafPTE_tmp = VecInit((0 until PortNumber).map(i =>
    ResultHoldBypass(
      valid = tlb_valid_pulse(i),
      init = 0.U.asTypeOf(fromITLB(i).bits.isForVSnonLeafPTE),
      data = fromITLB(i).bits.isForVSnonLeafPTE
    )
  ))
  val s1_itlb_exception = VecInit((0 until PortNumber).map(i =>
    ResultHoldBypass(
      valid = tlb_valid_pulse(i),
      init = 0.U(ExceptionType.width.W),
      data = ExceptionType.fromTlbResp(fromITLB(i).bits)
    )
  ))
  val s1_itlb_pbmt = VecInit((0 until PortNumber).map(i =>
    ResultHoldBypass(
      valid = tlb_valid_pulse(i),
      init = 0.U.asTypeOf(fromITLB(i).bits.pbmt(0)),
      data = fromITLB(i).bits.pbmt(0)
    )
  ))
  val s1_itlb_exception_gpf = VecInit(s1_itlb_exception.map(_ === ExceptionType.gpf))

  /* Select gpaddr with the first gpf
   * Note: the backend wants the base guest physical address of a fetch block;
   *       for port(i), its base gpaddr is actually (gpaddr - i * blocksize)
   *       see GPAMem: https://github.com/OpenXiangShan/XiangShan/blob/344cf5d55568dd40cd658a9ee66047a505eeb504/src/main/scala/xiangshan/backend/GPAMem.scala#L33-L34
   *       see also: https://github.com/OpenXiangShan/XiangShan/blob/344cf5d55568dd40cd658a9ee66047a505eeb504/src/main/scala/xiangshan/frontend/IFU.scala#L374-L375
   */
  val s1_req_gpaddr = PriorityMuxDefault(
    s1_itlb_exception_gpf zip (0 until PortNumber).map(i => s1_req_gpaddr_tmp(i) - (i << blockOffBits).U),
    0.U.asTypeOf(s1_req_gpaddr_tmp(0))
  )

  val s1_req_isForVSnonLeafPTE = PriorityMuxDefault(
    s1_itlb_exception_gpf zip s1_req_isForVSnonLeafPTE_tmp,
    0.U.asTypeOf(s1_req_isForVSnonLeafPTE_tmp(0))
  )

  /**
    ******************************************************************************
    * resend metaArray read req when itlb miss finishes
    ******************************************************************************
    */
  val s1_need_meta = ((state === m_itlbResend) && itlb_finish) || (state === m_metaResend)
  toMeta.valid             := s1_need_meta || s0_valid
  toMeta.bits              := DontCare
  toMeta.bits.isDoubleLine := Mux(s1_need_meta, s1_doubleline, s0_doubleline)

  for (i <- 0 until PortNumber) {
    toMeta.bits.vSetIdx(i) := Mux(s1_need_meta, s1_req_vSetIdx(i), s0_req_vSetIdx(i))
  }

  /**
    ******************************************************************************
    * Receive resp from IMeta and check
    ******************************************************************************
    */
  val s1_req_ptags = VecInit(s1_req_paddr.map(get_phy_tag))

  val s1_meta_ptags  = fromMeta.tags
  val s1_meta_valids = fromMeta.entryValid

  def get_waymask(paddrs: Vec[UInt]): Vec[UInt] = {
    val ptags = paddrs.map(get_phy_tag)
    val tag_eq_vec =
      VecInit((0 until PortNumber).map(p => VecInit((0 until nWays).map(w => s1_meta_ptags(p)(w) === ptags(p)))))
    val tag_match_vec = VecInit((0 until PortNumber).map(k =>
      VecInit(tag_eq_vec(k).zipWithIndex.map { case (way_tag_eq, w) => way_tag_eq && s1_meta_valids(k)(w) })
    ))
    val waymasks = VecInit(tag_match_vec.map(_.asUInt))
    waymasks
  }
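  // get_waymask compares the physical tag of each port against every way of the indexed set and
  // masks out invalid entries, giving a one-hot (or all-zero) hit vector per port.
  // For example, with nWays = 4, a valid tag match in way 2 for port 0 yields waymasks(0) = "b0100".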

  val s1_SRAM_waymasks = VecInit((0 until PortNumber).map { port =>
    Mux(tlb_valid_pulse(port), get_waymask(s1_req_paddr_wire)(port), get_waymask(s1_req_paddr_reg)(port))
  })

  // select ecc code
  /* NOTE:
   * When ECC check fails, s1_waymasks may be corrupted, so this selected meta_codes may be wrong.
   * However, we can guarantee that the request sent to the L2 cache and the response to the IFU are both correct,
   * since the probability of an abnormal bit flip is very small; consider at most 1 bit being wrong:
   * 1. miss -> fake hit: The wrong bit in s1_waymasks was set to true.B, thus selects the wrong meta_codes,
   *                      but we can detect this by checking whether `encodeMetaECC(req_ptags) === meta_codes`.
   * 2. hit -> fake multi-hit: In normal situations, multi-hit never happens, so a multi-hit indicates an ECC failure,
   *                           we can detect this by checking whether `PopCount(waymasks) <= 1.U`,
   *                           and meta_codes is not important in this situation.
   * 3. hit -> fake miss: We can't detect this, but we can (pre)fetch the correct data from L2 cache, so it's not a problem.
   * 4. hit -> hit / miss -> miss: ECC failure happens in an irrelevant way, so we don't care about it this time.
   */
  val s1_SRAM_meta_codes = VecInit((0 until PortNumber).map { port =>
    Mux1H(s1_SRAM_waymasks(port), fromMeta.codes(port))
  })

  /**
    ******************************************************************************
    * update waymasks and meta_codes according to MSHR update data
    ******************************************************************************
    */
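  // If the MSHR is refilling the same set: a matching ptag means the line being looked up is being
  // (or has just been) filled, so take the refill's waymask and recompute its ECC code; a different
  // ptag written into the same way means that way now holds another line, so a previously detected
  // hit on it is stale and the mask is cleared.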
  def update_meta_info(mask: UInt, vSetIdx: UInt, ptag: UInt, code: UInt): Tuple2[UInt, UInt] = {
    require(mask.getWidth == nWays)
    val new_mask  = WireInit(mask)
    val new_code  = WireInit(code)
    val valid     = fromMSHR.valid && !fromMSHR.bits.corrupt
    val vset_same = fromMSHR.bits.vSetIdx === vSetIdx
    val ptag_same = getPhyTagFromBlk(fromMSHR.bits.blkPaddr) === ptag
    val way_same  = fromMSHR.bits.waymask === mask
    when(valid && vset_same) {
      when(ptag_same) {
        new_mask := fromMSHR.bits.waymask
        // also update meta_codes
        // we have getPhyTagFromBlk(fromMSHR.bits.blkPaddr) === ptag, so we can use ptag directly for better timing
        new_code := encodeMetaECC(ptag)
      }.elsewhen(way_same) {
        new_mask := 0.U
        // we don't care about new_code, since it's not used for a missed request
      }
    }
    (new_mask, new_code)
  }

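  // s1_SRAM_valid: the meta SRAM response arrives exactly one cycle after a read was issued,
  // either from s0 (s0_fire_r) or from a resend (s1_need_meta accepted last cycle).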
  val s1_SRAM_valid   = s0_fire_r || RegNext(s1_need_meta && toMeta.ready)
  val s1_MSHR_valid   = fromMSHR.valid && !fromMSHR.bits.corrupt
  val s1_waymasks     = WireInit(VecInit(Seq.fill(PortNumber)(0.U(nWays.W))))
  val s1_waymasks_r   = RegEnable(s1_waymasks, 0.U.asTypeOf(s1_waymasks), s1_SRAM_valid || s1_MSHR_valid)
  val s1_meta_codes   = WireInit(VecInit(Seq.fill(PortNumber)(0.U(ICacheMetaCodeBits.W))))
  val s1_meta_codes_r = RegEnable(s1_meta_codes, 0.U.asTypeOf(s1_meta_codes), s1_SRAM_valid || s1_MSHR_valid)

  // update waymasks and meta_codes
  (0 until PortNumber).foreach { i =>
    val old_waymask    = Mux(s1_SRAM_valid, s1_SRAM_waymasks(i), s1_waymasks_r(i))
    val old_meta_codes = Mux(s1_SRAM_valid, s1_SRAM_meta_codes(i), s1_meta_codes_r(i))
    val new_info       = update_meta_info(old_waymask, s1_req_vSetIdx(i), s1_req_ptags(i), old_meta_codes)
    s1_waymasks(i)   := new_info._1
    s1_meta_codes(i) := new_info._2
  }

  /**
    ******************************************************************************
    * send enqueue req to WayLookup
    ******************************************************************************
    */
  // Disallow enqueuing wayLookup when SRAM write occurs.
  toWayLookup.valid := ((state === m_enqWay) || ((state === m_idle) && itlb_finish)) &&
    !s1_flush && !fromMSHR.valid && !s1_isSoftPrefetch // do not enqueue soft prefetch
  toWayLookup.bits.vSetIdx           := s1_req_vSetIdx
  toWayLookup.bits.waymask           := s1_waymasks
  toWayLookup.bits.ptag              := s1_req_ptags
  toWayLookup.bits.gpaddr            := s1_req_gpaddr
  toWayLookup.bits.isForVSnonLeafPTE := s1_req_isForVSnonLeafPTE
  toWayLookup.bits.meta_codes        := s1_meta_codes
  (0 until PortNumber).foreach { i =>
    val excpValid = if (i == 0) true.B
    else s1_doubleline // exception in the first line is always valid; in the second line it is valid only for a doubleline request
    // Send s1_itlb_exception to WayLookup (instead of s1_exception_out) for better timing. Will check pmp again in mainPipe
    toWayLookup.bits.itlb_exception(i) := Mux(excpValid, s1_itlb_exception(i), ExceptionType.none)
    toWayLookup.bits.itlb_pbmt(i)      := Mux(excpValid, s1_itlb_pbmt(i), Pbmt.pma)
  }

  val s1_waymasks_vec = s1_waymasks.map(_.asTypeOf(Vec(nWays, Bool())))
  when(toWayLookup.fire) {
    assert(
      PopCount(s1_waymasks_vec(0)) <= 1.U && (PopCount(s1_waymasks_vec(1)) <= 1.U || !s1_doubleline),
      "Multiple hit in main pipe, port0:is=%d,ptag=0x%x,vidx=0x%x,vaddr=0x%x port1:is=%d,ptag=0x%x,vidx=0x%x,vaddr=0x%x ",
      PopCount(s1_waymasks_vec(0)) > 1.U,
      s1_req_ptags(0),
      get_idx(s1_req_vaddr(0)),
      s1_req_vaddr(0),
      PopCount(s1_waymasks_vec(1)) > 1.U && s1_doubleline,
      s1_req_ptags(1),
      get_idx(s1_req_vaddr(1)),
      s1_req_vaddr(1)
    )
  }

  /**
    ******************************************************************************
    * PMP check
    ******************************************************************************
    */
  toPMP.zipWithIndex.foreach { case (p, i) =>
    // if itlb has exception, paddr can be invalid, therefore pmp check can be skipped
    p.valid     := s1_valid // !ExceptionType.hasException(s1_itlb_exception(i))
    p.bits.addr := s1_req_paddr(i)
    p.bits.size := 3.U      // TODO
    p.bits.cmd  := TlbCmd.exec
  }
  val s1_pmp_exception = VecInit(fromPMP.map(ExceptionType.fromPMPResp))
  val s1_pmp_mmio      = VecInit(fromPMP.map(_.mmio))

  // merge s1 itlb/pmp exceptions, itlb has the highest priority, pmp next
  // for timing consideration, meta_corrupt is not merged, and it will NOT cancel prefetch
  val s1_exception_out = ExceptionType.merge(
    s1_backendException,
    s1_itlb_exception,
    s1_pmp_exception
  )

  // merge pmp mmio and itlb pbmt
  val s1_mmio = VecInit((s1_pmp_mmio zip s1_itlb_pbmt).map { case (mmio, pbmt) =>
    mmio || Pbmt.isUncache(pbmt)
  })

  /**
    ******************************************************************************
    * state machine
    ******************************************************************************
    */

  switch(state) {
    is(m_idle) {
      when(s1_valid) {
        when(!itlb_finish) {
          next_state := m_itlbResend
        }.elsewhen(!toWayLookup.fire) { // itlb_finish
          next_state := m_enqWay
        }.elsewhen(!s2_ready) { // itlb_finish && toWayLookup.fire
          next_state := m_enterS2
        } // .otherwise { next_state := m_idle }
      }   // .otherwise { next_state := m_idle }  // !s1_valid
    }
    is(m_itlbResend) {
      when(itlb_finish) {
        when(!toMeta.ready) {
          next_state := m_metaResend
        }.otherwise { // toMeta.ready
          next_state := m_enqWay
        }
      } // .otherwise { next_state := m_itlbResend }  // !itlb_finish
    }
    is(m_metaResend) {
      when(toMeta.ready) {
        next_state := m_enqWay
      } // .otherwise { next_state := m_metaResend }  // !toMeta.ready
    }
    is(m_enqWay) {
      when(toWayLookup.fire || s1_isSoftPrefetch) {
        when(!s2_ready) {
          next_state := m_enterS2
        }.otherwise { // s2_ready
          next_state := m_idle
        }
      } // .otherwise { next_state := m_enqWay }
    }
    is(m_enterS2) {
      when(s2_ready) {
        next_state := m_idle
      }
    }
  }

  when(s1_flush) {
    next_state := m_idle
  }

  /** Stage 1 control */
  from_bpu_s1_flush := s1_valid && !s1_isSoftPrefetch && io.flushFromBpu.shouldFlushByStage3(s1_req_ftqIdx)
  s1_flush          := io.flush || from_bpu_s1_flush

  s1_ready := next_state === m_idle
  s1_fire  := (next_state === m_idle) && s1_valid && !s1_flush // used to clear s1_valid & tlb_valid_latch
  val s1_real_fire = s1_fire && io.csr_pf_enable // the real "s1 fire": s1 actually enters s2
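  // When prefetching is disabled (io.csr_pf_enable low), s1 still fires and drains normally
  // (WayLookup is still written), but the request never enters s2, so no miss request is sent.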

  /**
    ******************************************************************************
    * IPrefetch Stage 2
    * - 1. Monitor the requests from missUnit to write to SRAM.
    * - 2. send req to missUnit
    ******************************************************************************
    */
  val s2_valid =
    generatePipeControl(lastFire = s1_real_fire, thisFire = s2_fire, thisFlush = s2_flush, lastFlush = false.B)

  val s2_req_vaddr      = RegEnable(s1_req_vaddr, 0.U.asTypeOf(s1_req_vaddr), s1_real_fire)
  val s2_isSoftPrefetch = RegEnable(s1_isSoftPrefetch, 0.U.asTypeOf(s1_isSoftPrefetch), s1_real_fire)
  val s2_doubleline     = RegEnable(s1_doubleline, 0.U.asTypeOf(s1_doubleline), s1_real_fire)
  val s2_req_paddr      = RegEnable(s1_req_paddr, 0.U.asTypeOf(s1_req_paddr), s1_real_fire)
  val s2_exception =
    RegEnable(s1_exception_out, 0.U.asTypeOf(s1_exception_out), s1_real_fire) // includes itlb/pmp exception
//  val s2_exception_in = RegEnable(s1_exception_out, 0.U.asTypeOf(s1_exception_out), s1_real_fire)  // disabled for timing consideration
  val s2_mmio     = RegEnable(s1_mmio, 0.U.asTypeOf(s1_mmio), s1_real_fire)
  val s2_waymasks = RegEnable(s1_waymasks, 0.U.asTypeOf(s1_waymasks), s1_real_fire)
//  val s2_meta_codes   = RegEnable(s1_meta_codes,    0.U.asTypeOf(s1_meta_codes),    s1_real_fire)  // disabled for timing consideration

  val s2_req_vSetIdx = s2_req_vaddr.map(get_idx)
  val s2_req_ptags   = s2_req_paddr.map(get_phy_tag)

  // disabled for timing consideration
//  // do metaArray ECC check
//  val s2_meta_corrupt = VecInit((s2_req_ptags zip s2_meta_codes zip s2_waymasks).map{ case ((meta, code), waymask) =>
//    val hit_num = PopCount(waymask)
//    // NOTE: if not hit, encodeMetaECC(meta) =/= code can also be true, but we don't care about it
//    (encodeMetaECC(meta) =/= code && hit_num === 1.U) ||  // hit one way, but parity code does not match, ECC failure
//      hit_num > 1.U                                       // hit multi way, must be an ECC failure
//  })
//
//  // generate exception
//  val s2_meta_exception = VecInit(s2_meta_corrupt.map(ExceptionType.fromECC(io.csr_parity_enable, _)))
//
//  // merge meta exception and itlb/pmp exception
//  val s2_exception = ExceptionType.merge(s2_exception_in, s2_meta_exception)

  /**
    ******************************************************************************
    * Monitor the requests from missUnit to write to SRAM
    ******************************************************************************
    */

  /* NOTE: If fromMSHR.bits.corrupt, we should set s2_MSHR_hits to false.B, and send prefetch requests again.
   * This is the opposite of how mainPipe handles fromMSHR.bits.corrupt,
   *   in which we should set s2_MSHR_hits to true.B, and send error to ifu.
   */
  val s2_MSHR_match = VecInit((0 until PortNumber).map(i =>
    (s2_req_vSetIdx(i) === fromMSHR.bits.vSetIdx) &&
      (s2_req_ptags(i) === getPhyTagFromBlk(fromMSHR.bits.blkPaddr)) &&
      s2_valid && fromMSHR.valid && !fromMSHR.bits.corrupt
  ))
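  // ValidHoldBypass keeps an observed refill hit until s2 finishes or is flushed, so a line that
  // the missUnit fills while s2 is still waiting is not requested again.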
  val s2_MSHR_hits = (0 until PortNumber).map(i => ValidHoldBypass(s2_MSHR_match(i), s2_fire || s2_flush))

  val s2_SRAM_hits = s2_waymasks.map(_.orR)
  val s2_hits      = VecInit((0 until PortNumber).map(i => s2_MSHR_hits(i) || s2_SRAM_hits(i)))

  /* s2_exception includes itlb pf/gpf/af, pmp af and meta corruption (af), none of which should be prefetched
   * mmio should not be prefetched
   * also, if the previous port has an exception, the latter port should not be prefetched either
   */
  val s2_miss = VecInit((0 until PortNumber).map { i =>
    !s2_hits(i) && (if (i == 0) true.B else s2_doubleline) &&
    !ExceptionType.hasException(s2_exception.take(i + 1)) &&
    s2_mmio.take(i + 1).map(!_).reduce(_ && _)
  })

  /**
    ******************************************************************************
    * send req to missUnit
    ******************************************************************************
    */
  val toMSHRArbiter = Module(new Arbiter(new ICacheMissReq, PortNumber))

  // To avoid sending duplicate requests.
  val has_send = RegInit(VecInit(Seq.fill(PortNumber)(false.B)))
  (0 until PortNumber).foreach { i =>
    when(s1_real_fire) {
      has_send(i) := false.B
    }.elsewhen(toMSHRArbiter.io.in(i).fire) {
      has_send(i) := true.B
    }
  }

  (0 until PortNumber).map { i =>
    toMSHRArbiter.io.in(i).valid         := s2_valid && s2_miss(i) && !has_send(i)
    toMSHRArbiter.io.in(i).bits.blkPaddr := getBlkAddr(s2_req_paddr(i))
    toMSHRArbiter.io.in(i).bits.vSetIdx  := s2_req_vSetIdx(i)
  }

  toMSHR <> toMSHRArbiter.io.out

  s2_flush := io.flush

  // toMSHRArbiter.io.in(i).fire is not used here for timing consideration
  // val s2_finish  = (0 until PortNumber).map(i => has_send(i) || !s2_miss(i) || toMSHRArbiter.io.in(i).fire).reduce(_&&_)
  val s2_finish = (0 until PortNumber).map(i => has_send(i) || !s2_miss(i)).reduce(_ && _)
  s2_ready := s2_finish || !s2_valid
  s2_fire  := s2_valid && s2_finish && !s2_flush

  /** PerfAccumulate */
  // the number of BPU flushes
  XSPerfAccumulate("bpu_s0_flush", from_bpu_s0_flush)
  XSPerfAccumulate("bpu_s1_flush", from_bpu_s1_flush)
  // the number of prefetch requests received from ftq or backend (software prefetch)
//  XSPerfAccumulate("prefetch_req_receive", io.req.fire)
  XSPerfAccumulate("prefetch_req_receive_hw", io.req.fire && !io.req.bits.isSoftPrefetch)
  XSPerfAccumulate("prefetch_req_receive_sw", io.req.fire && io.req.bits.isSoftPrefetch)
  // the number of prefetch requests sent to missUnit
//  XSPerfAccumulate("prefetch_req_send", toMSHR.fire)
  XSPerfAccumulate("prefetch_req_send_hw", toMSHR.fire && !s2_isSoftPrefetch)
  XSPerfAccumulate("prefetch_req_send_sw", toMSHR.fire && s2_isSoftPrefetch)
  XSPerfAccumulate("to_missUnit_stall", toMSHR.valid && !toMSHR.ready)

  /**
    * Count the number of requests that are filtered for various reasons.
    * The number of prefetch discards counted by the performance accumulators may be slightly
    * larger than the number actually discarded, because a cancelled request can have multiple
    * reasons at the same time.
    */
  // discard prefetch request by flush
  // XSPerfAccumulate("fdip_prefetch_discard_by_tlb_except",  p1_discard && p1_tlb_except)
  // // discard prefetch request by hit icache SRAM
  // XSPerfAccumulate("fdip_prefetch_discard_by_hit_cache",   p2_discard && p1_meta_hit)
  // // discard prefetch request by hit write SRAM
  // XSPerfAccumulate("fdip_prefetch_discard_by_p1_monoitor", p1_discard && p1_monitor_hit)
  // // discard prefetch request by pmp except or mmio
  // XSPerfAccumulate("fdip_prefetch_discard_by_pmp",         p2_discard && p2_pmp_except)
  // // discard prefetch request by hit mainPipe info
  // // XSPerfAccumulate("fdip_prefetch_discard_by_mainPipe",    p2_discard && p2_mainPipe_hit)
}