/***************************************************************************************
* Copyright (c) 2024 Beijing Institute of Open Source Chip (BOSC)
* Copyright (c) 2020-2024 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.frontend.icache

import chisel3._
import chisel3.util._
import difftest._
import freechips.rocketchip.tilelink._
import org.chipsalliance.cde.config.Parameters
import utility._
import xiangshan._

class DeMultiplexerIO[T <: Data](gen: T, n: Int) extends Bundle {
  val in:     DecoupledIO[T]      = Flipped(DecoupledIO(gen))
  val out:    Vec[DecoupledIO[T]] = Vec(n, DecoupledIO(gen))
  val chosen: UInt                = Output(UInt(log2Ceil(n).W))
}

/** Hardware module that sequences 1 producer onto n consumers.
 * Priority is given to the lower-indexed consumer.
 */
class DeMultiplexer[T <: Data](val gen: T, val n: Int) extends Module {
  require(n >= 2)
  val io: DeMultiplexerIO[T] = IO(new DeMultiplexerIO(gen, n))

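  // grant(i) is asserted when some lower-indexed output is already ready;
  // such outputs are masked, so the transfer goes to the lowest-indexed ready consumer.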
  private val grant = false.B +: (1 until n).map(i => (0 until i).map(io.out(_).ready).reduce(_ || _))
  (0 until n).foreach { i =>
    io.out(i).bits  := io.in.bits
    io.out(i).valid := !grant(i) && io.in.valid
  }

  io.in.ready := grant.last || io.out.last.ready
  io.chosen   := PriorityEncoder(VecInit(io.out.map(_.ready)))
}

class MuxBundleIO[T <: Data](gen: T, n: Int) extends Bundle {
  val sel: UInt                = Input(UInt(log2Ceil(n).W))
  val in:  Vec[DecoupledIO[T]] = Flipped(Vec(n, DecoupledIO(gen)))
  val out: DecoupledIO[T]      = DecoupledIO(gen)
}

class MuxBundle[T <: Data](val gen: T, val n: Int) extends Module {
  require(n >= 2)
  val io: MuxBundleIO[T] = IO(new MuxBundleIO[T](gen, n))

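  // default everything to DontCare, then overwrite below: only the input selected
  // by io.sel is forwarded to io.out, and only that input sees io.out.ready.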
  io.in <> DontCare
  io.out <> DontCare
  (0 until n).foreach { i =>
    when(io.sel === i.U) {
      io.out <> io.in(i)
    }
    io.in(i).ready := (io.sel === i.U) && io.out.ready
  }
}

class ICacheMissReq(implicit p: Parameters) extends ICacheBundle {
  val blkPaddr: UInt = UInt((PAddrBits - blockOffBits).W)
  val vSetIdx:  UInt = UInt(idxBits.W)
}

class ICacheMissResp(implicit p: Parameters) extends ICacheBundle {
  val blkPaddr: UInt = UInt((PAddrBits - blockOffBits).W)
  val vSetIdx:  UInt = UInt(idxBits.W)
  val waymask:  UInt = UInt(nWays.W)
  val data:     UInt = UInt(blockBits.W)
  val corrupt:  Bool = Bool()
}

class LookUpMSHR(implicit p: Parameters) extends ICacheBundle {
  val info: Valid[ICacheMissReq] = ValidIO(new ICacheMissReq)
  val hit:  Bool                 = Input(Bool())
}

class MSHRResp(implicit p: Parameters) extends ICacheBundle {
  val blkPaddr: UInt = UInt((PAddrBits - blockOffBits).W)
  val vSetIdx:  UInt = UInt(idxBits.W)
  val way:      UInt = UInt(wayBits.W)
}

class MSHRAcquire(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheBundle {
  val acquire: TLBundleA = new TLBundleA(edge.bundle)
  val vSetIdx: UInt      = UInt(idxBits.W)
}

class ICacheMSHRIO(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheBundle {
  val fencei:    Bool                       = Input(Bool())
  val flush:     Bool                       = Input(Bool())
  val invalid:   Bool                       = Input(Bool())
  val req:       DecoupledIO[ICacheMissReq] = Flipped(DecoupledIO(new ICacheMissReq))
  val acquire:   DecoupledIO[MSHRAcquire]   = DecoupledIO(new MSHRAcquire(edge))
  val lookUps:   Vec[LookUpMSHR]            = Flipped(Vec(2, new LookUpMSHR))
  val resp:      Valid[MSHRResp]            = ValidIO(new MSHRResp)
  val victimWay: UInt                       = Input(UInt(wayBits.W))
}

class ICacheMSHR(edge: TLEdgeOut, isFetch: Boolean, ID: Int)(implicit p: Parameters) extends ICacheModule {
  val io: ICacheMSHRIO = IO(new ICacheMSHRIO(edge))

  private val valid = RegInit(Bool(), false.B)
  // when set, this MSHR does not respond to fetch and does not write SRAM
  private val flush  = RegInit(Bool(), false.B)
  private val fencei = RegInit(Bool(), false.B)
  // this MSHR has been issued
  private val issue = RegInit(Bool(), false.B)

  private val blkPaddr = RegInit(UInt((PAddrBits - blockOffBits).W), 0.U)
  private val vSetIdx  = RegInit(UInt(idxBits.W), 0.U)
  private val way      = RegInit(UInt(wayBits.W), 0.U)

  // look up and return the result in the same cycle
  private val hits = io.lookUps.map { lookup =>
    valid && !fencei && !flush && (lookup.info.bits.vSetIdx === vSetIdx) &&
    (lookup.info.bits.blkPaddr === blkPaddr)
  }
  // Decoupling valid and bits
  (0 until 2).foreach(i => io.lookUps(i).hit := hits(i))

  // disable wake up when hit MSHR (fencei is low)
  // when(hit) {
  //   flush := false.B
  // }

  // invalidate only if the request hasn't been issued yet
  when(io.fencei || io.flush) {
    fencei := true.B
    flush  := true.B
    when(!issue) {
      valid := false.B
    }
  }

  // receive a request and register it
  io.req.ready := !valid && !io.flush && !io.fencei
  when(io.req.fire) {
    valid    := true.B
    flush    := false.B
    issue    := false.B
    fencei   := false.B
    blkPaddr := io.req.bits.blkPaddr
    vSetIdx  := io.req.bits.vSetIdx
  }

  // send request to L2
  io.acquire.valid := valid && !issue && !io.flush && !io.fencei
  private val getBlock = edge.Get(
    fromSource = ID.U,
    toAddress = Cat(blkPaddr, 0.U(blockOffBits.W)),
    lgSize = log2Up(cacheParams.blockBytes).U
  )._2
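  // edge.Get returns a (legality, TLBundleA) pair; ._2 keeps only the request bundle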
  io.acquire.bits.acquire := getBlock
  io.acquire.bits.acquire.user.lift(ReqSourceKey).foreach(_ := MemReqSource.CPUInst.id.U)
  io.acquire.bits.vSetIdx := vSetIdx

  // get the victim way when acquire fires
  when(io.acquire.fire) {
    issue := true.B
    way   := io.victimWay
  }

  // invalidate the request when the grant finishes
  when(io.invalid) {
    valid := false.B
  }

  // provide the information (other than data) for writing SRAM and responding to fetch
  io.resp.valid         := valid && (!flush && !fencei)
  io.resp.bits.blkPaddr := blkPaddr
  io.resp.bits.vSetIdx  := vSetIdx
  io.resp.bits.way      := way
}

class ICacheMissUnitIO(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheBundle {
  // difftest
  val hartId: Bool = Input(Bool())
  // control
  val fencei: Bool = Input(Bool())
  val flush:  Bool = Input(Bool())
  // fetch
  val fetch_req:  DecoupledIO[ICacheMissReq] = Flipped(DecoupledIO(new ICacheMissReq))
  val fetch_resp: Valid[ICacheMissResp]      = ValidIO(new ICacheMissResp)
  // prefetch
  val prefetch_req: DecoupledIO[ICacheMissReq] = Flipped(DecoupledIO(new ICacheMissReq))
  // SRAM write requests
  val meta_write: DecoupledIO[ICacheMetaWriteBundle] = DecoupledIO(new ICacheMetaWriteBundle)
  val data_write: DecoupledIO[ICacheDataWriteBundle] = DecoupledIO(new ICacheDataWriteBundle)
  // get victim from replacer
  val victim: ReplacerVictim = new ReplacerVictim
  // TileLink
  val mem_acquire: DecoupledIO[TLBundleA] = DecoupledIO(new TLBundleA(edge.bundle))
  val mem_grant:   DecoupledIO[TLBundleD] = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
}

class ICacheMissUnit(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheModule {
  val io: ICacheMissUnitIO = IO(new ICacheMissUnitIO(edge))

  /**
    ******************************************************************************
    * fetch has higher priority than prefetch
    * fetch MSHRs: a lower index has higher priority
    * prefetch MSHRs: requests that entered earlier have higher priority
    *                 ---------       --------------       -----------
    * ---fetch reg--->| Demux |-----> | fetch MSHR |------>| Arbiter |---acquire--->
    *                 ---------       --------------       -----------
    *                                 | fetch MSHR |            ^
    *                                 --------------            |
    *                                                           |
    *                                -----------------          |
    *                                | prefetch MSHR |          |
    *                 ---------      -----------------     -----------
    * -prefetch reg-->| Demux |----> | prefetch MSHR |---->| Arbiter |
    *                 ---------      -----------------     -----------
    *                                |    .......    |
    *                                -----------------
    ******************************************************************************
    */

  private val fetchDemux    = Module(new DeMultiplexer(new ICacheMissReq, nFetchMshr))
  private val prefetchDemux = Module(new DeMultiplexer(new ICacheMissReq, nPrefetchMshr))
  private val prefetchArb   = Module(new MuxBundle(new MSHRAcquire(edge), nPrefetchMshr))
  private val acquireArb    = Module(new Arbiter(new MSHRAcquire(edge), nFetchMshr + 1))

  // To avoid duplicate request reception.
  private val fetchHit    = Wire(Bool())
  private val prefetchHit = Wire(Bool())
  fetchDemux.io.in <> io.fetch_req
  fetchDemux.io.in.valid := io.fetch_req.valid && !fetchHit
  io.fetch_req.ready     := fetchDemux.io.in.ready || fetchHit
  prefetchDemux.io.in <> io.prefetch_req
  prefetchDemux.io.in.valid := io.prefetch_req.valid && !prefetchHit
  io.prefetch_req.ready     := prefetchDemux.io.in.ready || prefetchHit
  acquireArb.io.in.last <> prefetchArb.io.out
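  // the prefetch arbiter feeds the last (lowest-priority) port of acquireArb,
  // so fetch MSHRs always win arbitration over prefetch MSHRs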

  // mem_acquire connect
  io.mem_acquire.valid    := acquireArb.io.out.valid
  io.mem_acquire.bits     := acquireArb.io.out.bits.acquire
  acquireArb.io.out.ready := io.mem_acquire.ready

  private val fetchMSHRs = (0 until nFetchMshr).map { i =>
    val mshr = Module(new ICacheMSHR(edge, true, i))
    mshr.io.flush  := false.B
    mshr.io.fencei := io.fencei
    mshr.io.req <> fetchDemux.io.out(i)
    mshr.io.lookUps(0).info.valid := io.fetch_req.valid
    mshr.io.lookUps(0).info.bits  := io.fetch_req.bits
    mshr.io.lookUps(1).info.valid := io.prefetch_req.valid
    mshr.io.lookUps(1).info.bits  := io.prefetch_req.bits
    mshr.io.victimWay             := io.victim.way
    acquireArb.io.in(i) <> mshr.io.acquire
    mshr
  }

  private val prefetchMSHRs = (0 until nPrefetchMshr).map { i =>
    val mshr = Module(new ICacheMSHR(edge, false, nFetchMshr + i))
    mshr.io.flush  := io.flush
    mshr.io.fencei := io.fencei
    mshr.io.req <> prefetchDemux.io.out(i)
    mshr.io.lookUps(0).info.valid := io.fetch_req.valid
    mshr.io.lookUps(0).info.bits  := io.fetch_req.bits
    mshr.io.lookUps(1).info.valid := io.prefetch_req.valid
    mshr.io.lookUps(1).info.bits  := io.prefetch_req.bits
    mshr.io.victimWay             := io.victim.way
    prefetchArb.io.in(i) <> mshr.io.acquire
    mshr
  }

  /**
    ******************************************************************************
    * MSHR look-up
    * - look up all MSHRs
    ******************************************************************************
    */
  private val allMSHRs = fetchMSHRs ++ prefetchMSHRs
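  // a prefetch that targets the same block as the fetch request arriving in the
  // same cycle also counts as a hit, so the duplicate is not enqueued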
  private val prefetchHitFetchReq = (io.prefetch_req.bits.blkPaddr === io.fetch_req.bits.blkPaddr) &&
    (io.prefetch_req.bits.vSetIdx === io.fetch_req.bits.vSetIdx) &&
    io.fetch_req.valid
  fetchHit    := allMSHRs.map(mshr => mshr.io.lookUps(0).hit).reduce(_ || _)
  prefetchHit := allMSHRs.map(mshr => mshr.io.lookUps(1).hit).reduce(_ || _) || prefetchHitFetchReq

  /**
    ******************************************************************************
    * prefetchMSHRs priority
    * - Requests that enter the prefetchMSHRs earlier have higher priority when issuing.
    * - The enqueue order is recorded in a FIFO when a request enters the MSHRs.
    * - The requests are dispatched in the order recorded in the FIFO.
    ******************************************************************************
    */
  // When the FIFO is full, enqueue and dequeue do not happen in the same cycle,
  // so the depth of the FIFO is set to match the number of MSHRs.
  // val priorityFIFO = Module(new Queue(UInt(log2Ceil(nPrefetchMshr).W), nPrefetchMshr, hasFlush=true))
  private val priorityFIFO = Module(new FIFOReg(UInt(log2Ceil(nPrefetchMshr).W), nPrefetchMshr, hasFlush = true))
  priorityFIFO.io.flush.get := io.flush || io.fencei
  priorityFIFO.io.enq.valid := prefetchDemux.io.in.fire
  priorityFIFO.io.enq.bits  := prefetchDemux.io.chosen
  priorityFIFO.io.deq.ready := prefetchArb.io.out.fire
  prefetchArb.io.sel        := priorityFIFO.io.deq.bits
  assert(
    !(priorityFIFO.io.enq.fire ^ prefetchDemux.io.in.fire),
    "priorityFIFO.io.enq and io.prefetch_req must fire at the same cycle"
  )
  assert(
    !(priorityFIFO.io.deq.fire ^ prefetchArb.io.out.fire),
    "priorityFIFO.io.deq and prefetchArb.io.out must fire at the same cycle"
  )

  /**
    ******************************************************************************
    * TileLink D channel (grant)
    ******************************************************************************
    */
  // cacheline register
  private val readBeatCnt = RegInit(UInt(log2Up(refillCycles).W), 0.U)
  private val respDataReg = RegInit(VecInit(Seq.fill(refillCycles)(0.U(beatBits.W))))

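  // wait_last: the beat currently being received is the final beat of the refill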
  private val wait_last = readBeatCnt === (refillCycles - 1).U
  when(io.mem_grant.fire && edge.hasData(io.mem_grant.bits)) {
    respDataReg(readBeatCnt) := io.mem_grant.bits.data
    readBeatCnt              := Mux(wait_last, 0.U, readBeatCnt + 1.U)
  }

  // the last beat of the transaction has been received (it may still be corrupt)
  private val last_fire = io.mem_grant.fire && edge.hasData(io.mem_grant.bits) && wait_last

  private val (_, _, refill_done, _) = edge.addr_inc(io.mem_grant)
  assert(!(refill_done ^ last_fire), "refill not done!")
  io.mem_grant.ready := true.B

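  // register the last-beat event and the grant source ID for one cycle, so the
  // MSHR invalidation and response selection below line up with mshr_resp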
  private val last_fire_r = RegNext(last_fire)
  private val id_r        = RegNext(io.mem_grant.bits.source)

  // if any beat is corrupt, the whole response (to mainPipe/metaArray/dataArray) is corrupt
  private val corrupt_r = RegInit(false.B)
  when(io.mem_grant.fire && edge.hasData(io.mem_grant.bits) && io.mem_grant.bits.corrupt) {
    // Set corrupt_r when any beat is corrupt
    // This is effectively when(xxx.fire && xxx.hasData) { corrupt_r := corrupt_r || io.mem_grant.bits.corrupt }
    corrupt_r := true.B
  }.elsewhen(last_fire_r) {
    // Clear corrupt_r when the response is sent to mainPipe.
    // This condition used to be io.fetch_resp.valid (i.e. last_fire_r && mshr_valid), but when the MSHR is
    // flushed by io.flush/fencei, mshr_valid is false.B and corrupt_r would never be cleared, which is wrong.
    // So we drop mshr_valid here and the leftover condition is last_fire_r,
    // or, equivalently, io.fetch_resp.valid || (last_fire_r && !mshr_valid).
    corrupt_r := false.B
  }

  /**
    ******************************************************************************
    * invalidate the MSHR when the transaction finishes
    ******************************************************************************
    */
  (0 until (nFetchMshr + nPrefetchMshr)).foreach(i => allMSHRs(i).io.invalid := last_fire_r && (id_r === i.U))

  /**
    ******************************************************************************
    * respond to fetch and write SRAM
    ******************************************************************************
    */
  // get request information from the MSHRs
  private val allMSHRs_resp = VecInit(allMSHRs.map(mshr => mshr.io.resp))
  // select the MSHR response 1 cycle before sending the response to mainPipe/prefetchPipe for better timing
  private val mshr_resp =
    RegEnable(allMSHRs_resp(io.mem_grant.bits.source).bits, 0.U.asTypeOf(allMSHRs_resp(0).bits), last_fire)
  // we can latch mshr.io.resp.bits since they are set on req.fire or acquire.fire and remain unchanged during the response
  // however, we should not latch mshr.io.resp.valid, since io.flush/fencei may clear it at any time
  private val mshr_valid = allMSHRs_resp(id_r).valid

  // get the waymask from the replacer when acquire fires
  io.victim.vSetIdx.valid := acquireArb.io.out.fire
  io.victim.vSetIdx.bits  := acquireArb.io.out.bits.vSetIdx
  private val waymask = UIntToOH(mshr_resp.way)
  // NOTE: when flush/fencei, missUnit will still send a response to mainPipe/prefetchPipe
  //       this is intentional to fix timing (io.flush -> mainPipe/prefetchPipe s2_miss -> s2_ready -> ftq ready)
  //       the unnecessary response will be dropped by mainPipe/prefetchPipe/wayLookup since their sx_valid is set to false
  private val fetch_resp_valid = mshr_valid && last_fire_r
  // NOTE: but we should not write meta/dataArray when flush/fencei
  private val write_sram_valid = fetch_resp_valid && !corrupt_r && !io.flush && !io.fencei

  // write SRAM
  io.meta_write.bits.generate(
    tag = getPhyTagFromBlk(mshr_resp.blkPaddr),
    idx = mshr_resp.vSetIdx,
    waymask = waymask,
    bankIdx = mshr_resp.vSetIdx(0),
    poison = false.B
  )
  io.data_write.bits.generate(
    data = respDataReg.asUInt,
    idx = mshr_resp.vSetIdx,
    waymask = waymask,
    bankIdx = mshr_resp.vSetIdx(0),
    poison = false.B
  )

  io.meta_write.valid := write_sram_valid
  io.data_write.valid := write_sram_valid

  // respond to fetch
  io.fetch_resp.valid         := fetch_resp_valid
  io.fetch_resp.bits.blkPaddr := mshr_resp.blkPaddr
  io.fetch_resp.bits.vSetIdx  := mshr_resp.vSetIdx
  io.fetch_resp.bits.waymask  := waymask
  io.fetch_resp.bits.data     := respDataReg.asUInt
  io.fetch_resp.bits.corrupt  := corrupt_r

  /**
    ******************************************************************************
    * performance counter
    ******************************************************************************
    */
  // Duplicate requests will be excluded.
  XSPerfAccumulate("enq_fetch_req", fetchDemux.io.in.fire)
  XSPerfAccumulate("enq_prefetch_req", prefetchDemux.io.in.fire)

  /**
    ******************************************************************************
    * ChiselDB: record ICache SRAM write log
    ******************************************************************************
    */
  private class ICacheSRAMDB(implicit p: Parameters) extends ICacheBundle {
    val blkPaddr: UInt = UInt((PAddrBits - blockOffBits).W)
    val vSetIdx:  UInt = UInt(idxBits.W)
    val waymask:  UInt = UInt(wayBits.W)
  }

  private val isWriteICacheSRAMTable =
    WireInit(Constantin.createRecord("isWriteICacheSRAMTable" + p(XSCoreParamsKey).HartId.toString))
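  // isWriteICacheSRAMTable is a Constantin record, presumably intended as a run-time
  // switch for the SRAM-write logging below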
  private val ICacheSRAMTable =
    ChiselDB.createTable("ICacheSRAMTable" + p(XSCoreParamsKey).HartId.toString, new ICacheSRAMDB)

  private val ICacheSRAMDBDumpData = Wire(new ICacheSRAMDB)
  ICacheSRAMDBDumpData.blkPaddr := mshr_resp.blkPaddr
  ICacheSRAMDBDumpData.vSetIdx  := mshr_resp.vSetIdx
  ICacheSRAMDBDumpData.waymask  := OHToUInt(waymask)
  ICacheSRAMTable.log(
    data = ICacheSRAMDBDumpData,
    en = write_sram_valid,
    clock = clock,
    reset = reset
  )

  /**
    ******************************************************************************
    * Difftest
    ******************************************************************************
    */
  if (env.EnableDifftest) {
    val difftest = DifftestModule(new DiffRefillEvent, dontCare = true)
    difftest.coreid := io.hartId
    difftest.index  := 0.U
    difftest.valid  := write_sram_valid
    difftest.addr   := Cat(mshr_resp.blkPaddr, 0.U(blockOffBits.W))
    difftest.data   := respDataReg.asTypeOf(difftest.data)
    difftest.idtfr  := DontCare
  }
}