xref: /XiangShan/src/main/scala/xiangshan/frontend/icache/ICacheMissUnit.scala (revision dc4fac130426dbec49b49d778b9105d79b4a8eab)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.frontend.icache
18
19import chisel3._
20import chisel3.util._
21import difftest._
22import freechips.rocketchip.tilelink._
23import org.chipsalliance.cde.config.Parameters
24import utility._
25import xiangshan._
26
/** IO for [[DeMultiplexer]]: one decoupled producer fanned out to `n` decoupled consumers. */
class DeMultiplexerIO[T <: Data](gen: T, n: Int) extends Bundle {
  val in:     DecoupledIO[T]      = Flipped(DecoupledIO(gen))   // single producer side
  val out:    Vec[DecoupledIO[T]] = Vec(n, DecoupledIO(gen))    // n consumer ports; lowest-index ready consumer wins
  val chosen: UInt                = Output(UInt(log2Ceil(n).W)) // index of the consumer selected this cycle (priority-encoded over ready)
}
32
/** Hardware module that is used to sequence 1 producer into n consumers.
 * Priority is given to the lower-indexed consumer: the request is offered to
 * every consumer that has not already lost arbitration to a lower index, and
 * `chosen` reports the index of the winning (lowest ready) consumer.
 */
class DeMultiplexer[T <: Data](val gen: T, val n: Int) extends Module {
  require(n >= 2)
  val io: DeMultiplexerIO[T] = IO(new DeMultiplexerIO(gen, n))

  // lostArb(i) is high when some consumer with index < i is ready, i.e.
  // output i loses the priority arbitration. Built as a running prefix-OR
  // over the ready signals (lostArb(0) is constant false).
  private val lostArb = io.out.map(_.ready).scanLeft(false.B)(_ || _).init

  io.out.zip(lostArb).foreach { case (out, lost) =>
    out.bits  := io.in.bits
    out.valid := io.in.valid && !lost
  }

  // The producer handshakes when any consumer is ready: either one of the
  // first n-1 (folded into the last prefix-OR term) or the very last one.
  io.in.ready := lostArb.last || io.out.last.ready
  io.chosen   := PriorityEncoder(VecInit(io.out.map(_.ready)))
}
49
/** IO for [[MuxBundle]]: `n` decoupled inputs muxed onto one output, selected by `sel`. */
class MuxBundleIO[T <: Data](gen: T, n: Int) extends Bundle {
  val sel: UInt                = Input(UInt(log2Ceil(n).W))       // index of the input forwarded to `out`
  val in:  Vec[DecoupledIO[T]] = Flipped(Vec(n, DecoupledIO(gen))) // candidate producers
  val out: DecoupledIO[T]      = DecoupledIO(gen)                  // selected stream
}
55
/** Selects one of `n` decoupled inputs onto a single decoupled output, indexed by `io.sel`.
 * Non-selected inputs see ready = false, so only the selected stream can handshake.
 */
class MuxBundle[T <: Data](val gen: T, val n: Int) extends Module {
  require(n >= 2)
  val io: MuxBundleIO[T] = IO(new MuxBundleIO[T](gen, n))

  // Default-drive everything first; the selected input overrides below
  // (Chisel last-connection semantics).
  io.in <> DontCare
  io.out <> DontCare
  io.in.zipWithIndex.foreach { case (in, idx) =>
    when(io.sel === idx.U) {
      io.out <> in
    }
    // Unconditional ready: only the selected input may complete a handshake.
    in.ready := (io.sel === idx.U) && io.out.ready
  }
}
69
/** Miss request: identifies the missing cache block. */
class ICacheMissReq(implicit p: Parameters) extends ICacheBundle {
  val blkPaddr: UInt = UInt((PAddrBits - blockOffBits).W) // block-aligned physical address (block offset stripped)
  val vSetIdx:  UInt = UInt(idxBits.W)                    // virtual set index selecting the cache set
}
74
/** Miss response: the refilled block plus the way it was written to. */
class ICacheMissResp(implicit p: Parameters) extends ICacheBundle {
  val blkPaddr: UInt = UInt((PAddrBits - blockOffBits).W) // block-aligned physical address of the refilled block
  val vSetIdx:  UInt = UInt(idxBits.W)                    // virtual set index of the refilled block
  val waymask:  UInt = UInt(nWays.W)                      // one-hot way the block was (or would be) written to
  val data:     UInt = UInt(blockBits.W)                  // full cacheline data
  val corrupt:  Bool = Bool()                             // set if any grant beat was marked corrupt
}
82
/** Same-cycle MSHR lookup port: request info out, combinational hit flag back. */
class LookUpMSHR(implicit p: Parameters) extends ICacheBundle {
  val info: Valid[ICacheMissReq] = ValidIO(new ICacheMissReq) // request to compare against the MSHR's registered entry
  val hit:  Bool                 = Input(Bool())              // address match (computed from bits only; caller qualifies with valid)
}
87
/** Registered request info an MSHR exposes for refill write-back and fetch response. */
class MSHRResp(implicit p: Parameters) extends ICacheBundle {
  val blkPaddr: UInt = UInt((PAddrBits - blockOffBits).W) // block-aligned physical address of the outstanding miss
  val vSetIdx:  UInt = UInt(idxBits.W)                    // virtual set index of the outstanding miss
  val way:      UInt = UInt(wayBits.W)                    // victim way captured when the acquire fired
}
93
/** TileLink A-channel acquire bundled with the set index it belongs to (used for victim selection). */
class MSHRAcquire(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheBundle {
  val acquire: TLBundleA = new TLBundleA(edge.bundle) // Get message towards L2
  val vSetIdx: UInt      = UInt(idxBits.W)            // set index, forwarded to the replacer on acquire fire
}
98
/** IO of a single [[ICacheMSHR]] entry. */
class ICacheMSHRIO(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheBundle {
  val fencei:    Bool                       = Input(Bool()) // fence.i: poison this entry (no response / no SRAM write)
  val flush:     Bool                       = Input(Bool()) // pipeline flush: same poisoning effect as fencei
  val invalid:   Bool                       = Input(Bool()) // pulse from the miss unit: grant finished, free this entry
  val req:       DecoupledIO[ICacheMissReq] = Flipped(DecoupledIO(new ICacheMissReq)) // new miss request to register
  val acquire:   DecoupledIO[MSHRAcquire]   = DecoupledIO(new MSHRAcquire(edge))      // TileLink Get issued towards L2
  val lookUps:   Vec[LookUpMSHR]            = Flipped(Vec(2, new LookUpMSHR))         // duplicate-check ports: (0)=fetch, (1)=prefetch
  val resp:      Valid[MSHRResp]            = ValidIO(new MSHRResp)                   // registered entry info for refill/response
  val victimWay: UInt                       = Input(UInt(wayBits.W))                  // replacement way, sampled when acquire fires
}
109
/** A single ICache miss-status holding register.
 *
 * Tracks one outstanding block refill: registers a miss request, issues one
 * TileLink Get towards L2, and holds the block identity (paddr/set/way) until
 * the miss unit signals that the grant has finished.
 *
 * Note the `when`-block ordering below is load-bearing (Chisel last-connect
 * semantics): a same-cycle `req.fire` overrides the fencei/flush poisoning,
 * and `io.invalid` overrides everything for the valid bit.
 *
 * @param edge    TileLink edge used to build the Get message
 * @param isFetch whether this entry serves demand fetch (vs prefetch);
 *                NOTE(review): currently unused inside the module body
 * @param ID      TileLink source id carried by this entry's acquires
 */
class ICacheMSHR(edge: TLEdgeOut, isFetch: Boolean, ID: Int)(implicit p: Parameters) extends ICacheModule {
  val io: ICacheMSHRIO = IO(new ICacheMSHRIO(edge))

  // entry holds a pending or in-flight miss
  private val valid = RegInit(Bool(), false.B)
  // this MSHR doesn't respond to fetch and sram
  private val flush  = RegInit(Bool(), false.B)
  private val fencei = RegInit(Bool(), false.B)
  // this MSHR has been issued
  private val issue = RegInit(Bool(), false.B)

  // registered identity of the outstanding miss
  private val blkPaddr = RegInit(UInt((PAddrBits - blockOffBits).W), 0.U)
  private val vSetIdx  = RegInit(UInt(idxBits.W), 0.U)
  private val way      = RegInit(UInt(wayBits.W), 0.U)

  // look up and return result at the same cycle
  // (a poisoned entry — flush/fencei pending — never reports a hit)
  private val hits = io.lookUps.map { lookup =>
    valid && !fencei && !flush && (lookup.info.bits.vSetIdx === vSetIdx) &&
    (lookup.info.bits.blkPaddr === blkPaddr)
  }
  // Decoupling valid and bits: hit is computed from bits only,
  // the caller must qualify it with its own request valid.
  (0 until 2).foreach(i => io.lookUps(i).hit := hits(i))

  // disable wake up when hit MSHR (fencei is low)
  // when(hit) {
  //   flush := false.B
  // }

  // invalid when the req hasn't been issued
  // (once issued, the entry must stay valid to sink the grant; it is only
  //  poisoned so the eventual response/SRAM write is suppressed)
  when(io.fencei || io.flush) {
    fencei := true.B
    flush  := true.B
    when(!issue) {
      valid := false.B
    }
  }

  // receive request and register
  // (this when comes after the poisoning one, so a same-cycle fire re-arms the entry)
  io.req.ready := !valid && !io.flush && !io.fencei
  when(io.req.fire) {
    valid    := true.B
    flush    := false.B
    issue    := false.B
    fencei   := false.B
    blkPaddr := io.req.bits.blkPaddr
    vSetIdx  := io.req.bits.vSetIdx
  }

  // send request to L2 (exactly once per registered miss: gated by !issue)
  io.acquire.valid := valid && !issue && !io.flush && !io.fencei
  private val getBlock = edge.Get(
    fromSource = ID.U,                         // source id identifies this MSHR on the D channel
    toAddress = Cat(blkPaddr, 0.U(blockOffBits.W)),
    lgSize = log2Up(cacheParams.blockBytes).U  // one full cacheline
  )._2
  io.acquire.bits.acquire := getBlock
  // tag the request origin for downstream accounting
  io.acquire.bits.acquire.user.lift(ReqSourceKey).foreach(_ := MemReqSource.CPUInst.id.U)
  io.acquire.bits.vSetIdx := vSetIdx

  // get victim way when acquire fire
  when(io.acquire.fire) {
    issue := true.B
    way   := io.victimWay
  }

  // invalid request when grant finish
  when(io.invalid) {
    valid := false.B
  }

  // offer the information other than data for write sram and response fetch
  io.resp.valid         := valid && (!flush && !fencei)
  io.resp.bits.blkPaddr := blkPaddr
  io.resp.bits.vSetIdx  := vSetIdx
  io.resp.bits.way      := way
}
185
/** IO of [[ICacheMissUnit]]: fetch/prefetch miss requests in, SRAM writes and
 * fetch responses out, plus the TileLink A/D channels towards L2.
 */
class ICacheMissUnitIO(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheBundle {
  // difftest
  // NOTE(review): declared Bool, so hart ids above 1 would be truncated when
  // driving difftest.coreid — confirm the intended width.
  val hartId: Bool = Input(Bool())
  // control
  val fencei: Bool = Input(Bool()) // fence.i: poison in-flight misses
  val flush:  Bool = Input(Bool()) // pipeline flush (affects prefetch MSHRs only, see wiring)
  // fetch
  val fetch_req:  DecoupledIO[ICacheMissReq] = Flipped(DecoupledIO(new ICacheMissReq))
  val fetch_resp: Valid[ICacheMissResp]      = ValidIO(new ICacheMissResp)
  // prefetch
  val prefetch_req: DecoupledIO[ICacheMissReq] = Flipped(DecoupledIO(new ICacheMissReq))
  // SRAM Write Req
  val meta_write: DecoupledIO[ICacheMetaWriteBundle] = DecoupledIO(new ICacheMetaWriteBundle)
  val data_write: DecoupledIO[ICacheDataWriteBundle] = DecoupledIO(new ICacheDataWriteBundle)
  // get victim from replacer
  val victim: ReplacerVictim = new ReplacerVictim
  // Tilelink
  val mem_acquire: DecoupledIO[TLBundleA] = DecoupledIO(new TLBundleA(edge.bundle))
  val mem_grant:   DecoupledIO[TLBundleD] = Flipped(DecoupledIO(new TLBundleD(edge.bundle)))
}
206
/** ICache miss unit: owns all MSHRs, arbitrates their acquires onto the
 * TileLink A channel, collects grant beats from the D channel, and performs
 * the refill (meta/data SRAM write) plus the response back to fetch.
 */
class ICacheMissUnit(edge: TLEdgeOut)(implicit p: Parameters) extends ICacheModule {
  val io: ICacheMissUnitIO = IO(new ICacheMissUnitIO(edge))

  /**
    ******************************************************************************
    * fetch have higher priority
    * fetch MSHR: lower index have a higher priority
    * prefetch MSHR: the prefetchMSHRs earlier have a higher priority
    *                 ---------       --------------       -----------
    * ---fetch reg--->| Demux |-----> | fetch MSHR |------>| Arbiter |---acquire--->
    *                 ---------       --------------       -----------
    *                                 | fetch MSHR |            ^
    *                                 --------------            |
    *                                                           |
    *                                -----------------          |
    *                                | prefetch MSHR |          |
    *                 ---------      -----------------     -----------
    * ---fetch reg--->| Demux |----> | prefetch MSHR |---->| Arbiter |
    *                 ---------      -----------------     -----------
    *                                |    .......    |
    *                                -----------------
    ******************************************************************************
    */

  private val fetchDemux    = Module(new DeMultiplexer(new ICacheMissReq, nFetchMshr))
  private val prefetchDemux = Module(new DeMultiplexer(new ICacheMissReq, nPrefetchMshr))
  private val prefetchArb   = Module(new MuxBundle(new MSHRAcquire(edge), nPrefetchMshr))
  // fetch MSHRs take ports 0..nFetchMshr-1; port nFetchMshr (lowest priority)
  // is the pre-arbitrated prefetch stream
  private val acquireArb    = Module(new Arbiter(new MSHRAcquire(edge), nFetchMshr + 1))

  // To avoid duplicate request reception.
  // A request that already hits an MSHR is acknowledged (ready) but not enqueued
  // (the demux valid is masked), so it is silently merged with the in-flight miss.
  private val fetchHit    = Wire(Bool())
  private val prefetchHit = Wire(Bool())
  fetchDemux.io.in <> io.fetch_req
  fetchDemux.io.in.valid := io.fetch_req.valid && !fetchHit
  io.fetch_req.ready     := fetchDemux.io.in.ready || fetchHit
  prefetchDemux.io.in <> io.prefetch_req
  prefetchDemux.io.in.valid := io.prefetch_req.valid && !prefetchHit
  io.prefetch_req.ready     := prefetchDemux.io.in.ready || prefetchHit
  acquireArb.io.in.last <> prefetchArb.io.out

  // mem_acquire connect
  io.mem_acquire.valid    := acquireArb.io.out.valid
  io.mem_acquire.bits     := acquireArb.io.out.bits.acquire
  acquireArb.io.out.ready := io.mem_acquire.ready

  // fetch MSHRs: never flushed (demand fetches must complete), only fencei-poisoned
  private val fetchMSHRs = (0 until nFetchMshr).map { i =>
    val mshr = Module(new ICacheMSHR(edge, true, i))
    mshr.io.flush  := false.B
    mshr.io.fencei := io.fencei
    mshr.io.req <> fetchDemux.io.out(i)
    // every MSHR sees both incoming requests for duplicate detection
    mshr.io.lookUps(0).info.valid := io.fetch_req.valid
    mshr.io.lookUps(0).info.bits  := io.fetch_req.bits
    mshr.io.lookUps(1).info.valid := io.prefetch_req.valid
    mshr.io.lookUps(1).info.bits  := io.prefetch_req.bits
    mshr.io.victimWay             := io.victim.way
    acquireArb.io.in(i) <> mshr.io.acquire
    mshr
  }

  // prefetch MSHRs: use TileLink source ids nFetchMshr.., flushed on io.flush
  private val prefetchMSHRs = (0 until nPrefetchMshr).map { i =>
    val mshr = Module(new ICacheMSHR(edge, false, nFetchMshr + i))
    mshr.io.flush  := io.flush
    mshr.io.fencei := io.fencei
    mshr.io.req <> prefetchDemux.io.out(i)
    mshr.io.lookUps(0).info.valid := io.fetch_req.valid
    mshr.io.lookUps(0).info.bits  := io.fetch_req.bits
    mshr.io.lookUps(1).info.valid := io.prefetch_req.valid
    mshr.io.lookUps(1).info.bits  := io.prefetch_req.bits
    mshr.io.victimWay             := io.victim.way
    prefetchArb.io.in(i) <> mshr.io.acquire
    mshr
  }

  /**
    ******************************************************************************
    * MSHR look up
    * - look up all mshr
    ******************************************************************************
    */
  private val allMSHRs = fetchMSHRs ++ prefetchMSHRs
  // a prefetch that targets the same block as a simultaneous fetch request is
  // also dropped, so the two never allocate duplicate entries in one cycle
  private val prefetchHitFetchReq = (io.prefetch_req.bits.blkPaddr === io.fetch_req.bits.blkPaddr) &&
    (io.prefetch_req.bits.vSetIdx === io.fetch_req.bits.vSetIdx) &&
    io.fetch_req.valid
  fetchHit    := allMSHRs.map(mshr => mshr.io.lookUps(0).hit).reduce(_ || _)
  prefetchHit := allMSHRs.map(mshr => mshr.io.lookUps(1).hit).reduce(_ || _) || prefetchHitFetchReq

  /**
    ******************************************************************************
    * prefetchMSHRs priority
    * - The requests that enter the prefetchMSHRs earlier have a higher priority in issuing.
    * - The order of enqueuing is recorded in FIFO when request enters MSHRs.
    * - The requests are dispatched in the order they are recorded in FIFO.
    ******************************************************************************
    */
  // When the FIFO is full, enqueue and dequeue operations do not occur at the same cycle.
  // So the depth of the FIFO is set to match the number of MSHRs.
  // val priorityFIFO = Module(new Queue(UInt(log2Ceil(nPrefetchMshr).W), nPrefetchMshr, hasFlush=true))
  private val priorityFIFO = Module(new FIFOReg(UInt(log2Ceil(nPrefetchMshr).W), nPrefetchMshr, hasFlush = true))
  priorityFIFO.io.flush.get := io.flush || io.fencei
  priorityFIFO.io.enq.valid := prefetchDemux.io.in.fire
  priorityFIFO.io.enq.bits  := prefetchDemux.io.chosen   // record which MSHR slot was allocated
  priorityFIFO.io.deq.ready := prefetchArb.io.out.fire   // pop once that slot's acquire is sent
  prefetchArb.io.sel        := priorityFIFO.io.deq.bits
  assert(
    !(priorityFIFO.io.enq.fire ^ prefetchDemux.io.in.fire),
    "priorityFIFO.io.enq and io.prefetch_req must fire at the same cycle"
  )
  assert(
    !(priorityFIFO.io.deq.fire ^ prefetchArb.io.out.fire),
    "priorityFIFO.io.deq and prefetchArb.io.out must fire at the same cycle"
  )

  /**
    ******************************************************************************
    * Tilelink D channel (grant)
    ******************************************************************************
    */
  // cacheline register: grant beats are accumulated here until the line is complete
  private val readBeatCnt = RegInit(UInt(log2Up(refillCycles).W), 0.U)
  private val respDataReg = RegInit(VecInit(Seq.fill(refillCycles)(0.U(beatBits.W))))

  private val wait_last = readBeatCnt === (refillCycles - 1).U
  when(io.mem_grant.fire && edge.hasData(io.mem_grant.bits)) {
    respDataReg(readBeatCnt) := io.mem_grant.bits.data
    readBeatCnt              := Mux(wait_last, 0.U, readBeatCnt + 1.U)
  }

  // last transition finish or corrupt
  private val last_fire = io.mem_grant.fire && edge.hasData(io.mem_grant.bits) && wait_last

  // cross-check our beat counter against the edge's own bookkeeping
  private val (_, _, refill_done, _) = edge.addr_inc(io.mem_grant)
  assert(!(refill_done ^ last_fire), "refill not done!")
  io.mem_grant.ready := true.B // always ready: grants are sunk unconditionally

  // register the completion and the source id so the refill/response happens
  // one cycle after the last beat, indexed by the owning MSHR
  private val last_fire_r = RegNext(last_fire)
  private val id_r        = RegNext(io.mem_grant.bits.source)

  // if any beat is corrupt, the whole response (to mainPipe/metaArray/dataArray) is corrupt
  private val corrupt_r = RegInit(false.B)
  when(io.mem_grant.fire && edge.hasData(io.mem_grant.bits) && io.mem_grant.bits.corrupt) {
    corrupt_r := true.B
  }.elsewhen(io.fetch_resp.fire) {
    corrupt_r := false.B
  }

  /**
    ******************************************************************************
    * invalid mshr when finish transition
    ******************************************************************************
    */
  (0 until (nFetchMshr + nPrefetchMshr)).foreach(i => allMSHRs(i).io.invalid := last_fire_r && (id_r === i.U))

  /**
    ******************************************************************************
    * response fetch and write SRAM
    ******************************************************************************
    */
  // get request information from MSHRs (muxed by the registered grant source id)
  private val allMSHRs_resp = VecInit(allMSHRs.map(mshr => mshr.io.resp))
  private val mshr_resp     = allMSHRs_resp(id_r)

  // get waymask from replacer when acquire fire
  io.victim.vSetIdx.valid := acquireArb.io.out.fire
  io.victim.vSetIdx.bits  := acquireArb.io.out.bits.vSetIdx
  private val waymask = UIntToOH(mshr_resp.bits.way)
  // NOTE: when flush/fencei, missUnit will still send response to mainPipe/prefetchPipe
  //       this is intentional to fix timing (io.flush -> mainPipe/prefetchPipe s2_miss -> s2_ready -> ftq ready)
  //       unnecessary response will be dropped by mainPipe/prefetchPipe/wayLookup since their sx_valid is set to false
  private val fetch_resp_valid = mshr_resp.valid && last_fire_r
  // NOTE: but we should not write meta/dataArray when flush/fencei
  //       (nor when the line is corrupt — a corrupt line is reported but never cached)
  private val write_sram_valid = fetch_resp_valid && !corrupt_r && !io.flush && !io.fencei

  // write SRAM
  io.meta_write.bits.generate(
    tag = getPhyTagFromBlk(mshr_resp.bits.blkPaddr),
    idx = mshr_resp.bits.vSetIdx,
    waymask = waymask,
    bankIdx = mshr_resp.bits.vSetIdx(0)
  )
  io.data_write.bits.generate(
    data = respDataReg.asUInt,
    idx = mshr_resp.bits.vSetIdx,
    waymask = waymask,
    bankIdx = mshr_resp.bits.vSetIdx(0)
  )

  io.meta_write.valid := write_sram_valid
  io.data_write.valid := write_sram_valid

  // response fetch
  io.fetch_resp.valid         := fetch_resp_valid
  io.fetch_resp.bits.blkPaddr := mshr_resp.bits.blkPaddr
  io.fetch_resp.bits.vSetIdx  := mshr_resp.bits.vSetIdx
  io.fetch_resp.bits.waymask  := waymask
  io.fetch_resp.bits.data     := respDataReg.asUInt
  io.fetch_resp.bits.corrupt  := corrupt_r

  /**
    ******************************************************************************
    * performance counter
    ******************************************************************************
    */
  // Duplicate requests will be excluded.
  XSPerfAccumulate("enq_fetch_req", fetchDemux.io.in.fire)
  XSPerfAccumulate("enq_prefetch_req", prefetchDemux.io.in.fire)

  /**
    ******************************************************************************
    * ChiselDB: record ICache SRAM write log
    ******************************************************************************
    */
  private class ICacheSRAMDB(implicit p: Parameters) extends ICacheBundle {
    val blkPaddr: UInt = UInt((PAddrBits - blockOffBits).W)
    val vSetIdx:  UInt = UInt(idxBits.W)
    val waymask:  UInt = UInt(wayBits.W) // encoded way index (not one-hot)
  }

  // NOTE(review): this Constantin record is created but never used to gate the
  // log below — confirm whether `en` should include it.
  private val isWriteICacheSRAMTable =
    WireInit(Constantin.createRecord("isWriteICacheSRAMTable" + p(XSCoreParamsKey).HartId.toString))
  private val ICacheSRAMTable =
    ChiselDB.createTable("ICacheSRAMTable" + p(XSCoreParamsKey).HartId.toString, new ICacheSRAMDB)

  private val ICacheSRAMDBDumpData = Wire(new ICacheSRAMDB)
  ICacheSRAMDBDumpData.blkPaddr := mshr_resp.bits.blkPaddr
  ICacheSRAMDBDumpData.vSetIdx  := mshr_resp.bits.vSetIdx
  ICacheSRAMDBDumpData.waymask  := OHToUInt(waymask)
  ICacheSRAMTable.log(
    data = ICacheSRAMDBDumpData,
    en = write_sram_valid,
    clock = clock,
    reset = reset
  )

  /**
    ******************************************************************************
    * Difftest
    ******************************************************************************
    */
  if (env.EnableDifftest) {
    val difftest = DifftestModule(new DiffRefillEvent, dontCare = true)
    difftest.coreid := io.hartId
    difftest.index  := 0.U
    difftest.valid  := write_sram_valid
    difftest.addr   := Cat(mshr_resp.bits.blkPaddr, 0.U(blockOffBits.W))
    difftest.data   := respDataReg.asTypeOf(difftest.data)
    difftest.idtfr  := DontCare
  }
}
455