xref: /XiangShan/src/main/scala/xiangshan/cache/mmu/Repeater.scala (revision ef6723f9795e8222d080df5d74a2a307c1e68a86)
1/***************************************************************************************
2* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
3* Copyright (c) 2020-2021 Peng Cheng Laboratory
4*
5* XiangShan is licensed under Mulan PSL v2.
6* You can use this software according to the terms and conditions of the Mulan PSL v2.
7* You may obtain a copy of Mulan PSL v2 at:
8*          http://license.coscl.org.cn/MulanPSL2
9*
10* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
11* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
12* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
13*
14* See the Mulan PSL v2 for more details.
15***************************************************************************************/
16
17package xiangshan.cache.mmu
18
19import org.chipsalliance.cde.config.Parameters
20import chisel3._
21import chisel3.util._
22import xiangshan._
23import xiangshan.cache.{HasDCacheParameters, MemoryOpConstants}
24import utils._
25import utility._
26import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
27import freechips.rocketchip.tilelink._
28
/** IO bundle of a PTW repeater: one TLB-facing side (consumer, hence Flipped)
  * and one PTW-facing side, plus the fence/CSR inputs from MMUIOBaseBundle.
  *
  * NOTE: the class name keeps the historical typo ("Reapter") because callers
  * reference it by this name.
  *
  * @param Width number of request ports on the TLB side
  */
class PTWReapterIO(Width: Int)(implicit p: Parameters) extends MMUIOBaseBundle {
  val tlb = Flipped(new TlbPtwIO(Width))
  val ptw = new TlbPtwIO

  /** Wire up the TLB side and the fence/CSR inputs; the PTW side is left to the caller. */
  def apply(tlb: TlbPtwIO, sfence: SfenceBundle, csr: TlbCsrBundle): Unit = {
    this.csr <> csr
    this.sfence <> sfence
    this.tlb <> tlb
  }

  /** Wire up both sides and the fence/CSR inputs. */
  def apply(tlb: TlbPtwIO, ptw: TlbPtwIO, sfence: SfenceBundle, csr: TlbCsrBundle): Unit = {
    this.ptw <> ptw
    apply(tlb, sfence, csr)
  }

}
47
/** Blocking repeater between a TLB and the L2 TLB / page-table walker.
  *
  * Holds at most one request in flight: a request is latched from the TLB
  * side, forwarded once to the PTW, and its response is latched and returned
  * to the TLB before the next request is accepted.
  *
  * @param Width      number of TLB request ports (round-robin arbitrated when > 1)
  * @param FenceDelay cycles to delay the flush sources before they take effect
  */
class PTWRepeater(Width: Int = 1, FenceDelay: Int)(implicit p: Parameters) extends XSModule with HasPtwConst {
  val io = IO(new PTWReapterIO(Width))

  // Single request stream: either the lone port or an RR arbiter over all ports.
  val req_in = if (Width == 1) {
    io.tlb.req(0)
  } else {
    val arb = Module(new RRArbiter(io.tlb.req(0).bits.cloneType, Width))
    arb.io.in <> io.tlb.req
    arb.io.out
  }
  // Flush on sfence or any satp/vsatp/hgatp change, delayed by FenceDelay cycles.
  val (tlb, ptw, flush) = (io.tlb, io.ptw, DelayN(io.sfence.valid || io.csr.satp.changed || io.csr.vsatp.changed || io.csr.hgatp.changed, FenceDelay))
  val req = RegEnable(req_in.bits, req_in.fire)       // latched in-flight request
  val resp = RegEnable(ptw.resp.bits, ptw.resp.fire)  // latched PTW response
  // haveOne: a request is live (set on accept, cleared when the TLB takes the resp or on flush)
  val haveOne = BoolStopWatch(req_in.fire, tlb.resp.fire || flush)
  // sent: the live request has been issued to the PTW
  val sent = BoolStopWatch(ptw.req(0).fire, req_in.fire || flush)
  // recv: the PTW response for the live request has been captured
  val recv = BoolStopWatch(ptw.resp.fire && haveOne, req_in.fire || flush)

  req_in.ready := !haveOne            // blocking: refuse new work while one is outstanding
  ptw.req(0).valid := haveOne && !sent
  ptw.req(0).bits := req

  tlb.resp.bits := resp
  tlb.resp.valid := haveOne && recv
  ptw.resp.ready := !recv             // accept exactly one response per request

  XSPerfAccumulate("req_count", ptw.req(0).fire)
  XSPerfAccumulate("tlb_req_cycle", BoolStopWatch(req_in.fire, tlb.resp.fire || flush))
  XSPerfAccumulate("ptw_req_cycle", BoolStopWatch(ptw.req(0).fire, ptw.resp.fire || flush))

  XSDebug(haveOne, p"haveOne:${haveOne} sent:${sent} recv:${recv} sfence:${flush} req:${req} resp:${resp}")
  XSDebug(req_in.valid || io.tlb.resp.valid, p"tlb: ${tlb}\n")
  XSDebug(io.ptw.req(0).valid || io.ptw.resp.valid, p"ptw: ${ptw}\n")
  // Sanity checks: no double response, no resp while still sending, tag must match.
  assert(!RegNext(recv && io.ptw.resp.valid, init = false.B), "re-receive ptw.resp")
  XSError(io.ptw.req(0).valid && io.ptw.resp.valid && !flush, "ptw repeater recv resp when sending")
  XSError(io.ptw.resp.valid && (req.vpn =/= io.ptw.resp.bits.s1.entry.tag), "ptw repeater recv resp with wrong tag")
  XSError(io.ptw.resp.valid && !io.ptw.resp.ready, "ptw repeater's ptw resp back, but not ready")
  TimeOutAssert(sent && !recv, timeOutThreshold, "Repeater doesn't recv resp in time")
}
86
87/* dtlb
88 *
89 */
90
/** Non-blocking (buffered) repeater between a TLB and the L2 TLB.
  *
  * Unlike PTWRepeater, the request and response paths are independent: a
  * request is buffered until the PTW accepts it, and a response is buffered
  * until the TLB accepts it, so both directions can be occupied at once.
  *
  * @param Width      number of TLB request ports (round-robin arbitrated when > 1)
  * @param passReady  when true, an occupied buffer slot can accept a new item in
  *                   the same cycle its consumer drains it (removes a bubble)
  * @param FenceDelay cycles to delay the flush sources before they take effect
  */
class PTWRepeaterNB(Width: Int = 1, passReady: Boolean = false, FenceDelay: Int)(implicit p: Parameters) extends XSModule with HasPtwConst {
  val io = IO(new PTWReapterIO(Width))

  // Single request stream: either the lone port or an RR arbiter over all ports.
  val req_in = if (Width == 1) {
    io.tlb.req(0)
  } else {
    val arb = Module(new RRArbiter(io.tlb.req(0).bits.cloneType, Width))
    arb.io.in <> io.tlb.req
    arb.io.out
  }
  // NOTE(review): flush here ignores hgatp.changed and gates vsatp.changed on
  // priv.virt, unlike PTWRepeater above — confirm the asymmetry is intended.
  val (tlb, ptw, flush) = (io.tlb, io.ptw, DelayN(io.sfence.valid || io.csr.satp.changed || (io.csr.priv.virt && io.csr.vsatp.changed), FenceDelay))
  /* sent: tlb -> repeater -> ptw
   * recv: ptw -> repeater -> tlb
   * different from PTWRepeater
   */

  // tlb -> repeater -> ptw : one-entry request buffer
  val req = RegEnable(req_in.bits, req_in.fire)
  val sent = BoolStopWatch(req_in.fire, ptw.req(0).fire || flush)
  req_in.ready := !sent || { if (passReady) ptw.req(0).ready else false.B }
  ptw.req(0).valid := sent
  ptw.req(0).bits := req

  // ptw -> repeater -> tlb : one-entry response buffer
  val resp = RegEnable(ptw.resp.bits, ptw.resp.fire)
  val recv = BoolStopWatch(ptw.resp.fire, tlb.resp.fire || flush)
  ptw.resp.ready := !recv || { if (passReady) tlb.resp.ready else false.B }
  tlb.resp.valid := recv
  tlb.resp.bits := resp

  XSPerfAccumulate("req", req_in.fire)
  XSPerfAccumulate("resp", tlb.resp.fire)
  if (!passReady) {
    // "blank" cycles: a slot stays occupied although its consumer was ready —
    // bandwidth that passReady would have recovered.
    XSPerfAccumulate("req_blank", req_in.valid && sent && ptw.req(0).ready)
    XSPerfAccumulate("resp_blank", ptw.resp.valid && recv && tlb.resp.ready)
    XSPerfAccumulate("req_blank_ignore_ready", req_in.valid && sent)
    XSPerfAccumulate("resp_blank_ignore_ready", ptw.resp.valid && recv)
  }
  XSDebug(req_in.valid || io.tlb.resp.valid, p"tlb: ${tlb}\n")
  XSDebug(io.ptw.req(0).valid || io.ptw.resp.valid, p"ptw: ${ptw}\n")
}
132
/** IO bundle of the PTW filters: a vectored TLB-facing side (consumer, hence
  * Flipped), a single PTW-facing side, an optional load replay-hint interface,
  * a ROB-head TLB-miss indicator and the top-down debug input, plus the
  * fence/CSR inputs from MMUIOBaseBundle.
  *
  * @param Width   number of request ports on the TLB side
  * @param hasHint whether the replay-hint interface is present
  */
class PTWFilterIO(Width: Int, hasHint: Boolean = false)(implicit p: Parameters) extends MMUIOBaseBundle {
  val tlb = Flipped(new VectorTlbPtwIO(Width))
  val ptw = new TlbPtwIO()
  val hint = if (hasHint) Some(new TlbHintIO) else None
  val rob_head_miss_in_tlb = Output(Bool())
  val debugTopDown = new Bundle {
    val robHeadVaddr = Flipped(Valid(UInt(VAddrBits.W)))
  }

  /** Wire up the TLB side and the fence/CSR inputs; the PTW side is left to the caller. */
  def apply(tlb: VectorTlbPtwIO, sfence: SfenceBundle, csr: TlbCsrBundle): Unit = {
    this.csr <> csr
    this.sfence <> sfence
    this.tlb <> tlb
  }

  /** Wire up both sides and the fence/CSR inputs. */
  def apply(tlb: VectorTlbPtwIO, ptw: TlbPtwIO, sfence: SfenceBundle, csr: TlbCsrBundle): Unit = {
    this.ptw <> ptw
    apply(tlb, sfence, csr)
  }

}
156
/** IO of one PTWFilterEntry bank: PTWFilterIO plus per-bank sideband signals. */
class PTWFilterEntryIO(Width: Int, hasHint: Boolean = false)(implicit p: Parameters) extends PTWFilterIO(Width, hasHint){
  val flush = Input(Bool())                   // clear all valid entries in this bank
  val refill = Output(Bool())                 // a PTW resp matched >= 1 entry this cycle
  val getGpa = Output(Bool())                 // getGpa of the matched entry, valid on refill
  val memidx = Output(new MemBlockidxBundle)  // memidx of the matched entry, valid on refill
}
163
/** One bank of the DTLB miss filter.
  *
  * Buffers up to Size outstanding (vpn, s2xlate) page-walk requests,
  * deduplicating new requests against resident entries, against the incoming
  * PTW response, and against same-cycle sibling ports. Entries are issued one
  * at a time to the L2 TLB; every entry matched by a returning response is
  * released and io.refill pulses for one cycle.
  *
  * @param Width   number of same-cycle request ports feeding this bank
  * @param Size    number of entries (must be a power of two)
  * @param hasHint whether to drive the load replay-hint interface
  */
class PTWFilterEntry(Width: Int, Size: Int, hasHint: Boolean = false)(implicit p: Parameters) extends XSModule with HasPtwConst {

  val io = IO(new PTWFilterEntryIO(Width, hasHint))
  require(isPow2(Size), s"Filter Size ($Size) must be a power of 2")

  /** Index of a slot whose valid bit equals `valid`.
    * NOTE: under Chisel's last-connect semantics the final matching iteration
    * wins, so this returns the HIGHEST matching index despite the name.
    * Callers only need *some* matching slot, so this is benign.
    */
  def firstValidIndex(v: Seq[Bool], valid: Bool): UInt = {
    val index = WireInit(0.U(log2Up(Size).W))
    for (i <- 0 until v.size) {
      when (v(i) === valid) {
        index := i.U
      }
    }
    index
  }

  // Entry state: valid, already-issued-to-PTW, and the request payload.
  val v = RegInit(VecInit(Seq.fill(Size)(false.B)))
  val sent = RegInit(VecInit(Seq.fill(Size)(false.B)))
  val vpn = Reg(Vec(Size, UInt(vpnLen.W)))
  val s2xlate = Reg(Vec(Size, UInt(2.W)))
  val getGpa = Reg(Vec(Size, Bool()))
  val memidx = Reg(Vec(Size, new MemBlockidxBundle))

  // Per-port enqueue decision: whether port i may allocate, and at which slot.
  val enqvalid = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val canenq = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val enqidx = WireInit(VecInit(Seq.fill(Width)(0.U(log2Up(Size).W))))

  //val selectCount = RegInit(0.U(log2Up(Width).W))

  // Per-port: does the request already sit in an entry, and where.
  val entryIsMatchVec = WireInit(VecInit(Seq.fill(Width)(false.B)))
  val entryMatchIndexVec = WireInit(VecInit(Seq.fill(Width)(0.U(log2Up(Size).W))))
  // Entries hit by the incoming PTW response (same s2xlate and tag hit).
  val ptwResp_EntryMatchVec = vpn.zip(v).zip(s2xlate).map{ case ((pi, vi), s2xlatei) => vi && s2xlatei === io.ptw.resp.bits.s2xlate && io.ptw.resp.bits.hit(pi, io.csr.satp.asid, io.csr.vsatp.asid, io.csr.hgatp.asid, true, true)}
  val ptwResp_EntryMatchFirst = firstValidIndex(ptwResp_EntryMatchVec, true.B)
  // Incoming requests already answered by the in-flight PTW response.
  val ptwResp_ReqMatchVec = io.tlb.req.map(a => io.ptw.resp.valid && a.bits.s2xlate === io.ptw.resp.bits.s2xlate && io.ptw.resp.bits.hit(a.bits.vpn, 0.U, 0.U, io.csr.hgatp.asid, allType = true, true))

  io.refill := Cat(ptwResp_EntryMatchVec).orR && io.ptw.resp.fire
  io.ptw.resp.ready := true.B
  // Defaults / tie-offs: this bank's TLB response path is unused (the enclosing
  // filter builds the real response); memidx/getGpa get real values on refill.
  io.tlb.req.foreach(_.ready := true.B)
  io.tlb.resp.valid := false.B
  io.tlb.resp.bits.data := 0.U.asTypeOf(new PtwRespS2withMemIdx)
  io.tlb.resp.bits.vector := 0.U.asTypeOf(Vec(Width, Bool()))
  io.tlb.resp.bits.getGpa := 0.U.asTypeOf(Vec(Width, Bool()))
  io.memidx := 0.U.asTypeOf(new MemBlockidxBundle)
  io.getGpa := 0.U

  // Statically partition the entry array among the ports so each port
  // allocates in its own region. Ugly code, should be optimized later.
  if (Enable3Load3Store) {
    require(Width <= 4, s"DTLB Filter Width ($Width) must equal or less than 4")
    if (Width == 1) {
      require(Size == 8, s"prefetch filter Size ($Size) should be 8")
      canenq(0) := !(Cat(v).andR)
      enqidx(0) := firstValidIndex(v, false.B)
    } else if (Width == 3) {
      require(Size == 8, s"store filter Size ($Size) should be 8")
      canenq(0) := !(Cat(v.take(3)).andR)
      enqidx(0) := firstValidIndex(v.take(3), false.B)
      canenq(1) := !(Cat(v.drop(3).take(3)).andR)
      enqidx(1) := firstValidIndex(v.drop(3).take(3), false.B) + 3.U
      canenq(2) := !(Cat(v.drop(6).take(2)).andR)
      enqidx(2) := firstValidIndex(v.drop(6).take(2), false.B) + 6.U
    } else if (Width == 4) {
      require(Size == 16, s"load filter Size ($Size) should be 16")
      canenq(0) := !(Cat(v.take(4)).andR)
      enqidx(0) := firstValidIndex(v.take(4), false.B)
      canenq(1) := !(Cat(v.drop(4).take(4)).andR)
      enqidx(1) := firstValidIndex(v.drop(4).take(4), false.B) + 4.U
      canenq(2) := !(Cat(v.drop(8).take(4)).andR)
      enqidx(2) := firstValidIndex(v.drop(8).take(4), false.B) + 8.U
      canenq(3) := !(Cat(v.drop(12).take(4)).andR)
      enqidx(3) := firstValidIndex(v.drop(12).take(4), false.B) + 12.U
    }
  } else {
    require(Width <= 3, s"DTLB Filter Width ($Width) must equal or less than 3")
    if (Width == 1) {
      require(Size == 8, s"prefetch filter Size ($Size) should be 8")
      canenq(0) := !(Cat(v).andR)
      enqidx(0) := firstValidIndex(v, false.B)
    } else if (Width == 2) {
      require(Size == 8, s"store filter Size ($Size) should be 8")
      canenq(0) := !(Cat(v.take(Size/2)).andR)
      enqidx(0) := firstValidIndex(v.take(Size/2), false.B)
      canenq(1) := !(Cat(v.drop(Size/2)).andR)
      enqidx(1) := firstValidIndex(v.drop(Size/2), false.B) + (Size/2).U
    } else if (Width == 3) {
      require(Size == 16, s"load filter Size ($Size) should be 16")
      canenq(0) := !(Cat(v.take(8)).andR)
      enqidx(0) := firstValidIndex(v.take(8), false.B)
      canenq(1) := !(Cat(v.drop(8).take(4)).andR)
      enqidx(1) := firstValidIndex(v.drop(8).take(4), false.B) + 8.U
      // four entries for prefetch
      canenq(2) := !(Cat(v.drop(12)).andR)
      enqidx(2) := firstValidIndex(v.drop(12), false.B) + 12.U
    }
  }


  for (i <- 0 until Width) {
    // Allocate only a genuinely new request: not answered by the in-flight
    // resp, not already resident, and a free slot exists in this port's region.
    enqvalid(i) := io.tlb.req(i).valid && !ptwResp_ReqMatchVec(i) && !entryIsMatchVec(i) && canenq(i)
    when (!enqvalid(i)) {
      enqidx(i) := entryMatchIndexVec(i)
    }

    val entryIsMatch = vpn.zip(v).zip(s2xlate).map{ case ((pi, vi), s2xlatei) => vi && s2xlatei === io.tlb.req(i).bits.s2xlate && pi === io.tlb.req(i).bits.vpn}
    entryIsMatchVec(i) := Cat(entryIsMatch).orR
    entryMatchIndexVec(i) := firstValidIndex(entryIsMatch, true.B)

    // Same-cycle duplicate across ports: defer to the lower-indexed port.
    if (i > 0) {
      for (j <- 0 until i) {
        val newIsMatch = io.tlb.req(i).bits.vpn === io.tlb.req(j).bits.vpn && io.tlb.req(i).bits.s2xlate === io.tlb.req(j).bits.s2xlate
        when (newIsMatch && io.tlb.req(j).valid) {
          enqidx(i) := enqidx(j)
          canenq(i) := canenq(j)
          enqvalid(i) := false.B
        }
      }
    }

    when (enqvalid(i)) {
      v(enqidx(i)) := true.B
      sent(enqidx(i)) := false.B
      vpn(enqidx(i)) := io.tlb.req(i).bits.vpn
      s2xlate(enqidx(i)) := io.tlb.req(i).bits.s2xlate
      getGpa(enqidx(i)) := io.tlb.req(i).bits.getGpa
      memidx(enqidx(i)) := io.tlb.req(i).bits.memidx
    }
  }

  // Issue one valid, not-yet-sent entry to the PTW. (The original wrapped
  // these connections in a `for (i <- 0 until Size)` loop that never used `i`,
  // re-emitting the same connections Size times; the loop is dropped.)
  val issuevec = v.zip(sent).map{ case (v, s) => v && !s}
  val issueindex = firstValidIndex(issuevec, true.B)
  val canissue = Cat(issuevec).orR
  io.ptw.req(0).valid := canissue
  io.ptw.req(0).bits.vpn := vpn(issueindex)
  io.ptw.req(0).bits.s2xlate := s2xlate(issueindex)
  when (io.ptw.req(0).fire) {
    sent(issueindex) := true.B
  }

  // On a response: release every matched entry and export the matched entry's
  // sideband info for the enclosing filter.
  when (io.ptw.resp.fire) {
    v.zip(ptwResp_EntryMatchVec).foreach{ case (vi, mi) => when (mi) { vi := false.B }}
    io.memidx := memidx(ptwResp_EntryMatchFirst)
    io.getGpa := getGpa(ptwResp_EntryMatchFirst)
  }

  when (io.flush) {
    v.foreach(_ := false.B)
  }

  // Load replay hints: slot id per enq port, and a release broadcast on refill.
  if (hasHint) {
    val hintIO = io.hint.getOrElse(new TlbHintIO)
    for (i <- 0 until exuParameters.LduCnt) {
      hintIO.req(i).id := enqidx(i)
      hintIO.req(i).full := !canenq(i) || ptwResp_ReqMatchVec(i)
    }
    hintIO.resp.valid := io.refill
    hintIO.resp.bits.id := ptwResp_EntryMatchFirst
    hintIO.resp.bits.replay_all := PopCount(ptwResp_EntryMatchVec) > 1.U
  }

  io.rob_head_miss_in_tlb := VecInit(v.zip(vpn).map{case (vi, vpni) => {
    vi && io.debugTopDown.robHeadVaddr.valid && vpni === get_pn(io.debugTopDown.robHeadVaddr.bits)
  }}).asUInt.orR


  // Perf Counter
  val counter = PopCount(v)
  // Widened from log2Up(Size) to log2Up(Size+1): the counter can legitimately
  // reach Size, which the narrower width cannot represent (it would wrap and
  // `inflight_full` / the assert below could never trigger). This matches the
  // width used by PTWFilter.
  val inflight_counter = RegInit(0.U(log2Up(Size + 1).W))
  val inflight_full = inflight_counter === Size.U
  when (io.ptw.req(0).fire =/= io.ptw.resp.fire) {
    inflight_counter := Mux(io.ptw.req(0).fire, inflight_counter + 1.U, inflight_counter - 1.U)
  }

  assert(inflight_counter <= Size.U, "inflight should be no more than Size")
  when (counter === 0.U) {
    assert(!io.ptw.req(0).fire, "when counter is 0, should not req")
  }

  when (io.flush) {
    inflight_counter := 0.U
  }

  XSPerfAccumulate("tlb_req_count", PopCount(Cat(io.tlb.req.map(_.valid))))
  XSPerfAccumulate("tlb_req_count_filtered", PopCount(enqvalid))
  XSPerfAccumulate("ptw_req_count", io.ptw.req(0).fire)
  XSPerfAccumulate("ptw_req_cycle", inflight_counter)
  XSPerfAccumulate("tlb_resp_count", io.tlb.resp.fire)
  XSPerfAccumulate("ptw_resp_count", io.ptw.resp.fire)
  XSPerfAccumulate("inflight_cycle", Cat(sent).orR)

  for (i <- 0 until Size + 1) {
    XSPerfAccumulate(s"counter${i}", counter === i.U)
  }

  for (i <- 0 until Size) {
    TimeOutAssert(v(i), timeOutThreshold, s"Filter ${i} doesn't recv resp in time")
  }

}
363
/** DTLB-to-L2TLB filter built from three PTWFilterEntry banks.
  *
  * Requests from the load (LduCnt + 1 ports), store (StuCnt ports) and
  * prefetch (1 port) pipelines are filtered in separate banks. The banks'
  * PTW requests are round-robin arbitrated onto the single io.ptw port, and
  * each PTW response is rebroadcast to every bank one cycle after it arrives.
  */
class PTWNewFilter(Width: Int, Size: Int, FenceDelay: Int)(implicit p: Parameters) extends XSModule with HasPtwConst {
  require(Size >= Width)

  val io = IO(new PTWFilterIO(Width, hasHint = true))

  // One filter bank per requestor class.
  val load_filter = VecInit(Seq.fill(1) {
    val load_entry = Module(new PTWFilterEntry(Width = exuParameters.LduCnt + 1, Size = loadfiltersize, hasHint = true))
    load_entry.io
  })

  val store_filter = VecInit(Seq.fill(1) {
    val store_entry = Module(new PTWFilterEntry(Width = exuParameters.StuCnt, Size = storefiltersize))
    store_entry.io
  })

  val prefetch_filter = VecInit(Seq.fill(1) {
    val prefetch_entry = Module(new PTWFilterEntry(Width = 1, Size = prefetchfiltersize))
    prefetch_entry.io
  })

  val filter = load_filter ++ store_filter ++ prefetch_filter

  // Slice the TLB request ports among the banks:
  // [0 .. LduCnt] loads, then StuCnt stores, then the rest (prefetch).
  load_filter.map(_.tlb.req := io.tlb.req.take(exuParameters.LduCnt + 1))
  store_filter.map(_.tlb.req := io.tlb.req.drop(exuParameters.LduCnt + 1).take(exuParameters.StuCnt))
  prefetch_filter.map(_.tlb.req := io.tlb.req.drop(exuParameters.LduCnt + 1 + exuParameters.StuCnt))

  val flush = DelayN(io.sfence.valid || io.csr.satp.changed || (io.csr.priv.virt && io.csr.vsatp.changed), FenceDelay)
  // Register the PTW response and replay it to all banks one cycle later.
  val ptwResp = RegEnable(io.ptw.resp.bits, io.ptw.resp.fire)
  val ptwResp_valid = Cat(filter.map(_.refill)).orR
  filter.map(_.tlb.resp.ready := true.B)
  filter.map(_.ptw.resp.valid := RegNext(io.ptw.resp.fire, init = false.B))
  filter.map(_.ptw.resp.bits := ptwResp)
  filter.map(_.flush := flush)
  filter.map(_.sfence := io.sfence)
  filter.map(_.csr := io.csr)
  filter.map(_.debugTopDown.robHeadVaddr := io.debugTopDown.robHeadVaddr)

  // Build the merged TLB response from the registered PTW response.
  io.tlb.req.map(_.ready := true.B)
  io.tlb.resp.valid := ptwResp_valid
  io.tlb.resp.bits.data.s2xlate := ptwResp.s2xlate
  io.tlb.resp.bits.data.getGpa := DontCare // not used
  io.tlb.resp.bits.data.s1 := ptwResp.s1
  io.tlb.resp.bits.data.s2 := ptwResp.s2
  io.tlb.resp.bits.data.memidx := 0.U.asTypeOf(new MemBlockidxBundle)
  // vector used to represent different requestors of DTLB
  // (e.g. the store DTLB has StuCnt requestors)
  // However, it is only necessary to distinguish between different DTLB now
  for (i <- 0 until Width) {
    io.tlb.resp.bits.vector(i) := false.B
    io.tlb.resp.bits.getGpa(i) := false.B
  }
  // One representative bit per DTLB: port 0 = load bank,
  // LduCnt+1 = store bank, LduCnt+1+StuCnt = prefetch bank.
  io.tlb.resp.bits.vector(0) := load_filter(0).refill
  io.tlb.resp.bits.vector(exuParameters.LduCnt + 1) := store_filter(0).refill
  io.tlb.resp.bits.vector(exuParameters.LduCnt + 1 + exuParameters.StuCnt) := prefetch_filter(0).refill
  io.tlb.resp.bits.getGpa(0) := load_filter(0).getGpa
  io.tlb.resp.bits.getGpa(exuParameters.LduCnt + 1) := store_filter(0).getGpa
  io.tlb.resp.bits.getGpa(exuParameters.LduCnt + 1 + exuParameters.StuCnt) := prefetch_filter(0).getGpa

  // Load replay hints, delayed one cycle to line up with the registered resp.
  val hintIO = io.hint.getOrElse(new TlbHintIO)
  val load_hintIO = load_filter(0).hint.getOrElse(new TlbHintIO)
  for (i <- 0 until exuParameters.LduCnt) {
    hintIO.req(i) := RegNext(load_hintIO.req(i))
  }
  hintIO.resp := RegNext(load_hintIO.resp)

  // Last-connect overrides: tag the response with the refilling bank's memidx.
  when (load_filter(0).refill) {
    io.tlb.resp.bits.vector(0) := true.B
    io.tlb.resp.bits.data.memidx := load_filter(0).memidx
  }
  when (store_filter(0).refill) {
    io.tlb.resp.bits.vector(exuParameters.LduCnt + 1) := true.B
    io.tlb.resp.bits.data.memidx := store_filter(0).memidx
  }
  when (prefetch_filter(0).refill) {
    io.tlb.resp.bits.vector(exuParameters.LduCnt + 1 + exuParameters.StuCnt) := true.B
    io.tlb.resp.bits.data.memidx := 0.U.asTypeOf(new MemBlockidxBundle)
  }

  // Round-robin arbitration of the three banks onto the single PTW req port.
  val ptw_arb = Module(new RRArbiterInit(new PtwReq, 3))
  for (i <- 0 until 3) {
    ptw_arb.io.in(i).valid := filter(i).ptw.req(0).valid
    ptw_arb.io.in(i).bits.vpn := filter(i).ptw.req(0).bits.vpn
    ptw_arb.io.in(i).bits.s2xlate := filter(i).ptw.req(0).bits.s2xlate
    filter(i).ptw.req(0).ready := ptw_arb.io.in(i).ready
  }
  ptw_arb.io.out.ready := io.ptw.req(0).ready
  io.ptw.req(0).valid := ptw_arb.io.out.valid
  io.ptw.req(0).bits.vpn := ptw_arb.io.out.bits.vpn
  io.ptw.req(0).bits.s2xlate := ptw_arb.io.out.bits.s2xlate
  io.ptw.resp.ready := true.B

  io.rob_head_miss_in_tlb := Cat(filter.map(_.rob_head_miss_in_tlb)).orR
}
457
/** Queue-based DTLB-to-L2TLB filter (older design; cf. PTWNewFilter).
  *
  * Buffers TLB miss requests in a Size-entry circular queue, merges duplicate
  * (vpn, s2xlate) requests, issues entries one at a time to the PTW, and
  * broadcasts each PTW response back to every matching entry. Three pointers
  * walk the queue: enqPtr (allocate), issPtr (issue to PTW), deqPtr (retire).
  */
class PTWFilter(Width: Int, Size: Int, FenceDelay: Int)(implicit p: Parameters) extends XSModule with HasPtwConst {
  require(Size >= Width)

  val io = IO(new PTWFilterIO(Width))

  // Queue entry state.
  val v = RegInit(VecInit(Seq.fill(Size)(false.B)))
  val ports = Reg(Vec(Size, Vec(Width, Bool()))) // record which port(s) the entry come from, may not able to cover all the ports
  val vpn = Reg(Vec(Size, UInt(vpnLen.W)))
  val s2xlate = Reg(Vec(Size, UInt(2.W)))
  val getGpa = Reg(Vec(Size, Bool()))
  val memidx = Reg(Vec(Size, new MemBlockidxBundle))
  val enqPtr = RegInit(0.U(log2Up(Size).W)) // Enq
  val issPtr = RegInit(0.U(log2Up(Size).W)) // Iss to Ptw
  val deqPtr = RegInit(0.U(log2Up(Size).W)) // Deq
  // mayFull* disambiguate full vs. empty when the pointer pairs are equal.
  val mayFullDeq = RegInit(false.B)
  val mayFullIss = RegInit(false.B)
  val counter = RegInit(0.U(log2Up(Size+1).W))
  val flush = DelayN(io.sfence.valid || io.csr.satp.changed || (io.csr.priv.virt && io.csr.vsatp.changed), FenceDelay)
  val tlb_req = WireInit(io.tlb.req) // NOTE: tlb_req is not io.tlb.req, see below codes, just use cloneType
  tlb_req.suggestName("tlb_req")

  val inflight_counter = RegInit(0.U(log2Up(Size + 1).W))
  val inflight_full = inflight_counter === Size.U

  // Does a PTW response cover (vpn, s2xlate)? For onlyStage2 translation the
  // stage-2 entry is checked, otherwise the stage-1 entry.
  def ptwResp_hit(vpn: UInt, s2xlate: UInt, resp: PtwRespS2): Bool = {
    val enableS2xlate = resp.s2xlate =/= noS2xlate
    val onlyS2 = resp.s2xlate === onlyStage2
    val s1hit = resp.s1.hit(vpn, 0.U, io.csr.hgatp.asid, true, true, enableS2xlate)
    val s2hit = resp.s2.hit(vpn, io.csr.hgatp.asid)
    s2xlate === resp.s2xlate && Mux(enableS2xlate && onlyS2, s2hit, s1hit)
  }

  when (io.ptw.req(0).fire =/= io.ptw.resp.fire) {
    inflight_counter := Mux(io.ptw.req(0).fire, inflight_counter + 1.U, inflight_counter - 1.U)
  }

  val canEnqueue = Wire(Bool()) // NOTE: actually enqueue
  val ptwResp = RegEnable(io.ptw.resp.bits, io.ptw.resp.fire)
  // Resident entries hit by the incoming PTW response (combinational, at resp time).
  val ptwResp_OldMatchVec = vpn.zip(v).zip(s2xlate).map { case (((vpn, v), s2xlate)) =>{
    v && ptwResp_hit(vpn, s2xlate, io.ptw.resp.bits)
  }
  }
  val ptwResp_valid = RegNext(io.ptw.resp.fire && Cat(ptwResp_OldMatchVec).orR, init = false.B)
  // May send repeated requests to L2 tlb with same vpn(26, 3) when sector tlb
  // Dedup vectors, computed in the cycle before the request enters tlb_req:
  //   old  = already resident, last = in previous cycle's (enqueuing) batch,
  //   new  = duplicate within the same incoming batch.
  val oldMatchVec_early = io.tlb.req.map(a => vpn.zip(v).zip(s2xlate).map{ case ((pi, vi), s2xlate) => vi && pi === a.bits.vpn && s2xlate === a.bits.s2xlate })
  val lastReqMatchVec_early = io.tlb.req.map(a => tlb_req.map{ b => b.valid && b.bits.vpn === a.bits.vpn && canEnqueue && b.bits.s2xlate === a.bits.s2xlate})
  val newMatchVec_early = io.tlb.req.map(a => io.tlb.req.map(b => a.bits.vpn === b.bits.vpn && a.bits.s2xlate === b.bits.s2xlate))

  // Stage the incoming requests by one cycle; drop those already answered by
  // the just-registered response or merged into last cycle's batch.
  (0 until Width) foreach { i =>
    tlb_req(i).valid := RegNext(io.tlb.req(i).valid &&
      !(ptwResp_valid && ptwResp_hit(io.tlb.req(i).bits.vpn, io.tlb.req(i).bits.s2xlate, ptwResp)) &&
      !Cat(lastReqMatchVec_early(i)).orR,
      init = false.B)
    tlb_req(i).bits := RegEnable(io.tlb.req(i).bits, io.tlb.req(i).valid)
  }

  // Registered dedup decisions aligned with tlb_req.
  val oldMatchVec = oldMatchVec_early.map(a => RegNext(Cat(a).orR))
  val newMatchVec = (0 until Width).map(i => (0 until Width).map(j =>
    RegNext(newMatchVec_early(i)(j)) && tlb_req(j).valid
  ))
  val ptwResp_newMatchVec = tlb_req.map(a =>
    ptwResp_valid && ptwResp_hit(a.bits.vpn, a.bits.s2xlate, ptwResp))

  // Track which ports each entry serves so the response vector can fan out.
  val oldMatchVec2 = (0 until Width).map(i => oldMatchVec_early(i).map(RegNext(_)).map(_ & tlb_req(i).valid))
  val update_ports = v.indices.map(i => oldMatchVec2.map(j => j(i)))
  val ports_init = (0 until Width).map(i => (1 << i).U(Width.W))
  val filter_ports = (0 until Width).map(i => ParallelMux(newMatchVec(i).zip(ports_init).drop(i)))
  val resp_vector = RegEnable(ParallelMux(ptwResp_OldMatchVec zip ports), io.ptw.resp.fire)
  val resp_getGpa = RegEnable(ParallelMux(ptwResp_OldMatchVec zip getGpa), io.ptw.resp.fire)

  // A staged request is redundant if it matches the registered resp, a resident
  // entry, or a lower-indexed request of the same staged batch.
  def canMerge(index: Int) : Bool = {
    ptwResp_newMatchVec(index) || oldMatchVec(index) ||
    Cat(newMatchVec(index).take(index)).orR
  }

  // Staged requests that survive the merge filter and will actually enqueue.
  def filter_req() = {
    val reqs =  tlb_req.indices.map{ i =>
      val req = Wire(ValidIO(new PtwReqwithMemIdx()))
      val merge = canMerge(i)
      req.bits := tlb_req(i).bits
      req.valid := !merge && tlb_req(i).valid
      req
    }
    reqs
  }

  val reqs = filter_req()
  val req_ports = filter_ports
  val isFull = enqPtr === deqPtr && mayFullDeq
  val isEmptyDeq = enqPtr === deqPtr && !mayFullDeq
  val isEmptyIss = enqPtr === issPtr && !mayFullIss
  // Compact the surviving requests onto consecutive slots starting at enqPtr.
  val accumEnqNum = (0 until Width).map(i => PopCount(reqs.take(i).map(_.valid)))
  val enqPtrVecInit = VecInit((0 until Width).map(i => enqPtr + i.U))
  val enqPtrVec = VecInit((0 until Width).map(i => enqPtrVecInit(accumEnqNum(i))))
  val enqNum = PopCount(reqs.map(_.valid))
  canEnqueue := counter +& enqNum <= Size.U

  // the req may recv false ready, but actually received. Filter and TLB will handle it.
  val enqNum_fake = PopCount(io.tlb.req.map(_.valid))
  val canEnqueue_fake = counter +& enqNum_fake <= Size.U
  io.tlb.req.map(_.ready := canEnqueue_fake) // NOTE: just drop un-fire reqs

  // tlb req flushed by ptw resp: last ptw resp && current ptw resp
  // the flushed tlb req will fakely enq, with a false valid
  val tlb_req_flushed = reqs.map(a => io.ptw.resp.valid && ptwResp_hit(a.bits.vpn, a.bits.s2xlate, io.ptw.resp.bits))

  // TLB response: registered PTW response plus the matched entry's sideband.
  // NOTE(review): memidx/getGpa index with the combinational ptwResp_OldMatchVec
  // one cycle after the matched entries' valid bits were cleared on resp.fire —
  // verify this still selects the intended entry.
  io.tlb.resp.valid := ptwResp_valid
  io.tlb.resp.bits.data.s2xlate := ptwResp.s2xlate
  io.tlb.resp.bits.data.s1 := ptwResp.s1
  io.tlb.resp.bits.data.s2 := ptwResp.s2
  io.tlb.resp.bits.data.memidx := memidx(OHToUInt(ptwResp_OldMatchVec))
  io.tlb.resp.bits.vector := resp_vector
  io.tlb.resp.bits.data.getGpa := getGpa(OHToUInt(ptwResp_OldMatchVec))
  io.tlb.resp.bits.getGpa := DontCare

  // Issue the entry at issPtr unless it is already answered by the registered resp.
  val issue_valid = v(issPtr) && !isEmptyIss && !inflight_full
  val issue_filtered = ptwResp_valid && ptwResp_hit(io.ptw.req(0).bits.vpn, io.ptw.req(0).bits.s2xlate, ptwResp)
  val issue_fire_fake = issue_valid && (io.ptw.req(0).ready || (issue_filtered && false.B /*timing-opt*/))
  io.ptw.req(0).valid := issue_valid && !issue_filtered
  io.ptw.req(0).bits.vpn := vpn(issPtr)
  io.ptw.req(0).bits.s2xlate := s2xlate(issPtr)
  io.ptw.resp.ready := true.B

  // Enqueue surviving requests; an entry flushed by the in-flight resp is
  // allocated with a false valid (it still consumes a slot).
  reqs.zipWithIndex.map{
    case (req, i) =>
      when (req.valid && canEnqueue) {
        v(enqPtrVec(i)) := !tlb_req_flushed(i)
        vpn(enqPtrVec(i)) := req.bits.vpn
        s2xlate(enqPtrVec(i)) := req.bits.s2xlate
        getGpa(enqPtrVec(i)) := req.bits.getGpa
        memidx(enqPtrVec(i)) := req.bits.memidx
        ports(enqPtrVec(i)) := req_ports(i).asBools
      }
  }
  // Accumulate ports of merged (resident-match) requests onto their entries.
  for (i <- ports.indices) {
    when (v(i)) {
      ports(i) := ports(i).zip(update_ports(i)).map(a => a._1 || a._2)
    }
  }

  // Pointer updates: deq retires invalidated entries, iss advances on issue
  // (real or filtered) or over invalid entries.
  val do_enq = canEnqueue && Cat(reqs.map(_.valid)).orR
  val do_deq = (!v(deqPtr) && !isEmptyDeq)
  val do_iss = issue_fire_fake || (!v(issPtr) && !isEmptyIss)
  when (do_enq) {
    enqPtr := enqPtr + enqNum
  }
  when (do_deq) {
    deqPtr := deqPtr + 1.U
  }
  when (do_iss) {
    issPtr := issPtr + 1.U
  }
  when (issue_fire_fake && issue_filtered) { // issued but is filtered
    v(issPtr) := false.B
  }
  when (do_enq =/= do_deq) {
    mayFullDeq := do_enq
  }
  when (do_enq =/= do_iss) {
    mayFullIss := do_enq
  }

  // Release every entry matched by the response.
  when (io.ptw.resp.fire) {
    v.zip(ptwResp_OldMatchVec).map{ case (vi, mi) => when (mi) { vi := false.B }}
  }

  counter := counter - do_deq + Mux(do_enq, enqNum, 0.U)
  assert(counter <= Size.U, "counter should be no more than Size")
  assert(inflight_counter <= Size.U, "inflight should be no more than Size")
  when (counter === 0.U) {
    assert(!io.ptw.req(0).fire, "when counter is 0, should not req")
    assert(isEmptyDeq && isEmptyIss, "when counter is 0, should be empty")
  }
  when (counter === Size.U) {
    assert(mayFullDeq, "when counter is Size, should be full")
  }

  // Flush: drop everything and reset all pointers/counters.
  when (flush) {
    v.map(_ := false.B)
    deqPtr := 0.U
    enqPtr := 0.U
    issPtr := 0.U
    ptwResp_valid := false.B
    mayFullDeq := false.B
    mayFullIss := false.B
    counter := 0.U
    inflight_counter := 0.U
  }

  val robHeadVaddr = io.debugTopDown.robHeadVaddr
  io.rob_head_miss_in_tlb := VecInit(v.zip(vpn).map{case (vi, vpni) => {
    vi && robHeadVaddr.valid && vpni === get_pn(robHeadVaddr.bits)
  }}).asUInt.orR

  // perf
  XSPerfAccumulate("tlb_req_count", PopCount(Cat(io.tlb.req.map(_.valid))))
  XSPerfAccumulate("tlb_req_count_filtered", Mux(do_enq, accumEnqNum(Width - 1), 0.U))
  XSPerfAccumulate("ptw_req_count", io.ptw.req(0).fire)
  XSPerfAccumulate("ptw_req_cycle", inflight_counter)
  XSPerfAccumulate("tlb_resp_count", io.tlb.resp.fire)
  XSPerfAccumulate("ptw_resp_count", io.ptw.resp.fire)
  XSPerfAccumulate("inflight_cycle", !isEmptyDeq)
  for (i <- 0 until Size + 1) {
    XSPerfAccumulate(s"counter${i}", counter === i.U)
  }

  for (i <- 0 until Size) {
    TimeOutAssert(v(i), timeOutThreshold, s"Filter ${i} doesn't recv resp in time")
  }
}
668
/** Factories that size a PTWRepeater from the given TLB IO and wire it up. */
object PTWRepeater {
  /** Build a repeater wired to `tlb` and the fence/CSR inputs only;
    * the PTW side is left for the caller to connect.
    */
  def apply(fenceDelay: Int,
    tlb: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle
  )(implicit p: Parameters) = {
    val repeater = Module(new PTWRepeater(tlb.req.size, fenceDelay))
    repeater.io.apply(tlb, sfence, csr)
    repeater
  }

  /** Build a repeater fully wired between `tlb` and `ptw`. */
  def apply(fenceDelay: Int,
    tlb: TlbPtwIO,
    ptw: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle
  )(implicit p: Parameters) = {
    val repeater = Module(new PTWRepeater(tlb.req.size, fenceDelay))
    repeater.io.apply(tlb, ptw, sfence, csr)
    repeater
  }
}
693
/** Factories that size a PTWRepeaterNB from the given TLB IO and wire it up. */
object PTWRepeaterNB {
  /** Build a non-blocking repeater wired to `tlb` and the fence/CSR inputs only;
    * the PTW side is left for the caller to connect.
    */
  def apply(passReady: Boolean, fenceDelay: Int,
    tlb: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle
  )(implicit p: Parameters) = {
    val repeater = Module(new PTWRepeaterNB(tlb.req.size, passReady, fenceDelay))
    repeater.io.apply(tlb, sfence, csr)
    repeater
  }

  /** Build a non-blocking repeater fully wired between `tlb` and `ptw`. */
  def apply(passReady: Boolean, fenceDelay: Int,
    tlb: TlbPtwIO,
    ptw: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle
  )(implicit p: Parameters) = {
    val repeater = Module(new PTWRepeaterNB(tlb.req.size, passReady, fenceDelay))
    repeater.io.apply(tlb, ptw, sfence, csr)
    repeater
  }
}
718
/** Factories that size a PTWFilter from the given TLB IO and wire it up. */
object PTWFilter {
  /** Build a filter wired to `tlb` and the fence/CSR inputs only;
    * the PTW side is left for the caller to connect.
    */
  def apply(fenceDelay: Int,
    tlb: VectorTlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle,
    size: Int
  )(implicit p: Parameters) = {
    val filter = Module(new PTWFilter(tlb.req.size, size, fenceDelay))
    filter.io.apply(tlb, sfence, csr)
    filter
  }

  /** Build a filter fully wired between `tlb` and `ptw`. */
  def apply(fenceDelay: Int,
    tlb: VectorTlbPtwIO,
    ptw: TlbPtwIO,
    sfence: SfenceBundle,
    csr: TlbCsrBundle,
    size: Int
  )(implicit p: Parameters) = {
    val filter = Module(new PTWFilter(tlb.req.size, size, fenceDelay))
    filter.io.apply(tlb, ptw, sfence, csr)
    filter
  }
}
745
/** Factories that size a PTWNewFilter from the given TLB IO and wire it up. */
object PTWNewFilter {
  /** Build a new-style filter wired to `tlb` and the fence/CSR inputs only;
    * the PTW side is left for the caller to connect.
    */
  def apply(fenceDelay: Int,
            tlb: VectorTlbPtwIO,
            sfence: SfenceBundle,
            csr: TlbCsrBundle,
            size: Int
           )(implicit p: Parameters) = {
    val filter = Module(new PTWNewFilter(tlb.req.size, size, fenceDelay))
    filter.io.apply(tlb, sfence, csr)
    filter
  }

  /** Build a new-style filter fully wired between `tlb` and `ptw`. */
  def apply(fenceDelay: Int,
            tlb: VectorTlbPtwIO,
            ptw: TlbPtwIO,
            sfence: SfenceBundle,
            csr: TlbCsrBundle,
            size: Int
           )(implicit p: Parameters) = {
    val filter = Module(new PTWNewFilter(tlb.req.size, size, fenceDelay))
    filter.io.apply(tlb, ptw, sfence, csr)
    filter
  }
}
772