xref: /XiangShan/src/main/scala/xiangshan/cache/mmu/PageTableWalker.scala (revision 382a2ebdf328e8147e67aad81c929b5587bdfda4)
/***************************************************************************************
* Copyright (c) 2020-2021 Institute of Computing Technology, Chinese Academy of Sciences
* Copyright (c) 2020-2021 Peng Cheng Laboratory
*
* XiangShan is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
*          http://license.coscl.org.cn/MulanPSL2
*
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
*
* See the Mulan PSL v2 for more details.
***************************************************************************************/

package xiangshan.cache.mmu

import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan._
import xiangshan.cache.{HasDCacheParameters, MemoryOpConstants}
import utils._
import utility._
import freechips.rocketchip.diplomacy.{LazyModule, LazyModuleImp}
import freechips.rocketchip.tilelink._
import xiangshan.backend.fu.{PMPReqBundle, PMPRespBundle}

/** Page table walk is divided into two parts
  * One,   PTW: page walk for the PDEs (non-leaf entries), one level at a time
  * Two, LLPTW: page walk for the PTEs, only the leaf entries (4KB), in parallel
  */


/** PTW : page table walker
  * a finite state machine
  * only handles the 1GB and 2MB page-walk levels,
  * or in other words, every level except the last (leaf) level
  **/
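/** Walk-flow sketch (derived from the FSM below, for orientation only):
  *   level 0: fetch the 1GB-level PDE at an address built from satp.ppn and getVpnn(vpn, 2)
  *   level 1: fetch the 2MB-level PDE at an address built from that PDE's ppn and getVpnn(vpn, 1)
  *   leaf   : hand the request to LLPTW (superpages and faults are answered here directly)
  * Every memory access goes through a PMP check first; when two-stage translation is active,
  * the intermediate guest-physical addresses are translated by the HPTW before being used.
  */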
class PTWIO()(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwConst {
  val req = Flipped(DecoupledIO(new Bundle {
    val req_info = new L2TlbInnerBundle()
    val l1Hit = Bool()
    val ppn = UInt(ppnLen.W)
  }))
  val resp = DecoupledIO(new Bundle {
    val source = UInt(bSourceWidth.W)
    val s2xlate = UInt(2.W)
    val resp = new PtwMergeResp
    val h_resp = new HptwResp
  })

  val llptw = DecoupledIO(new LLPTWInBundle())
  // NOTE: the llptw port changed from "connect to llptw" to "connect to page cache"
  // to avoid a corner case that caused duplicate entries

  val hptw = new Bundle {
    val req = DecoupledIO(new Bundle {
      val source = UInt(bSourceWidth.W)
      val id = UInt(log2Up(l2tlbParams.llptwsize).W)
      val gvpn = UInt(vpnLen.W)
    })
    val resp = Flipped(Valid(new Bundle {
      val h_resp = Output(new HptwResp)
    }))
  }
  val mem = new Bundle {
    val req = DecoupledIO(new L2TlbMemReqBundle())
    val resp = Flipped(ValidIO(UInt(XLEN.W)))
    val mask = Input(Bool())
  }
  val pmp = new Bundle {
    val req = ValidIO(new PMPReqBundle())
    val resp = Flipped(new PMPRespBundle())
  }

  val refill = Output(new Bundle {
    val req_info = new L2TlbInnerBundle()
    val level = UInt(log2Up(Level).W)
  })
}

class PTW()(implicit p: Parameters) extends XSModule with HasPtwConst with HasPerfEvents {
  val io = IO(new PTWIO)
  val sfence = io.sfence
  val mem = io.mem
  val req_s2xlate = Reg(UInt(2.W))
  val enableS2xlate = io.req.bits.req_info.s2xlate =/= noS2xlate
  val onlyS1xlate = io.req.bits.req_info.s2xlate === onlyStage1
  val onlyS2xlate = io.req.bits.req_info.s2xlate === onlyStage2

  val satp = Mux(enableS2xlate, io.csr.vsatp, io.csr.satp)
  val hgatp = io.csr.hgatp
  val flush = io.sfence.valid || satp.changed
  val s2xlate = enableS2xlate && !onlyS1xlate
  val level = RegInit(0.U(log2Up(Level).W))
  val af_level = RegInit(0.U(log2Up(Level).W)) // access fault return this level
  val ppn = Reg(UInt(ppnLen.W))
  val vpn = Reg(UInt(vpnLen.W)) // vpn or gvpn
  val levelNext = level + 1.U
  val l1Hit = Reg(Bool())
  val pte = mem.resp.bits.asTypeOf(new PteBundle().cloneType)

  // s/w register
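  // Handshake convention for the s_* / w_* registers below: an s_x register that is
  // false means "request x still needs to be sent"; a w_x register that is false means
  // "still waiting for response x". All of them idle at true.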
  val s_pmp_check = RegInit(true.B)
  val s_mem_req = RegInit(true.B)
  val s_llptw_req = RegInit(true.B)
  val w_mem_resp = RegInit(true.B)
  val s_hptw_req = RegInit(true.B)
  val w_hptw_resp = RegInit(true.B)
  val s_last_hptw_req = RegInit(true.B)
  val w_last_hptw_resp = RegInit(true.B)
  // for updating "level"
  val mem_addr_update = RegInit(false.B)

  val idle = RegInit(true.B)
  val finish = WireInit(false.B)
  val sent_to_pmp = idle === false.B && (s_pmp_check === false.B || mem_addr_update) && !finish

  val pageFault = pte.isPf(level)
  val accessFault = RegEnable(io.pmp.resp.ld || io.pmp.resp.mmio, sent_to_pmp)

  val hptw_pageFault = RegInit(false.B)
  val hptw_accessFault = RegInit(false.B)
  val last_s2xlate = RegInit(false.B)

  val ppn_af = pte.isAf()
  val find_pte = pte.isLeaf() || ppn_af || pageFault
  val to_find_pte = level === 1.U && find_pte === false.B
  val source = RegEnable(io.req.bits.req_info.source, io.req.fire)

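  // Next non-leaf PTE address: the level-0 entry is indexed from satp.ppn with the top
  // VPN slice, the level-1 entry from the PPN of the previous PTE (or from the cached
  // PPN when the page cache already hit at L1). MakeAddr roughly composes
  // (ppn << offLen) | (vpn_slice << log2Ceil(XLEN / 8)).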
  val l1addr = MakeAddr(satp.ppn, getVpnn(vpn, 2))
  val l2addr = MakeAddr(Mux(l1Hit, ppn, pte.ppn), getVpnn(vpn, 1))
  val mem_addr = Mux(af_level === 0.U, l1addr, l2addr)

  val hptw_resp = io.hptw.resp.bits.h_resp
  val gpaddr = Mux(onlyS2xlate, Cat(vpn, 0.U(offLen.W)), mem_addr)
  val hpaddr = Cat(hptw_resp.entry.ppn, 0.U(offLen.W))

  io.req.ready := idle

  io.resp.valid := idle === false.B && mem_addr_update && !last_s2xlate && ((w_mem_resp && find_pte) || (s_pmp_check && accessFault) || onlyS2xlate)
  io.resp.bits.source := source
  io.resp.bits.resp.apply(pageFault && !accessFault && !ppn_af, accessFault || ppn_af, Mux(accessFault, af_level, level), pte, vpn, satp.asid, hgatp.asid, vpn(sectortlbwidth - 1, 0), not_super = false)
  io.resp.bits.h_resp := io.hptw.resp.bits.h_resp
  io.resp.bits.s2xlate := s2xlate

  io.llptw.valid := s_llptw_req === false.B && to_find_pte && !accessFault
  io.llptw.bits.req_info.source := source
  io.llptw.bits.req_info.vpn := vpn
  io.llptw.bits.req_info.s2xlate := req_s2xlate
  io.llptw.bits.ppn := DontCare

  io.pmp.req.valid := DontCare // same cycle, do not use valid
  io.pmp.req.bits.addr := Mux(s2xlate, hpaddr, mem_addr)
  io.pmp.req.bits.size := 3.U // TODO: fix it
  io.pmp.req.bits.cmd := TlbCmd.read

  mem.req.valid := s_mem_req === false.B && !mem.mask && !accessFault && s_pmp_check
  mem.req.bits.addr := Mux(s2xlate, hpaddr, mem_addr)
  mem.req.bits.id := FsmReqID.U(bMemID.W)

  io.refill.req_info.s2xlate := req_s2xlate
  io.refill.req_info.vpn := vpn
  io.refill.level := level
  io.refill.req_info.source := source

  io.hptw.req.valid := !s_hptw_req || !s_last_hptw_req
  io.hptw.req.bits.id := FsmReqID.U(bMemID.W)
  io.hptw.req.bits.gvpn := get_pn(gpaddr)
  io.hptw.req.bits.source := source

  when (io.req.fire){
    val req = io.req.bits
    level := Mux(req.l1Hit, 1.U, 0.U)
    af_level := Mux(req.l1Hit, 1.U, 0.U)
    ppn := Mux(req.l1Hit, io.req.bits.ppn, satp.ppn)
    vpn := io.req.bits.req_info.vpn
    l1Hit := req.l1Hit
    accessFault := false.B
    s_pmp_check := false.B
    idle := false.B
    hptw_pageFault := false.B
    req_s2xlate := io.req.bits.req_info.s2xlate
    when(io.req.bits.req_info.s2xlate =/= noS2xlate && io.req.bits.req_info.s2xlate =/= onlyStage1){
      last_s2xlate := true.B
      s_hptw_req := false.B
    }.otherwise {
      s_pmp_check := false.B
    }
  }

  when(io.hptw.req.fire() && s_hptw_req === false.B){
    s_hptw_req := true.B
    w_hptw_resp := false.B
  }

  when(io.hptw.resp.fire() && w_hptw_resp === false.B) {
    hptw_pageFault := io.hptw.resp.bits.h_resp.gpf
    hptw_accessFault := io.hptw.resp.bits.h_resp.gaf
    w_hptw_resp := true.B
    when(onlyS2xlate){
      mem_addr_update := true.B
      last_s2xlate := false.B
    }.otherwise {
      s_pmp_check := false.B
    }
  }

  when(io.hptw.req.fire() && s_last_hptw_req === false.B) {
    w_last_hptw_resp := false.B
    s_last_hptw_req := true.B
  }

  when(io.hptw.resp.fire() && w_last_hptw_resp === false.B){
    hptw_pageFault := io.hptw.resp.bits.h_resp.gpf
    hptw_accessFault := io.hptw.resp.bits.h_resp.gaf
    w_last_hptw_resp := true.B
    mem_addr_update := true.B
    last_s2xlate := false.B
  }

  when(sent_to_pmp && mem_addr_update === false.B){
    s_mem_req := false.B
    s_pmp_check := true.B
  }

  when(accessFault && idle === false.B){
    s_pmp_check := true.B
    s_mem_req := true.B
    w_mem_resp := true.B
    s_llptw_req := true.B
    s_hptw_req := true.B
    w_hptw_resp := true.B
    s_last_hptw_req := true.B
    w_last_hptw_resp := true.B
    mem_addr_update := true.B
    last_s2xlate := false.B
  }

  when (mem.req.fire){
    s_mem_req := true.B
    w_mem_resp := false.B
  }

  when(mem.resp.fire && w_mem_resp === false.B){
    w_mem_resp := true.B
    af_level := af_level + 1.U
    s_llptw_req := false.B
    mem_addr_update := true.B
  }

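  // Choose the next step once the current access/translation result is known:
  //   - at level 0 with nothing found yet: step down one level (re-issuing an HPTW
  //     request first when two-stage translation is active);
  //   - a leaf still has to be fetched at the last level: hand the walk to LLPTW;
  //   - two-stage translation still pending: issue the final HPTW request;
  //   - otherwise: return the response and go back to idle.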
  when(mem_addr_update){
    when(level === 0.U && !(find_pte || accessFault)){
      level := levelNext
      when(s2xlate){
        s_hptw_req := false.B
      }.otherwise{
        s_mem_req := false.B
      }
      s_llptw_req := true.B
      mem_addr_update := false.B
    }.elsewhen(io.llptw.valid){
      when(io.llptw.fire) {
        idle := true.B
        s_llptw_req := true.B
        mem_addr_update := false.B
        last_s2xlate := false.B
      }
      finish := true.B
    }.elsewhen(s2xlate && last_s2xlate === true.B) {
      s_last_hptw_req := false.B
      mem_addr_update := false.B
    }.elsewhen(io.resp.valid){
      when(io.resp.fire) {
        idle := true.B
        s_llptw_req := true.B
        mem_addr_update := false.B
        accessFault := false.B
      }
      finish := true.B
    }
  }


  when (sfence.valid) {
    idle := true.B
    s_pmp_check := true.B
    s_mem_req := true.B
    s_llptw_req := true.B
    w_mem_resp := true.B
    accessFault := false.B
    mem_addr_update := false.B
    s_hptw_req := true.B
    w_hptw_resp := true.B
    s_last_hptw_req := true.B
    w_last_hptw_resp := true.B
  }


  XSDebug(p"[ptw] level:${level} notFound:${pageFault}\n")

  // perf
  XSPerfAccumulate("fsm_count", io.req.fire)
  for (i <- 0 until PtwWidth) {
    XSPerfAccumulate(s"fsm_count_source${i}", io.req.fire && io.req.bits.req_info.source === i.U)
  }
  XSPerfAccumulate("fsm_busy", !idle)
  XSPerfAccumulate("fsm_idle", idle)
  XSPerfAccumulate("resp_blocked", io.resp.valid && !io.resp.ready)
  XSPerfAccumulate("ptw_ppn_af", io.resp.fire && ppn_af)
  XSPerfAccumulate("mem_count", mem.req.fire)
  XSPerfAccumulate("mem_cycle", BoolStopWatch(mem.req.fire, mem.resp.fire, true))
  XSPerfAccumulate("mem_blocked", mem.req.valid && !mem.req.ready)

  TimeOutAssert(!idle, timeOutThreshold, "page table walker time out")

  val perfEvents = Seq(
    ("fsm_count         ", io.req.fire                                      ),
    ("fsm_busy          ", !idle                                            ),
    ("fsm_idle          ", idle                                             ),
    ("resp_blocked      ", io.resp.valid && !io.resp.ready                  ),
    ("mem_count         ", mem.req.fire                                     ),
    ("mem_cycle         ", BoolStopWatch(mem.req.fire, mem.resp.fire, true) ),
    ("mem_blocked       ", mem.req.valid && !mem.req.ready                  ),
  )
  generatePerfEvent()
}

/*========================= LLPTW ==============================*/

/** LLPTW : Last Level Page Table Walker
  * the page walker that handles only the 4KB (last-level) page walks.
  **/
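
/** Per-entry life cycle (a sketch of the state Enum defined below):
  *   enqueue -> addr_check -> mem_req -> mem_waiting -> mem_out -> idle
  * A request that duplicates an in-flight vpn skips its own memory access and is either
  * attached to the pending response (mem_waiting / mem_out) or sent back to the page
  * cache (cache). With two-stage translation enabled, the hptw_req/hptw_resp and
  * last_hptw_req/last_hptw_resp states cover the extra guest-physical lookups.
  */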

class LLPTWInBundle(implicit p: Parameters) extends XSBundle with HasPtwConst {
  val req_info = Output(new L2TlbInnerBundle())
  val ppn = Output(if(HasHExtension) UInt((vpnLen.max(ppnLen)).W) else UInt(ppnLen.W))
}

class LLPTWIO(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwConst {
  val in = Flipped(DecoupledIO(new LLPTWInBundle()))
  val out = DecoupledIO(new Bundle {
    val req_info = Output(new L2TlbInnerBundle())
    val id = Output(UInt(bMemID.W))
    val h_resp = Output(new HptwResp)
    val af = Output(Bool())
  })
  val mem = new Bundle {
    val req = DecoupledIO(new L2TlbMemReqBundle())
    val resp = Flipped(Valid(new Bundle {
      val id = Output(UInt(log2Up(l2tlbParams.llptwsize).W))
    }))
    val enq_ptr = Output(UInt(log2Ceil(l2tlbParams.llptwsize).W))
    val buffer_it = Output(Vec(l2tlbParams.llptwsize, Bool()))
    val refill = Output(new L2TlbInnerBundle())
    val req_mask = Input(Vec(l2tlbParams.llptwsize, Bool()))
  }
  val cache = DecoupledIO(new L2TlbInnerBundle())
  val pmp = new Bundle {
    val req = Valid(new PMPReqBundle())
    val resp = Flipped(new PMPRespBundle())
  }
  val hptw = new Bundle {
    val req = DecoupledIO(new Bundle{
      val source = UInt(bSourceWidth.W)
      val id = UInt(log2Up(l2tlbParams.llptwsize).W)
      val gvpn = UInt(vpnLen.W)
    })
    val resp = Flipped(Valid(new Bundle {
      val id = Output(UInt(log2Up(l2tlbParams.llptwsize).W))
      val h_resp = Output(new HptwResp)
    }))
  }
}

class LLPTWEntry(implicit p: Parameters) extends XSBundle with HasPtwConst {
  val req_info = new L2TlbInnerBundle()
  val s2xlate = Bool()
  val ppn = UInt(ppnLen.W)
  val wait_id = UInt(log2Up(l2tlbParams.llptwsize).W)
  val af = Bool()
  val gaf = Bool()
  val gpf = Bool()
}


class LLPTW(implicit p: Parameters) extends XSModule with HasPtwConst with HasPerfEvents {
  val io = IO(new LLPTWIO())
  val enableS2xlate = io.in.bits.req_info.s2xlate =/= noS2xlate
  val satp = Mux(enableS2xlate, io.csr.vsatp, io.csr.satp)

  val flush = io.sfence.valid || satp.changed
  val entries = Reg(Vec(l2tlbParams.llptwsize, new LLPTWEntry()))
  val state_idle :: state_hptw_req :: state_hptw_resp :: state_addr_check :: state_mem_req :: state_mem_waiting :: state_mem_out :: state_last_hptw_req :: state_last_hptw_resp :: state_cache :: Nil = Enum(10)
  val state = RegInit(VecInit(Seq.fill(l2tlbParams.llptwsize)(state_idle)))

  val is_emptys = state.map(_ === state_idle)
  val is_mems = state.map(_ === state_mem_req)
  val is_waiting = state.map(_ === state_mem_waiting)
  val is_having = state.map(_ === state_mem_out)
  val is_cache = state.map(_ === state_cache)
  val is_hptw_req = state.map(_ === state_hptw_req)
  val is_last_hptw_req = state.map(_ === state_last_hptw_req)

  val full = !ParallelOR(is_emptys).asBool
  val enq_ptr = ParallelPriorityEncoder(is_emptys)

  val mem_ptr = ParallelPriorityEncoder(is_having) // TODO: optimize timing, bad: entries -> ptr -> entry
  val mem_arb = Module(new RRArbiter(new LLPTWEntry(), l2tlbParams.llptwsize))
  for (i <- 0 until l2tlbParams.llptwsize) {
    mem_arb.io.in(i).bits := entries(i)
    mem_arb.io.in(i).valid := is_mems(i) && !io.mem.req_mask(i)
  }
  val hyper_arb1 = Module(new RRArbiter(new LLPTWEntry(), l2tlbParams.llptwsize))
  for (i <- 0 until l2tlbParams.llptwsize) {
    hyper_arb1.io.in(i).bits := entries(i)
    hyper_arb1.io.in(i).valid := is_hptw_req(i)
  }
  val hyper_arb2 = Module(new RRArbiter(new LLPTWEntry(), l2tlbParams.llptwsize))
  for(i <- 0 until l2tlbParams.llptwsize) {
    hyper_arb2.io.in(i).bits := entries(i)
    hyper_arb2.io.in(i).valid := is_last_hptw_req(i)
  }

  val cache_ptr = ParallelMux(is_cache, (0 until l2tlbParams.llptwsize).map(_.U(log2Up(l2tlbParams.llptwsize).W)))

  // duplicate req
  // to_wait: an earlier duplicate entry is already accessing mem; set this one to state_mem_waiting
  // to_cache: the earlier duplicate has just come back; set this one to state_cache
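  // e.g., two requests to the same 4KB region arrive back to back: the second one enqueues
  // in state_mem_waiting with wait_id pointing at the first, and both are completed by the
  // same memory response.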
  val dup_vec = state.indices.map(i =>
    dup(io.in.bits.req_info.vpn, entries(i).req_info.vpn) && io.in.bits.req_info.s2xlate === entries(i).req_info.s2xlate
  )
  val dup_req_fire = mem_arb.io.out.fire && dup(io.in.bits.req_info.vpn, mem_arb.io.out.bits.req_info.vpn) && io.in.bits.req_info.s2xlate === mem_arb.io.out.bits.req_info.s2xlate // dup with the req fire entry
  val dup_vec_wait = dup_vec.zip(is_waiting).map{case (d, w) => d && w} // dup with "mem_waiting" entries, which have already sent the mem req
  val dup_vec_having = dup_vec.zipWithIndex.map{case (d, i) => d && is_having(i)} // dup with the "mem_out" entry that just received the data
  val wait_id = Mux(dup_req_fire, mem_arb.io.chosen, ParallelMux(dup_vec_wait zip entries.map(_.wait_id)))
  val dup_wait_resp = io.mem.resp.fire && VecInit(dup_vec_wait)(io.mem.resp.bits.id) // dup with the entry whose data arrives next cycle
  val to_wait = Cat(dup_vec_wait).orR || dup_req_fire
  val to_mem_out = dup_wait_resp
  val to_cache = Cat(dup_vec_having).orR
  XSError(RegNext(dup_req_fire && Cat(dup_vec_wait).orR, init = false.B), "mem req but some entries already waiting, should not happen")

  XSError(io.in.fire && ((to_mem_out && to_cache) || (to_wait && to_cache)), "llptw enq, to cache conflict with to mem")
  val mem_resp_hit = RegInit(VecInit(Seq.fill(l2tlbParams.llptwsize)(false.B)))
  val enq_state_normal = Mux(to_mem_out, state_mem_out, // same as below, except the mem resp arrives right now
    Mux(to_wait, state_mem_waiting,
    Mux(to_cache, state_cache, state_addr_check)))
  val enq_state = Mux(from_pre(io.in.bits.req_info.source) && enq_state_normal =/= state_addr_check, state_idle, enq_state_normal)
  when (io.in.fire) {
    // if a prefetch req does not need a mem access, just drop it;
    // so there will be at most 1 + FilterSize entries that need to re-access the page cache,
    // and 2 + FilterSize is enough to avoid deadlock
    state(enq_ptr) := enq_state
    entries(enq_ptr).req_info := io.in.bits.req_info
    entries(enq_ptr).ppn := io.in.bits.ppn
    entries(enq_ptr).wait_id := Mux(to_wait, wait_id, enq_ptr)
    entries(enq_ptr).af := false.B
    entries(enq_ptr).gaf := false.B
    entries(enq_ptr).gpf := false.B
    entries(enq_ptr).s2xlate := enableS2xlate
    mem_resp_hit(enq_ptr) := to_mem_out
  }

  val enq_ptr_reg = RegNext(enq_ptr)
  val need_addr_check = RegNext(enq_state === state_addr_check && (io.in.fire() || io.hptw.resp.fire()) && !flush)

  val gpaddr = MakeGAddr(io.in.bits.ppn, getVpnn(io.in.bits.req_info.vpn, 0))
  val hpaddr = Cat(io.in.bits.ppn, gpaddr(offLen-1, 0))

  val addr = Mux(enableS2xlate, hpaddr, MakeAddr(io.in.bits.ppn, getVpnn(io.in.bits.req_info.vpn, 0)))

  io.pmp.req.valid := need_addr_check
  io.pmp.req.bits.addr := RegEnable(addr, io.in.fire)
  io.pmp.req.bits.cmd := TlbCmd.read
  io.pmp.req.bits.size := 3.U // TODO: fix it
  val pmp_resp_valid = io.pmp.req.valid // same cycle
  when (pmp_resp_valid) {
    // NOTE: if the pmp resp arrives but the state is no longer addr_check, this entry duplicates another
    //       entry and its state has already been changed. When it duplicates the req-ing entry it was set
    //       to mem_waiting (above), and ld must be false in that case, so don't care.
    val accessFault = io.pmp.resp.ld || io.pmp.resp.mmio
    entries(enq_ptr_reg).af := accessFault
    state(enq_ptr_reg) := Mux(accessFault, state_mem_out, state_mem_req)
  }

  when (mem_arb.io.out.fire) {
    for (i <- state.indices) {
      when (state(i) =/= state_idle && dup(entries(i).req_info.vpn, mem_arb.io.out.bits.req_info.vpn)) {
        // NOTE: "dup enq set state to mem_wait" -> "sending req set other dup entries to mem_wait"
        state(i) := state_mem_waiting
        entries(i).wait_id := mem_arb.io.chosen
      }
    }
  }
  when (io.mem.resp.fire) {
    state.indices.map{i =>
      when (state(i) === state_mem_waiting && io.mem.resp.bits.id === entries(i).wait_id) {
        state(i) := Mux(entries(i).s2xlate, state_last_hptw_req, state_mem_out)
        mem_resp_hit(i) := true.B
      }
    }
  }

  when (hyper_arb1.io.out.fire()) {
    for (i <- state.indices) {
      when (state(i) === state_hptw_req && entries(i).ppn === hyper_arb1.io.out.bits.ppn && entries(i).s2xlate) {
        state(i) := state_hptw_resp
        entries(i).wait_id := hyper_arb1.io.chosen
      }
    }
  }

  when (hyper_arb2.io.out.fire()) {
    for (i <- state.indices) {
      when (state(i) === state_last_hptw_req && entries(i).ppn === hyper_arb2.io.out.bits.ppn && entries(i).s2xlate) {
        state(i) := state_last_hptw_resp
        entries(i).wait_id := hyper_arb2.io.chosen
      }
    }
  }

  when (io.hptw.resp.fire()) {
    for (i <- state.indices) {
      when (state(i) === state_hptw_resp && io.hptw.resp.bits.id === entries(i).wait_id) {
        state(i) := state_addr_check
        entries(i).gpf := io.hptw.resp.bits.h_resp.gpf
        entries(i).gaf := io.hptw.resp.bits.h_resp.gaf
      }
      when (state(i) === state_last_hptw_resp && io.hptw.resp.bits.id === entries(i).wait_id) {
        state(i) := state_mem_out
        entries(i).gpf := io.hptw.resp.bits.h_resp.gpf
        entries(i).gaf := io.hptw.resp.bits.h_resp.gaf
      }
    }
  }

  when (io.out.fire) {
    assert(state(mem_ptr) === state_mem_out)
    state(mem_ptr) := state_idle
  }
  mem_resp_hit.map(a => when (a) { a := false.B } )

  when (io.cache.fire) {
    state(cache_ptr) := state_idle
  }
  XSError(io.out.fire && io.cache.fire && (mem_ptr === cache_ptr), "mem resp and cache fire at the same time at same entry")

  when (flush) {
    state.map(_ := state_idle)
  }

  io.in.ready := !full

  io.out.valid := ParallelOR(is_having).asBool
  io.out.bits.req_info := entries(mem_ptr).req_info
  io.out.bits.id := mem_ptr
  io.out.bits.af := entries(mem_ptr).af
  io.out.bits.h_resp := io.hptw.resp.bits.h_resp

  io.hptw.req.valid := (hyper_arb1.io.out.valid || hyper_arb2.io.out.valid) && !flush
  io.hptw.req.bits.gvpn := Mux(hyper_arb1.io.out.valid, hyper_arb1.io.out.bits.ppn, hyper_arb2.io.out.bits.ppn)
  io.hptw.req.bits.id := Mux(hyper_arb1.io.out.valid, hyper_arb1.io.chosen, hyper_arb2.io.chosen)
  io.hptw.req.bits.source := Mux(hyper_arb1.io.out.valid, hyper_arb1.io.out.bits.req_info.source, hyper_arb2.io.out.bits.req_info.source)
  hyper_arb1.io.out.ready := io.hptw.req.ready
  hyper_arb2.io.out.ready := io.hptw.req.ready

  io.mem.req.valid := mem_arb.io.out.valid && !flush
  io.mem.req.bits.addr := MakeAddr(mem_arb.io.out.bits.ppn, getVpnn(mem_arb.io.out.bits.req_info.vpn, 0))
  io.mem.req.bits.id := mem_arb.io.chosen
  mem_arb.io.out.ready := io.mem.req.ready
  io.mem.refill := entries(RegNext(io.mem.resp.bits.id(log2Up(l2tlbParams.llptwsize)-1, 0))).req_info
  io.mem.buffer_it := mem_resp_hit
  io.mem.enq_ptr := enq_ptr

  io.cache.valid := Cat(is_cache).orR
  io.cache.bits := ParallelMux(is_cache, entries.map(_.req_info))

  XSPerfAccumulate("llptw_in_count", io.in.fire)
  XSPerfAccumulate("llptw_in_block", io.in.valid && !io.in.ready)
  for (i <- 0 until 7) {
    XSPerfAccumulate(s"enq_state${i}", io.in.fire && enq_state === i.U)
  }
  for (i <- 0 until (l2tlbParams.llptwsize + 1)) {
    XSPerfAccumulate(s"util${i}", PopCount(is_emptys.map(!_)) === i.U)
    XSPerfAccumulate(s"mem_util${i}", PopCount(is_mems) === i.U)
    XSPerfAccumulate(s"waiting_util${i}", PopCount(is_waiting) === i.U)
  }
  XSPerfAccumulate("mem_count", io.mem.req.fire)
  XSPerfAccumulate("mem_cycle", PopCount(is_waiting) =/= 0.U)
  XSPerfAccumulate("blocked_in", io.in.valid && !io.in.ready)

  for (i <- 0 until l2tlbParams.llptwsize) {
    TimeOutAssert(state(i) =/= state_idle, timeOutThreshold, s"missqueue time out no out ${i}")
  }

  val perfEvents = Seq(
    ("tlbllptw_incount           ", io.in.fire                  ),
    ("tlbllptw_inblock           ", io.in.valid && !io.in.ready ),
    ("tlbllptw_memcount          ", io.mem.req.fire             ),
    ("tlbllptw_memcycle          ", PopCount(is_waiting)        ),
  )
  generatePerfEvent()
}

/*========================= HPTW ==============================*/

/** HPTW : Hypervisor Page Table Walker
  * the page walker that performs the virtual machine's second-stage walk:
  * guest physical address translation, guest physical address -> host physical address
  **/
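/** Request/response sketch: a request carries the guest-physical page number (gvpn)
  * plus optional partial-walk hints (l1Hit/l2Hit); the walk is rooted at hgatp and the
  * resulting host-physical mapping (or gpf/gaf fault) is returned in HptwResp.
  */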
class HPTWIO()(implicit p: Parameters) extends MMUIOBaseBundle with HasPtwConst {
  val req = Flipped(DecoupledIO(new Bundle {
    val source = UInt(bSourceWidth.W)
    val id = UInt(log2Up(l2tlbParams.llptwsize).W)
    val gvpn = UInt(vpnLen.W)
    val l1Hit = Bool()
    val l2Hit = Bool()
  }))
  val resp = Valid(new Bundle {
    val source = UInt(bSourceWidth.W)
    val resp = Output(new HptwResp())
    val id = Output(UInt(bMemID.W))
  })

  val mem = new Bundle {
    val req = DecoupledIO(new L2TlbMemReqBundle())
    val resp = Flipped(ValidIO(UInt(XLEN.W)))
    val mask = Input(Bool())
  }
  val refill = Output(new Bundle {
    val req_info = new L2TlbInnerBundle()
    val level = UInt(log2Up(Level).W)
  })
  val pmp = new Bundle {
    val req = ValidIO(new PMPReqBundle())
    val resp = Flipped(new PMPRespBundle())
  }
}

class HPTW()(implicit p: Parameters) extends XSModule with HasPtwConst {
  val io = IO(new HPTWIO)
  val hgatp = io.csr.hgatp
  val sfence = io.sfence
  val flush = sfence.valid || hgatp.changed

  val level = RegInit(0.U(log2Up(Level).W))
  val gpaddr = Reg(UInt(GPAddrBits.W))
  val vpn = gpaddr(GPAddrBits-1, offLen)
  val levelNext = level + 1.U
  val l1Hit = Reg(Bool())
  val l2Hit = Reg(Bool())
  val ppn = Reg(UInt(ppnLen.W))
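  // G-stage walk addresses: pg_base is the root of the guest page table, taken from
  // hgatp.ppn and indexed by the top gvpn slice; p_pte indexes the table level fetched
  // so far (held in ppn) with the VPN slice that belongs to the current level.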
  val pg_base = MakeGAddr(hgatp.ppn, getGVpnn(vpn, 2.U))
//  val pte = io.mem.resp.bits.MergeRespToPte()
  val pte = io.mem.resp.bits.asTypeOf(new PteBundle().cloneType)
  val p_pte = MakeAddr(ppn, getVpnn(vpn, 2.U - level))
  val mem_addr = Mux(level === 0.U, pg_base, p_pte)

  // s/w register
  val s_pmp_check = RegInit(true.B)
  val s_mem_req = RegInit(true.B)
  val w_mem_resp = RegInit(true.B)
  val mem_addr_update = RegInit(true.B)
  val idle = RegInit(true.B)
  val finish = WireInit(false.B)

  val sent_to_pmp = !idle && (!s_pmp_check || mem_addr_update) && !finish
  val pageFault = pte.isPf(level)
  val accessFault = RegEnable(io.pmp.resp.ld || io.pmp.resp.mmio, sent_to_pmp)

  val ppn_af = pte.isAf()
  val find_pte = pte.isLeaf() || ppn_af || pageFault

  val resp_valid = !idle && mem_addr_update && ((w_mem_resp && find_pte) || (s_pmp_check && accessFault))
  val id = Reg(UInt(log2Up(l2tlbParams.llptwsize).W))
  val source = RegEnable(io.req.bits.source, io.req.fire())

  io.req.ready := idle
  val resp = Wire(new HptwResp())
  resp.apply(pageFault && !accessFault && !ppn_af, accessFault || ppn_af, level, pte, vpn, hgatp.asid)
  io.resp.valid := resp_valid
  io.resp.bits.id := id
  io.resp.bits.resp := resp
  io.resp.bits.source := source

  io.pmp.req.valid := DontCare
  io.pmp.req.bits.addr := mem_addr
  io.pmp.req.bits.size := 3.U
  io.pmp.req.bits.cmd := TlbCmd.read

  io.mem.req.valid := !s_mem_req && !io.mem.mask && !accessFault && s_pmp_check
  io.mem.req.bits.addr := mem_addr
  io.mem.req.bits.id := HptwReqId.U(bMemID.W)

  io.refill.req_info.vpn := vpn
  io.refill.level := level
  io.refill.req_info.source := source
  io.refill.req_info.s2xlate := onlyStage2
  when (idle){
    when(io.req.fire()){
      level := Mux(io.req.bits.l2Hit, 2.U, Mux(io.req.bits.l1Hit, 1.U, 0.U))
      idle := false.B
      gpaddr := Cat(io.req.bits.gvpn, 0.U(offLen.W))
      accessFault := false.B
      s_pmp_check := false.B
      id := io.req.bits.id
      l1Hit := io.req.bits.l1Hit
      l2Hit := io.req.bits.l2Hit
    }
  }

  when(sent_to_pmp && !mem_addr_update){
    s_mem_req := false.B
    s_pmp_check := true.B
  }

  when(accessFault && !idle){
    s_pmp_check := true.B
    s_mem_req := true.B
    w_mem_resp := true.B
    mem_addr_update := true.B
  }

  when(io.mem.req.fire()){
    s_mem_req := true.B
    w_mem_resp := false.B
  }

  when(io.mem.resp.fire() && !w_mem_resp){
    ppn := pte.ppn
    w_mem_resp := true.B
    mem_addr_update := true.B
  }

  when(mem_addr_update){
    when(!(find_pte || accessFault)){
      level := levelNext
      s_mem_req := false.B
      mem_addr_update := false.B
    }.elsewhen(resp_valid){
      when(io.resp.fire()){
        idle := true.B
        mem_addr_update := false.B
        accessFault := false.B
      }
      finish := true.B
    }
  }
755}