Lines Matching +full:page +full:- +full:level

1 // SPDX-License-Identifier: GPL-2.0
17 INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); in kvm_mmu_init_tdp_mmu()
18 spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); in kvm_mmu_init_tdp_mmu()
26 lockdep_assert_held_read(&kvm->mmu_lock); in kvm_lockdep_assert_mmu_lock_held()
28 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_lockdep_assert_mmu_lock_held()
43 WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); in kvm_mmu_uninit_tdp_mmu()
44 WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); in kvm_mmu_uninit_tdp_mmu()
56 free_page((unsigned long)sp->external_spt); in tdp_mmu_free_sp()
57 free_page((unsigned long)sp->spt); in tdp_mmu_free_sp()
62 * This is called through call_rcu in order to free TDP page table memory
65 * By only accessing TDP MMU page table memory in an RCU read critical
79 if (!refcount_dec_and_test(&root->tdp_mmu_root_count)) in kvm_tdp_mmu_put_root()
87 KVM_BUG_ON(!is_tdp_mmu_page(root) || !root->role.invalid, kvm); in kvm_tdp_mmu_put_root()
89 spin_lock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_put_root()
90 list_del_rcu(&root->link); in kvm_tdp_mmu_put_root()
91 spin_unlock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_put_root()
92 call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback); in kvm_tdp_mmu_put_root()
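The put-root path above is the standard RCU deferred-free idiom: unlink the root from the RCU-protected list under the spinlock, then hand it to call_rcu() so the backing page-table memory is only freed once every walker that might still hold a pointer has left its RCU read-side critical section. A minimal kernel-context sketch of that idiom follows; struct pt_page and the helper names are illustrative, not taken from tdp_mmu.c.

#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

/* Illustrative stand-in for a page-table page tracked on an RCU list. */
struct pt_page {
        struct list_head link;          /* on an RCU-protected list      */
        struct rcu_head rcu_head;       /* defers the actual free        */
        void *table;                    /* the page-table memory itself  */
};

static void pt_page_free_rcu(struct rcu_head *head)
{
        /* Runs after a grace period: no RCU reader can still see the page. */
        struct pt_page *pp = container_of(head, struct pt_page, rcu_head);

        free_page((unsigned long)pp->table);
        kfree(pp);
}

static void pt_page_retire(struct pt_page *pp, spinlock_t *lock)
{
        /* Unlink first so new readers cannot find the page ... */
        spin_lock(lock);
        list_del_rcu(&pp->link);
        spin_unlock(lock);

        /* ... then defer the free until existing readers are done. */
        call_rcu(&pp->rcu_head, pt_page_free_rcu);
}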
101 if (root->role.invalid && !(types & KVM_INVALID_ROOTS)) in tdp_mmu_root_match()
113 * must hold a reference to @prev_root if it's non-NULL).
126 * While the roots themselves are RCU-protected, fields such as in tdp_mmu_next_root()
129 lockdep_assert_held(&kvm->mmu_lock); in tdp_mmu_next_root()
134 next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots, in tdp_mmu_next_root()
135 &prev_root->link, in tdp_mmu_next_root()
138 next_root = list_first_or_null_rcu(&kvm->arch.tdp_mmu_roots, in tdp_mmu_next_root()
146 next_root = list_next_or_null_rcu(&kvm->arch.tdp_mmu_roots, in tdp_mmu_next_root()
147 &next_root->link, typeof(*next_root), link); in tdp_mmu_next_root()
169 ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
179 ({ lockdep_assert_held(&(_kvm)->mmu_lock); }), _root; \
185 * inherently yield-friendly and should use the yield-safe variant above.
190 list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link) \
203 sp = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_page_header_cache); in tdp_mmu_alloc_sp()
204 sp->spt = kvm_mmu_memory_cache_alloc(&vcpu->arch.mmu_shadow_page_cache); in tdp_mmu_alloc_sp()
212 INIT_LIST_HEAD(&sp->possible_nx_huge_page_link); in tdp_mmu_init_sp()
214 set_page_private(virt_to_page(sp->spt), (unsigned long)sp); in tdp_mmu_init_sp()
216 sp->role = role; in tdp_mmu_init_sp()
217 sp->gfn = gfn; in tdp_mmu_init_sp()
218 sp->ptep = sptep; in tdp_mmu_init_sp()
219 sp->tdp_mmu_page = true; in tdp_mmu_init_sp()
230 parent_sp = sptep_to_sp(rcu_dereference(iter->sptep)); in tdp_mmu_init_child_sp()
232 role = parent_sp->role; in tdp_mmu_init_child_sp()
233 role.level--; in tdp_mmu_init_child_sp()
235 tdp_mmu_init_sp(child_sp, iter->sptep, iter->gfn, role); in tdp_mmu_init_child_sp()
240 struct kvm_mmu *mmu = vcpu->arch.mmu; in kvm_tdp_mmu_alloc_root()
241 union kvm_mmu_page_role role = mmu->root_role; in kvm_tdp_mmu_alloc_root()
243 struct kvm *kvm = vcpu->kvm; in kvm_tdp_mmu_alloc_root()
255 read_lock(&kvm->mmu_lock); in kvm_tdp_mmu_alloc_root()
258 if (root->role.word == role.word) in kvm_tdp_mmu_alloc_root()
262 spin_lock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_alloc_root()
272 list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { in kvm_tdp_mmu_alloc_root()
273 if (root->role.word == role.word && in kvm_tdp_mmu_alloc_root()
288 refcount_set(&root->tdp_mmu_root_count, 2); in kvm_tdp_mmu_alloc_root()
289 list_add_rcu(&root->link, &kvm->arch.tdp_mmu_roots); in kvm_tdp_mmu_alloc_root()
292 spin_unlock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_alloc_root()
294 read_unlock(&kvm->mmu_lock); in kvm_tdp_mmu_alloc_root()
301 mmu->mirror_root_hpa = __pa(root->spt); in kvm_tdp_mmu_alloc_root()
303 mmu->root.hpa = __pa(root->spt); in kvm_tdp_mmu_alloc_root()
304 mmu->root.pgd = 0; in kvm_tdp_mmu_alloc_root()
309 u64 old_spte, u64 new_spte, int level,
314 kvm_account_pgtable_pages((void *)sp->spt, +1); in tdp_account_mmu_page()
315 atomic64_inc(&kvm->arch.tdp_mmu_pages); in tdp_account_mmu_page()
320 kvm_account_pgtable_pages((void *)sp->spt, -1); in tdp_unaccount_mmu_page()
321 atomic64_dec(&kvm->arch.tdp_mmu_pages); in tdp_unaccount_mmu_page()
325 * tdp_mmu_unlink_sp() - Remove a shadow page from the list of used pages
328 * @sp: the page to be removed
334 if (!sp->nx_huge_page_disallowed) in tdp_mmu_unlink_sp()
337 spin_lock(&kvm->arch.tdp_mmu_pages_lock); in tdp_mmu_unlink_sp()
338 sp->nx_huge_page_disallowed = false; in tdp_mmu_unlink_sp()
340 spin_unlock(&kvm->arch.tdp_mmu_pages_lock); in tdp_mmu_unlink_sp()
344 int level) in remove_external_spte() argument
352 * But remove_external_spte() will be called on non-leaf PTEs via in remove_external_spte()
356 if (!is_last_spte(old_spte, level)) in remove_external_spte()
360 lockdep_assert_held_write(&kvm->mmu_lock); in remove_external_spte()
362 ret = static_call(kvm_x86_remove_external_spte)(kvm, gfn, level, old_pfn); in remove_external_spte()
367 * handle_removed_pt() - handle a page table removed from the TDP structure
370 * @pt: the page removed from the paging structure
375 * Given a page table that has been removed from the TDP paging structure,
376 * iterates through the page table to clear SPTEs and free child page tables.
380 * this thread will be responsible for ensuring the page is freed. Hence the
386 int level = sp->role.level; in handle_removed_pt() local
387 gfn_t base_gfn = sp->gfn; in handle_removed_pt()
396 gfn_t gfn = base_gfn + i * KVM_PAGES_PER_HPAGE(level); in handle_removed_pt()
404 * handling a page fault could overwrite it, so in handle_removed_pt()
416 * If the SPTE is not MMU-present, there is no backing in handle_removed_pt()
417 * page associated with the SPTE and so no side effects in handle_removed_pt()
434 * document that A/D assists can use upper-level PxE in handle_removed_pt()
436 * still access the page and mark it dirty. in handle_removed_pt()
448 * scenario where a non-atomic update could result in a in handle_removed_pt()
450 * sufficient as a fast page fault could read the upper in handle_removed_pt()
451 * level SPTE before it is zapped, and then make this in handle_removed_pt()
457 FROZEN_SPTE, level); in handle_removed_pt()
460 old_spte, FROZEN_SPTE, level, shared); in handle_removed_pt()
464 remove_external_spte(kvm, gfn, old_spte, level); in handle_removed_pt()
469 WARN_ON(static_call(kvm_x86_free_external_spt)(kvm, base_gfn, sp->role.level, in handle_removed_pt()
470 sp->external_spt))) { in handle_removed_pt()
472 * Failed to free page table page in mirror page table and in handle_removed_pt()
474 * Intentionally leak the page to prevent the kernel from in handle_removed_pt()
475 * accessing the encrypted page. in handle_removed_pt()
477 sp->external_spt = NULL; in handle_removed_pt()
480 call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback); in handle_removed_pt()
483 static void *get_external_spt(gfn_t gfn, u64 new_spte, int level) in get_external_spt() argument
485 if (is_shadow_present_pte(new_spte) && !is_last_spte(new_spte, level)) { in get_external_spt()
488 WARN_ON_ONCE(sp->role.level + 1 != level); in get_external_spt()
489 WARN_ON_ONCE(sp->gfn != gfn); in get_external_spt()
490 return sp->external_spt; in get_external_spt()
498 u64 new_spte, int level) in set_external_spte_present() argument
502 bool is_leaf = is_present && is_last_spte(new_spte, level); in set_external_spte_present()
508 lockdep_assert_held(&kvm->mmu_lock); in set_external_spte_present()
511 * page table has been modified. Use FROZEN_SPTE similar to in set_external_spte_present()
515 return -EBUSY; in set_external_spte_present()
518 * Use different call to either set up middle level in set_external_spte_present()
519 * external page table, or leaf. in set_external_spte_present()
522 ret = static_call(kvm_x86_set_external_spte)(kvm, gfn, level, new_pfn); in set_external_spte_present()
524 void *external_spt = get_external_spt(gfn, new_spte, level); in set_external_spte_present()
527 ret = static_call(kvm_x86_link_external_spt)(kvm, gfn, level, external_spt); in set_external_spte_present()
537 * handle_changed_spte - handle bookkeeping associated with an SPTE change
543 * @level: the level of the PT the SPTE is part of in the paging structure
553 u64 old_spte, u64 new_spte, int level, in handle_changed_spte() argument
558 bool was_leaf = was_present && is_last_spte(old_spte, level); in handle_changed_spte()
559 bool is_leaf = is_present && is_last_spte(new_spte, level); in handle_changed_spte()
562 WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL); in handle_changed_spte()
563 WARN_ON_ONCE(level < PG_LEVEL_4K); in handle_changed_spte()
564 WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); in handle_changed_spte()
571 * should be zapping the SPTE before the main MM's page table is in handle_changed_spte()
579 "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d", in handle_changed_spte()
580 as_id, gfn, old_spte, new_spte, level); in handle_changed_spte()
592 trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte); in handle_changed_spte()
598 * The only times a SPTE should be changed from a non-present to in handle_changed_spte()
599 * non-present state is when an MMIO entry is installed/modified/ in handle_changed_spte()
617 "as_id: %d gfn: %llx old_spte: %llx new_spte: %llx level: %d", in handle_changed_spte()
618 as_id, gfn, old_spte, new_spte, level); in handle_changed_spte()
623 kvm_update_page_stats(kvm, level, is_leaf ? 1 : -1); in handle_changed_spte()
633 handle_removed_pt(kvm, spte_to_child_pt(old_spte, level), shared); in handle_changed_spte()
643 * and pre-checking before inserting a new SPTE is advantageous as it in __tdp_mmu_set_spte_atomic()
646 WARN_ON_ONCE(iter->yielded || is_frozen_spte(iter->old_spte)); in __tdp_mmu_set_spte_atomic()
648 if (is_mirror_sptep(iter->sptep) && !is_frozen_spte(new_spte)) { in __tdp_mmu_set_spte_atomic()
656 return -EBUSY; in __tdp_mmu_set_spte_atomic()
658 ret = set_external_spte_present(kvm, iter->sptep, iter->gfn, in __tdp_mmu_set_spte_atomic()
659 iter->old_spte, new_spte, iter->level); in __tdp_mmu_set_spte_atomic()
663 u64 *sptep = rcu_dereference(iter->sptep); in __tdp_mmu_set_spte_atomic()
669 * updates iter->old_spte with the current value, so the caller in __tdp_mmu_set_spte_atomic()
673 if (!try_cmpxchg64(sptep, &iter->old_spte, new_spte)) in __tdp_mmu_set_spte_atomic()
674 return -EBUSY; in __tdp_mmu_set_spte_atomic()
681 * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically
682 * and handle the associated bookkeeping. Do not mark the page dirty
685 * If setting the SPTE fails because it has changed, iter->old_spte will be
692 * * 0 - If the SPTE was set.
693 * * -EBUSY - If the SPTE cannot be set. In this case this function will have
694 * no side-effects other than setting iter->old_spte to the last
703 lockdep_assert_held_read(&kvm->mmu_lock); in tdp_mmu_set_spte_atomic()
709 handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, in tdp_mmu_set_spte_atomic()
710 new_spte, iter->level, true); in tdp_mmu_set_spte_atomic()
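The 0 / -EBUSY contract documented above falls out of the compare-and-exchange in __tdp_mmu_set_spte_atomic(): try_cmpxchg64() only installs new_spte if the entry still holds old_spte, and on failure it writes the current value back into old_spte, which is why the caller can retry without re-reading the SPTE. A small self-contained userspace C11 sketch of that same compare-exchange contract (names here are illustrative):

#include <errno.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Mimics the caller-visible contract: 0 on success, -EBUSY if the entry
 * changed underneath us; *expected is refreshed with the current value on
 * failure, just like iter->old_spte. */
static int set_entry_atomic(_Atomic uint64_t *entry, uint64_t *expected,
                            uint64_t new_val)
{
        if (!atomic_compare_exchange_strong(entry, expected, new_val))
                return -EBUSY;
        return 0;
}

int main(void)
{
        _Atomic uint64_t spte = 0x1000;
        uint64_t old = 0x1000;

        /* Succeeds: the entry still holds the value we read earlier. */
        printf("first:  %d\n", set_entry_atomic(&spte, &old, 0x2000));

        /* Fails with a stale snapshot, but "old" now holds 0x2000, so a
         * retry needs no separate re-read of the entry. */
        old = 0x1000;
        printf("second: %d (old refreshed to 0x%llx)\n",
               set_entry_atomic(&spte, &old, 0x3000),
               (unsigned long long)old);
        return 0;
}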
716 * tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping
723 * @level: The level _containing_ the SPTE (its parent PT's level)
729 u64 old_spte, u64 new_spte, gfn_t gfn, int level) in tdp_mmu_set_spte() argument
731 lockdep_assert_held_write(&kvm->mmu_lock); in tdp_mmu_set_spte()
742 old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level); in tdp_mmu_set_spte()
744 handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, false); in tdp_mmu_set_spte()
747 * Users that do non-atomic setting of PTEs don't operate on mirror in tdp_mmu_set_spte()
752 remove_external_spte(kvm, gfn, old_spte, level); in tdp_mmu_set_spte()
761 WARN_ON_ONCE(iter->yielded); in tdp_mmu_iter_set_spte()
762 iter->old_spte = tdp_mmu_set_spte(kvm, iter->as_id, iter->sptep, in tdp_mmu_iter_set_spte()
763 iter->old_spte, new_spte, in tdp_mmu_iter_set_spte()
764 iter->gfn, iter->level); in tdp_mmu_iter_set_spte()
773 !is_last_spte(_iter.old_spte, _iter.level)) \
783 if (!need_resched() && !rwlock_needbreak(&kvm->mmu_lock)) in tdp_mmu_iter_need_resched()
787 return iter->next_last_level_gfn != iter->yielded_gfn; in tdp_mmu_iter_need_resched()
797 * If this function yields, iter->yielded is set and the caller must skip to
808 KVM_MMU_WARN_ON(iter->yielded); in tdp_mmu_iter_cond_resched()
819 cond_resched_rwlock_read(&kvm->mmu_lock); in tdp_mmu_iter_cond_resched()
821 cond_resched_rwlock_write(&kvm->mmu_lock); in tdp_mmu_iter_cond_resched()
825 WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn); in tdp_mmu_iter_cond_resched()
827 iter->yielded = true; in tdp_mmu_iter_cond_resched()
855 if (iter.level > zap_level) in __tdp_mmu_zap_root()
876 * and lead to use-after-free as zapping a SPTE triggers "writeback" of in tdp_mmu_zap_root()
877 * dirty accessed bits to the SPTE's associated struct page. in tdp_mmu_zap_root()
879 WARN_ON_ONCE(!refcount_read(&root->tdp_mmu_root_count)); in tdp_mmu_zap_root()
888 * preempt models) or mmu_lock contention (full or real-time models). in tdp_mmu_zap_root()
893 * in order to mimic the page fault path, which can replace a 1GiB page in tdp_mmu_zap_root()
907 __tdp_mmu_zap_root(kvm, root, shared, root->role.level); in tdp_mmu_zap_root()
917 * This helper intentionally doesn't allow zapping a root shadow page, in kvm_tdp_mmu_zap_sp()
918 * which doesn't have a parent page table and thus no associated entry. in kvm_tdp_mmu_zap_sp()
920 if (WARN_ON_ONCE(!sp->ptep)) in kvm_tdp_mmu_zap_sp()
923 old_spte = kvm_tdp_mmu_read_spte(sp->ptep); in kvm_tdp_mmu_zap_sp()
927 tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, in kvm_tdp_mmu_zap_sp()
928 SHADOW_NONPRESENT_VALUE, sp->gfn, sp->role.level + 1); in kvm_tdp_mmu_zap_sp()
947 lockdep_assert_held_write(&kvm->mmu_lock); in tdp_mmu_zap_leafs()
959 !is_last_spte(iter.old_spte, iter.level)) in tdp_mmu_zap_leafs()
968 if (!root->role.invalid) in tdp_mmu_zap_leafs()
990 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_tdp_mmu_zap_leafs()
991 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, -1) in kvm_tdp_mmu_zap_leafs()
1007 * "fast zap". Walking zapped top-level SPTEs isn't all that expensive in kvm_tdp_mmu_zap_all()
1014 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_tdp_mmu_zap_all()
1015 __for_each_tdp_mmu_root_yield_safe(kvm, root, -1, in kvm_tdp_mmu_zap_all()
1029 read_lock(&kvm->mmu_lock); in kvm_tdp_mmu_zap_invalidated_roots()
1031 write_lock(&kvm->mmu_lock); in kvm_tdp_mmu_zap_invalidated_roots()
1034 if (!root->tdp_mmu_scheduled_root_to_zap) in kvm_tdp_mmu_zap_invalidated_roots()
1037 root->tdp_mmu_scheduled_root_to_zap = false; in kvm_tdp_mmu_zap_invalidated_roots()
1038 KVM_BUG_ON(!root->role.invalid, kvm); in kvm_tdp_mmu_zap_invalidated_roots()
1044 * TLB flush on reuse also invalidates paging-structure-cache in kvm_tdp_mmu_zap_invalidated_roots()
1060 read_unlock(&kvm->mmu_lock); in kvm_tdp_mmu_zap_invalidated_roots()
1062 write_unlock(&kvm->mmu_lock); in kvm_tdp_mmu_zap_invalidated_roots()
1099 refcount_read(&kvm->users_count) && kvm->created_vcpus) in kvm_tdp_mmu_invalidate_roots()
1100 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_tdp_mmu_invalidate_roots()
1107 list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) { in kvm_tdp_mmu_invalidate_roots()
1117 if (!root->role.invalid) { in kvm_tdp_mmu_invalidate_roots()
1118 root->tdp_mmu_scheduled_root_to_zap = true; in kvm_tdp_mmu_invalidate_roots()
1119 root->role.invalid = true; in kvm_tdp_mmu_invalidate_roots()
1125 * Installs a last-level SPTE to handle a TDP page fault.
1132 struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(iter->sptep)); in tdp_mmu_map_handle_target_level()
1137 if (WARN_ON_ONCE(sp->role.level != fault->goal_level)) in tdp_mmu_map_handle_target_level()
1140 if (fault->prefetch && is_shadow_present_pte(iter->old_spte)) in tdp_mmu_map_handle_target_level()
1143 if (is_shadow_present_pte(iter->old_spte) && in tdp_mmu_map_handle_target_level()
1144 is_access_allowed(fault, iter->old_spte) && in tdp_mmu_map_handle_target_level()
1145 is_last_spte(iter->old_spte, iter->level)) in tdp_mmu_map_handle_target_level()
1148 if (unlikely(!fault->slot)) in tdp_mmu_map_handle_target_level()
1149 new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); in tdp_mmu_map_handle_target_level()
1151 wrprot = make_spte(vcpu, sp, fault->slot, ACC_ALL, iter->gfn, in tdp_mmu_map_handle_target_level()
1152 fault->pfn, iter->old_spte, fault->prefetch, in tdp_mmu_map_handle_target_level()
1153 false, fault->map_writable, &new_spte); in tdp_mmu_map_handle_target_level()
1155 if (new_spte == iter->old_spte) in tdp_mmu_map_handle_target_level()
1157 else if (tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte)) in tdp_mmu_map_handle_target_level()
1159 else if (is_shadow_present_pte(iter->old_spte) && in tdp_mmu_map_handle_target_level()
1160 (!is_last_spte(iter->old_spte, iter->level) || in tdp_mmu_map_handle_target_level()
1161 WARN_ON_ONCE(leaf_spte_change_needs_tlb_flush(iter->old_spte, new_spte)))) in tdp_mmu_map_handle_target_level()
1162 kvm_flush_remote_tlbs_gfn(vcpu->kvm, iter->gfn, iter->level); in tdp_mmu_map_handle_target_level()
1165 * If the page fault was caused by a write but the page is write in tdp_mmu_map_handle_target_level()
1169 if (wrprot && fault->write) in tdp_mmu_map_handle_target_level()
1173 if (unlikely(is_mmio_spte(vcpu->kvm, new_spte))) { in tdp_mmu_map_handle_target_level()
1174 vcpu->stat.pf_mmio_spte_created++; in tdp_mmu_map_handle_target_level()
1175 trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, in tdp_mmu_map_handle_target_level()
1179 trace_kvm_mmu_set_spte(iter->level, iter->gfn, in tdp_mmu_map_handle_target_level()
1180 rcu_dereference(iter->sptep)); in tdp_mmu_map_handle_target_level()
1187 * tdp_mmu_link_sp - Replace the given spte with an spte pointing to the
1188 * provided page table.
1192 * @sp: The new TDP page table to install.
1195 * Returns: 0 if the new page table was installed. Non-0 if the page table
1196 * could not be installed (e.g. the atomic compare-exchange failed).
1201 u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled); in tdp_mmu_link_sp()
1221 * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
1222 * page tables and SPTEs to translate the faulting guest physical address.
1227 struct kvm *kvm = vcpu->kvm; in kvm_tdp_mmu_map()
1238 tdp_mmu_for_each_pte(iter, kvm, root, fault->gfn, fault->gfn + 1) { in kvm_tdp_mmu_map()
1241 if (fault->nx_huge_page_workaround_enabled) in kvm_tdp_mmu_map()
1242 disallowed_hugepage_adjust(fault, iter.old_spte, iter.level); in kvm_tdp_mmu_map()
1246 * retry, avoiding unnecessary page table allocation and free. in kvm_tdp_mmu_map()
1251 if (iter.level == fault->goal_level) in kvm_tdp_mmu_map()
1254 /* Step down into the lower level page table if it exists. */ in kvm_tdp_mmu_map()
1260 * The SPTE is either non-present or points to a huge page that in kvm_tdp_mmu_map()
1268 sp->nx_huge_page_disallowed = fault->huge_page_disallowed; in kvm_tdp_mmu_map()
1271 /* Don't support large page for mirrored roots (TDX) */ in kvm_tdp_mmu_map()
1272 KVM_BUG_ON(is_mirror_sptep(iter.sptep), vcpu->kvm); in kvm_tdp_mmu_map()
1279 * Force the guest to retry if installing an upper level SPTE in kvm_tdp_mmu_map()
1287 if (fault->huge_page_disallowed && in kvm_tdp_mmu_map()
1288 fault->req_level >= iter.level) { in kvm_tdp_mmu_map()
1289 spin_lock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_map()
1290 if (sp->nx_huge_page_disallowed) in kvm_tdp_mmu_map()
1292 spin_unlock(&kvm->arch.tdp_mmu_pages_lock); in kvm_tdp_mmu_map()
1297 * The walk aborted before reaching the target level, e.g. because the in kvm_tdp_mmu_map()
1298 * iterator detected an upper level SPTE was frozen during traversal. in kvm_tdp_mmu_map()
1300 WARN_ON_ONCE(iter.level == fault->goal_level); in kvm_tdp_mmu_map()
1318 types = kvm_gfn_range_filter_to_root_types(kvm, range->attr_filter) | KVM_INVALID_ROOTS; in kvm_tdp_mmu_unmap_gfn_range()
1320 __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, types) in kvm_tdp_mmu_unmap_gfn_range()
1321 flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end, in kvm_tdp_mmu_unmap_gfn_range()
1322 range->may_block, flush); in kvm_tdp_mmu_unmap_gfn_range()
1328 * Mark the SPTEs range of GFNs [start, end) unaccessed and return non-zero
1333 * return value to determine if the page has been accessed.
1339 if (spte_ad_enabled(iter->old_spte)) { in kvm_tdp_mmu_age_spte()
1340 iter->old_spte = tdp_mmu_clear_spte_bits(iter->sptep, in kvm_tdp_mmu_age_spte()
1341 iter->old_spte, in kvm_tdp_mmu_age_spte()
1343 iter->level); in kvm_tdp_mmu_age_spte()
1344 new_spte = iter->old_spte & ~shadow_accessed_mask; in kvm_tdp_mmu_age_spte()
1346 new_spte = mark_spte_for_access_track(iter->old_spte); in kvm_tdp_mmu_age_spte()
1347 iter->old_spte = kvm_tdp_mmu_write_spte(iter->sptep, in kvm_tdp_mmu_age_spte()
1348 iter->old_spte, new_spte, in kvm_tdp_mmu_age_spte()
1349 iter->level); in kvm_tdp_mmu_age_spte()
1352 trace_kvm_tdp_mmu_spte_changed(iter->as_id, iter->gfn, iter->level, in kvm_tdp_mmu_age_spte()
1353 iter->old_spte, new_spte); in kvm_tdp_mmu_age_spte()
1365 types = kvm_gfn_range_filter_to_root_types(kvm, range->attr_filter); in __kvm_tdp_mmu_age_gfn_range()
1374 __for_each_tdp_mmu_root(kvm, root, range->slot->as_id, types) { in __kvm_tdp_mmu_age_gfn_range()
1377 tdp_root_for_each_leaf_pte(iter, kvm, root, range->start, range->end) { in __kvm_tdp_mmu_age_gfn_range()
1424 !is_last_spte(iter.old_spte, iter.level) || in wrprot_gfn_range()
1451 lockdep_assert_held_read(&kvm->mmu_lock); in kvm_tdp_mmu_wrprot_slot()
1453 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) in kvm_tdp_mmu_wrprot_slot()
1454 spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn, in kvm_tdp_mmu_wrprot_slot()
1455 slot->base_gfn + slot->npages, min_level); in kvm_tdp_mmu_wrprot_slot()
1468 sp->spt = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); in tdp_mmu_alloc_sp_for_split()
1469 if (!sp->spt) { in tdp_mmu_alloc_sp_for_split()
1481 const u64 huge_spte = iter->old_spte; in tdp_mmu_split_huge_page()
1482 const int level = iter->level; in tdp_mmu_split_huge_page() local
1486 * No need for atomics when writing to sp->spt since the page table has in tdp_mmu_split_huge_page()
1490 sp->spt[i] = make_small_spte(kvm, huge_spte, sp->role, i); in tdp_mmu_split_huge_page()
1493 * Replace the huge spte with a pointer to the populated lower level in tdp_mmu_split_huge_page()
1494 * page table. Since we are making this change without a TLB flush vCPUs in tdp_mmu_split_huge_page()
1505 * tdp_mmu_link_sp_atomic() will handle subtracting the huge page we in tdp_mmu_split_huge_page()
1506 * are overwriting from the page stats. But we have to manually update in tdp_mmu_split_huge_page()
1507 * the page stats with the new present child pages. in tdp_mmu_split_huge_page()
1509 kvm_update_page_stats(kvm, level - 1, SPTE_ENT_PER_PAGE); in tdp_mmu_split_huge_page()
1512 trace_kvm_mmu_split_huge_page(iter->gfn, huge_spte, level, ret); in tdp_mmu_split_huge_page()
1527 * Traverse the page table splitting all huge pages above the target in tdp_mmu_split_huge_pages_root()
1528 * level into one lower level. For example, if we encounter a 1GB page in tdp_mmu_split_huge_pages_root()
1531 * Since the TDP iterator uses a pre-order traversal, we are guaranteed in tdp_mmu_split_huge_pages_root()
1534 * level above the target level (e.g. splitting a 1GB to 512 2MB pages, in tdp_mmu_split_huge_pages_root()
1549 read_unlock(&kvm->mmu_lock); in tdp_mmu_split_huge_pages_root()
1551 write_unlock(&kvm->mmu_lock); in tdp_mmu_split_huge_pages_root()
1556 read_lock(&kvm->mmu_lock); in tdp_mmu_split_huge_pages_root()
1558 write_lock(&kvm->mmu_lock); in tdp_mmu_split_huge_pages_root()
1563 iter.level, -ENOMEM); in tdp_mmu_split_huge_pages_root()
1564 return -ENOMEM; in tdp_mmu_split_huge_pages_root()
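As the comment above notes, the split proceeds one level at a time: each pass replaces a huge SPTE with a full lower-level table of SPTE_ENT_PER_PAGE (512) smaller entries, so a 1GiB mapping becomes 512 2MiB mappings and, on a further pass, each of those becomes 512 4KiB mappings; this is also why the page stats are bumped by SPTE_ENT_PER_PAGE at level - 1. A tiny self-contained C sketch of that arithmetic (constants mirror the x86 4KiB/512-entry layout; names are illustrative):

#include <stdint.h>
#include <stdio.h>

#define ENTRIES_PER_TABLE 512ULL        /* 512 SPTEs per page-table page */
#define PAGE_SHIFT        12            /* 4KiB base pages               */

/* 4KiB pages covered by one mapping at a level: 1 = 4KiB, 2 = 2MiB, 3 = 1GiB */
static uint64_t pages_per_level(int level)
{
        return 1ULL << ((level - 1) * 9);
}

int main(void)
{
        for (int level = 3; level > 1; level--) {
                uint64_t before = pages_per_level(level);
                uint64_t after  = pages_per_level(level - 1);

                printf("split level %d: 1 x %llu KiB -> %llu x %llu KiB\n",
                       level,
                       (unsigned long long)((before << PAGE_SHIFT) >> 10),
                       (unsigned long long)ENTRIES_PER_TABLE,
                       (unsigned long long)((after << PAGE_SHIFT) >> 10));
        }
        return 0;
}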
1596 * Try to split all huge pages mapped by the TDP MMU down to the target level.
1607 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) { in kvm_tdp_mmu_try_split_huge_pages()
1620 * from level, so it is valid to key off any shadow page to determine if in tdp_mmu_need_write_protect()
1638 !is_last_spte(iter.old_spte, iter.level)) in clear_dirty_gfn_range()
1658 * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the
1666 lockdep_assert_held_read(&kvm->mmu_lock); in kvm_tdp_mmu_clear_dirty_slot()
1667 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) in kvm_tdp_mmu_clear_dirty_slot()
1668 clear_dirty_gfn_range(kvm, root, slot->base_gfn, in kvm_tdp_mmu_clear_dirty_slot()
1669 slot->base_gfn + slot->npages); in kvm_tdp_mmu_clear_dirty_slot()
1679 lockdep_assert_held_write(&kvm->mmu_lock); in clear_dirty_pt_masked()
1691 if (iter.level > PG_LEVEL_4K || in clear_dirty_pt_masked()
1692 !(mask & (1UL << (iter.gfn - gfn)))) in clear_dirty_pt_masked()
1695 mask &= ~(1UL << (iter.gfn - gfn)); in clear_dirty_pt_masked()
1702 iter.level); in clear_dirty_pt_masked()
1704 trace_kvm_tdp_mmu_spte_changed(iter.as_id, iter.gfn, iter.level, in clear_dirty_pt_masked()
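The masked walk above touches only 4K SPTEs whose GFN is selected by a 64-bit dirty-log mask anchored at gfn: bit (iter.gfn - gfn) picks the SPTE, and each handled bit is cleared so the loop can stop as soon as the mask is empty. A small self-contained C sketch of that bit selection (values are illustrative):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t base_gfn = 0x1000;                     /* first GFN covered  */
        uint64_t mask = (1ULL << 3) | (1ULL << 17);     /* base+3 and base+17 */

        /* Visit GFNs in order; stop early once every masked bit is handled. */
        for (uint64_t gfn = base_gfn; mask && gfn < base_gfn + 64; gfn++) {
                if (!(mask & (1ULL << (gfn - base_gfn))))
                        continue;                       /* not in the mask */

                printf("clear dirty/write bit for gfn 0x%llx\n",
                       (unsigned long long)gfn);
                mask &= ~(1ULL << (gfn - base_gfn));
        }
        return 0;
}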
1713 * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for
1724 for_each_valid_tdp_mmu_root(kvm, root, slot->as_id) in kvm_tdp_mmu_clear_dirty_pt_masked()
1732 struct kvm_mmu_page *root = spte_to_child_sp(parent->old_spte); in tdp_mmu_make_huge_spte()
1733 gfn_t start = parent->gfn; in tdp_mmu_make_huge_spte()
1734 gfn_t end = start + KVM_PAGES_PER_HPAGE(parent->level); in tdp_mmu_make_huge_spte()
1741 * returning -EAGAIN here and then failing the forward progress in tdp_mmu_make_huge_spte()
1745 return -EAGAIN; in tdp_mmu_make_huge_spte()
1747 *huge_spte = make_huge_spte(kvm, iter.old_spte, parent->level); in tdp_mmu_make_huge_spte()
1751 return -ENOENT; in tdp_mmu_make_huge_spte()
1758 gfn_t start = slot->base_gfn; in recover_huge_pages_range()
1759 gfn_t end = start + slot->npages; in recover_huge_pages_range()
1778 if (iter.level > KVM_MAX_HUGEPAGE_LEVEL || in recover_huge_pages_range()
1784 * a large page size, then its parent would have been zapped in recover_huge_pages_range()
1787 if (is_last_spte(iter.old_spte, iter.level)) in recover_huge_pages_range()
1791 * If iter.gfn resides outside of the slot, i.e. the page for in recover_huge_pages_range()
1792 * the current level overlaps but is not contained by the slot, in recover_huge_pages_range()
1794 * to query that info from slot->arch.lpage_info will cause an in recover_huge_pages_range()
1795 * out-of-bounds access. in recover_huge_pages_range()
1801 if (max_mapping_level < iter.level) in recover_huge_pages_range()
1805 if (r == -EAGAIN) in recover_huge_pages_range()
1823 * Recover huge page mappings within the slot by replacing non-leaf SPTEs with
1831 lockdep_assert_held_read(&kvm->mmu_lock); in kvm_tdp_mmu_recover_huge_pages()
1832 for_each_valid_tdp_mmu_root_yield_safe(kvm, root, slot->as_id) in kvm_tdp_mmu_recover_huge_pages()
1837 * Removes write access on the last level SPTE mapping this GFN and unsets the
1838 * MMU-writable bit to ensure future writes continue to be intercepted.
1854 !is_last_spte(iter.old_spte, iter.level)) in write_protect_gfn()
1873 * Removes write access on the last level SPTE mapping this GFN and unsets the
1874 * MMU-writable bit to ensure future writes continue to be intercepted.
1884 lockdep_assert_held_write(&kvm->mmu_lock); in kvm_tdp_mmu_write_protect_gfn()
1885 for_each_valid_tdp_mmu_root(kvm, root, slot->as_id) in kvm_tdp_mmu_write_protect_gfn()
1892 * Return the level of the lowest level SPTE added to sptes.
1893 * That SPTE may be non-present.
1900 struct kvm_mmu_page *root = root_to_sp(vcpu->arch.mmu->root.hpa); in kvm_tdp_mmu_get_walk()
1903 int leaf = -1; in kvm_tdp_mmu_get_walk()
1905 *root_level = vcpu->arch.mmu->root_role.level; in kvm_tdp_mmu_get_walk()
1907 tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) { in kvm_tdp_mmu_get_walk()
1908 leaf = iter.level; in kvm_tdp_mmu_get_walk()
1916 * Returns the last level spte pointer of the shadow page walk for the given
1917 * gpa, and sets *spte to the spte value. This spte may be non-present. If no
1921 * - Must be called between kvm_tdp_mmu_walk_lockless_{begin,end}.
1922 * - The returned sptep must not be used after kvm_tdp_mmu_walk_lockless_end.
1934 tdp_mmu_for_each_pte(iter, vcpu->kvm, root, gfn, gfn + 1) { in kvm_tdp_mmu_fast_pf_get_last_sptep()
1942 * legacy MMU and thus does not retain the TDP MMU-specific __rcu in kvm_tdp_mmu_fast_pf_get_last_sptep()
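The contract spelled out above matches how the fast page-fault path is expected to use this helper: enter the lockless walk, fetch the last-level sptep, consume it, and drop it before leaving the walk, because the pointer is only protected by the surrounding RCU read-side section. A hedged kernel-context sketch of a caller honoring that contract (peek_last_level_spte is illustrative; the real consumer is the fast page-fault path and its details are elided):

/* Sketch only: assumes KVM kernel context; error handling elided. */
static u64 peek_last_level_spte(struct kvm_vcpu *vcpu, gpa_t gpa)
{
        u64 spte = 0;
        u64 *sptep;

        kvm_tdp_mmu_walk_lockless_begin();              /* RCU read side */
        sptep = kvm_tdp_mmu_fast_pf_get_last_sptep(vcpu, gpa, &spte);
        if (sptep)
                pr_debug("leaf sptep %p holds spte %llx\n", sptep, spte);
        /* sptep must not be dereferenced past this point. */
        kvm_tdp_mmu_walk_lockless_end();

        return spte;
}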