Lines Matching +full:page +full:- +full:level

1 // SPDX-License-Identifier: GPL-2.0
3 * Copyright (C) 2020-2023 Loongson Technology Corporation Limited
9 #include <linux/page-flags.h>
18 return slot->arch.flags & KVM_MEM_HUGEPAGE_CAPABLE; in kvm_hugepage_capable()
23 return slot->arch.flags & KVM_MEM_HUGEPAGE_INCAPABLE; in kvm_hugepage_incapable()
28 ctx->level = kvm->arch.root_level; in kvm_ptw_prepare()
30 ctx->invalid_ptes = kvm->arch.invalid_ptes; in kvm_ptw_prepare()
31 ctx->pte_shifts = kvm->arch.pte_shifts; in kvm_ptw_prepare()
32 ctx->pgtable_shift = ctx->pte_shifts[ctx->level]; in kvm_ptw_prepare()
33 ctx->invalid_entry = ctx->invalid_ptes[ctx->level]; in kvm_ptw_prepare()
34 ctx->opaque = kvm; in kvm_ptw_prepare()
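
The kvm_ptw_prepare() lines above seed a walk context from per-VM data: root level, per-level shifts, per-level "invalid entry" templates, and an opaque back-pointer. The following is a minimal, self-contained sketch of that pattern; the types and values (struct demo_vm, struct demo_ptw_ctx, the shift/template numbers) are illustrative stand-ins, not the kernel's kvm_ptw_ctx or struct kvm definitions.

    #include <stdio.h>

    #define DEMO_MAX_LEVEL 4

    struct demo_vm {
            int root_level;
            unsigned long invalid_ptes[DEMO_MAX_LEVEL];  /* per-level "empty" template */
            unsigned int  pte_shifts[DEMO_MAX_LEVEL];    /* address bits covered per level */
    };

    struct demo_ptw_ctx {
            int level;
            unsigned int  pgtable_shift;
            unsigned long invalid_entry;
            const unsigned long *invalid_ptes;
            const unsigned int  *pte_shifts;
            void *opaque;                                /* back-pointer to the VM */
    };

    static void demo_ptw_prepare(struct demo_vm *vm, struct demo_ptw_ctx *ctx)
    {
            ctx->level         = vm->root_level;
            ctx->invalid_ptes  = vm->invalid_ptes;
            ctx->pte_shifts    = vm->pte_shifts;
            ctx->pgtable_shift = vm->pte_shifts[vm->root_level];
            ctx->invalid_entry = vm->invalid_ptes[vm->root_level];
            ctx->opaque        = vm;
    }

    int main(void)
    {
            struct demo_vm vm = {
                    .root_level   = 2,
                    .invalid_ptes = { 0, 0x1000, 0x2000, 0x3000 },
                    .pte_shifts   = { 12, 21, 30, 39 },
            };
            struct demo_ptw_ctx ctx;

            demo_ptw_prepare(&vm, &ctx);
            printf("root level %d, shift %u, invalid entry %#lx\n",
                   ctx.level, ctx.pgtable_shift, ctx.invalid_entry);
            return 0;
    }
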
39 * VM's GPA page table to allow detection of commonly used pages.
53 * GPA page table to allow dirty page tracking.
63 * may cross a hugepage boundary: for the first huge page the addr in kvm_mkclean_pte()
64 * parameter equals start, while for the second huge page addr is the in kvm_mkclean_pte()
65 * base address of that huge page rather than start or end in kvm_mkclean_pte()
67 if ((ctx->flag & _KVM_HAS_PGMASK) && !kvm_pte_huge(val)) { in kvm_mkclean_pte()
68 offset = (addr >> PAGE_SHIFT) - ctx->gfn; in kvm_mkclean_pte()
69 if (!(BIT(offset) & ctx->mask)) in kvm_mkclean_pte()
74 * No need to split the huge page now, just set the write-protect pte bit in kvm_mkclean_pte()
75 * and defer splitting the huge page until the next write fault in kvm_mkclean_pte()
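
The mask test at lines 67-69 above only cleans pages whose bit is set in the 64-page window starting at ctx->gfn. Here is a quick standalone illustration of that arithmetic; DEMO_PAGE_SHIFT, DEMO_BIT() and the helper name are local stand-ins, not the kernel's macros.

    #include <stdio.h>

    #define DEMO_PAGE_SHIFT 12
    #define DEMO_BIT(n)     (1UL << (n))

    static int page_selected(unsigned long addr, unsigned long base_gfn,
                             unsigned long mask)
    {
            unsigned long offset = (addr >> DEMO_PAGE_SHIFT) - base_gfn;

            return (DEMO_BIT(offset) & mask) != 0;
    }

    int main(void)
    {
            unsigned long base_gfn = 0x100;                  /* window starts at gfn 0x100 */
            unsigned long mask = DEMO_BIT(0) | DEMO_BIT(5);  /* pages 0 and 5 are dirty */
            unsigned long addr;

            for (addr = base_gfn << DEMO_PAGE_SHIFT;
                 addr < (base_gfn + 8) << DEMO_PAGE_SHIFT;
                 addr += 1UL << DEMO_PAGE_SHIFT)
                    printf("gfn %#lx: %s\n", addr >> DEMO_PAGE_SHIFT,
                           page_selected(addr, base_gfn, mask) ?
                           "write-protect" : "skip");
            return 0;
    }
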
92 kvm = ctx->opaque; in kvm_flush_pte()
93 if (ctx->level) in kvm_flush_pte()
94 kvm->stat.hugepages--; in kvm_flush_pte()
96 kvm->stat.pages--; in kvm_flush_pte()
98 *pte = ctx->invalid_entry; in kvm_flush_pte()
104 * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory.
106 * Allocate a blank KVM GPA page directory (PGD) for representing guest physical
107 * to host physical page mappings.
109 * Returns: Pointer to new KVM GPA page directory.
136 p[-3] = val; in _kvm_pte_init()
137 p[-2] = val; in _kvm_pte_init()
138 p[-1] = val; in _kvm_pte_init()
143 * Caller must hold kvm->mmu_lock
145 * Walk the page tables of kvm to find the PTE corresponding to the
146 * address @addr. If page tables don't exist for @addr, they will be created
151 unsigned long addr, int level) in kvm_populate_gpa() argument
157 child = kvm->arch.pgd; in kvm_populate_gpa()
158 while (ctx.level > level) { in kvm_populate_gpa()
165 _kvm_pte_init(child, ctx.invalid_ptes[ctx.level - 1]); in kvm_populate_gpa()
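
The kvm_populate_gpa() lines above descend from the root level to the requested level, allocating and invalid-initialising any missing intermediate table on the way. Below is a self-contained sketch of that descend-and-allocate loop; the table geometry (8 entries per level), the allocator and the "invalid means zero" encoding are simplified assumptions for illustration only.

    #include <stdio.h>
    #include <stdlib.h>

    #define ENTRIES_PER_TABLE 8           /* 3 index bits per level, demo value */
    #define INVALID_ENTRY     0UL

    typedef unsigned long demo_pte_t;

    static unsigned int index_at_level(unsigned long pagenr, int level)
    {
            /* level 0 uses bits [2:0] of the page number, level 1 bits [5:3], ... */
            return (pagenr >> (3 * level)) & (ENTRIES_PER_TABLE - 1);
    }

    static demo_pte_t *populate(demo_pte_t *root, int root_level,
                                unsigned long pagenr, int level)
    {
            demo_pte_t *table = root;
            int cur = root_level;

            while (cur > level) {
                    demo_pte_t *entry = table + index_at_level(pagenr, cur);

                    if (*entry == INVALID_ENTRY) {
                            /* allocate and pre-fill a missing child table */
                            demo_pte_t *child = calloc(ENTRIES_PER_TABLE, sizeof(*child));

                            *entry = (demo_pte_t)child;
                    }
                    table = (demo_pte_t *)*entry;
                    cur--;
            }
            return table + index_at_level(pagenr, cur);
    }

    int main(void)
    {
            demo_pte_t *root = calloc(ENTRIES_PER_TABLE, sizeof(*root));
            demo_pte_t *pte = populate(root, 2, 0x2a /* page number */, 0);

            *pte = 0xdeadUL;
            printf("installed %#lx at the leaf slot for page 0x2a\n", *pte);
            return 0;
    }
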
181 * Page walker for the VM shadow mmu at the last level.
182 * The last level is either a small pte page or a huge pmd page.
196 next = addr + (0x1UL << ctx->pgtable_shift); in kvm_ptw_leaf()
200 ret |= ctx->ops(entry, addr, ctx); in kvm_ptw_leaf()
204 size = 0x1UL << (ctx->pgtable_shift + PAGE_SHIFT - 3); in kvm_ptw_leaf()
207 list_add_tail(list, &ctx->list); in kvm_ptw_leaf()
208 *dir = ctx->invalid_ptes[ctx->level + 1]; in kvm_ptw_leaf()
216 * Page walker for the VM shadow mmu at the page table directory level
235 ret |= ctx->ops(entry, addr, ctx); in kvm_ptw_dir()
240 if (ctx->level == 0) in kvm_ptw_dir()
248 size = 0x1UL << (ctx->pgtable_shift + PAGE_SHIFT - 3); in kvm_ptw_dir()
251 list_add_tail(list, &ctx->list); in kvm_ptw_dir()
252 *dir = ctx->invalid_ptes[ctx->level + 1]; in kvm_ptw_dir()
260 * Page walker for the VM shadow mmu at the page table root
284 * kvm_flush_range() - Flush a range of guest physical addresses.
286 * @start_gfn: Guest frame number of first page in GPA range to flush.
287 * @end_gfn: Guest frame number of last page in GPA range to flush.
290 * Flushes a range of GPA mappings from the GPA page tables.
304 spin_lock(&kvm->mmu_lock); in kvm_flush_range()
305 ret = kvm_ptw_top(kvm->arch.pgd, start_gfn << PAGE_SHIFT, in kvm_flush_range()
307 spin_unlock(&kvm->mmu_lock); in kvm_flush_range()
309 ret = kvm_ptw_top(kvm->arch.pgd, start_gfn << PAGE_SHIFT, in kvm_flush_range()
317 * free the pte table pages after dropping mmu_lock; in kvm_flush_range()
318 * the pte table pages are linked together on ctx.list in kvm_flush_range()
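
Since mmu_lock is a spinlock, table pages that become empty during the walk cannot be freed on the spot; they are only chained onto ctx.list and released once the lock is dropped. The following is a userspace sketch of that two-phase pattern (compile with -pthread); the list node, lock and free helper are stand-ins, not the kernel's list_head machinery.

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct dead_table {
            struct dead_table *next;
            void *page;
    };

    static pthread_mutex_t demo_mmu_lock = PTHREAD_MUTEX_INITIALIZER;

    static void demo_free_page(void *page)
    {
            printf("freeing table page %p outside the lock\n", page);
            free(page);
    }

    int main(void)
    {
            struct dead_table *deferred = NULL, *victim;
            int i;

            pthread_mutex_lock(&demo_mmu_lock);
            /* "walk" the table: pretend three table pages became empty */
            for (i = 0; i < 3; i++) {
                    victim = malloc(sizeof(*victim));
                    victim->page = malloc(64);      /* the empty table page */
                    victim->next = deferred;        /* only link it, do not free it yet */
                    deferred = victim;
            }
            pthread_mutex_unlock(&demo_mmu_lock);

            /* now that the lock is dropped, it is safe to free the pages */
            while (deferred) {
                    victim = deferred;
                    deferred = deferred->next;
                    demo_free_page(victim->page);
                    free(victim);
            }
            return 0;
    }
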
327 * kvm_mkclean_gpa_pt() - Make a range of guest physical addresses clean.
329 * @start_gfn: Guest frame number of first page in GPA range to flush.
330 * @end_gfn: Guest frame number of last page in GPA range to flush.
333 * trigger dirty page logging.
335 * The caller must hold the @kvm->mmu_lock spinlock.
338 * derived mappings (GVA page tables & TLB entries) to be
348 return kvm_ptw_top(kvm->arch.pgd, start_gfn << PAGE_SHIFT, end_gfn << PAGE_SHIFT, &ctx); in kvm_mkclean_gpa_pt()
352 * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages
360 * acquire @kvm->mmu_lock.
366 gfn_t base_gfn = slot->base_gfn + gfn_offset; in kvm_arch_mmu_enable_log_dirty_pt_masked()
376 kvm_ptw_top(kvm->arch.pgd, start << PAGE_SHIFT, end << PAGE_SHIFT, &ctx); in kvm_arch_mmu_enable_log_dirty_pt_masked()
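
The masked variant above walks only the 64-page window described by @mask, and kvm_mkclean_pte() (lines 67-69) then skips pages whose bit is clear. A natural way to bound that walk, sketched below with compiler builtins rather than the kernel's __ffs/__fls helpers (the exact bounding lines are not shown in this listing), is to start at the lowest set bit and stop one past the highest.

    #include <stdio.h>

    int main(void)
    {
            unsigned long long base_gfn = 0x4000;
            unsigned long long mask = 0x00f0000000000012ULL;    /* dirty pages in the window */
            unsigned long long start, end;

            start = base_gfn + __builtin_ctzll(mask);            /* lowest set bit */
            end   = base_gfn + (63 - __builtin_clzll(mask)) + 1; /* one past the highest set bit */

            printf("walk gfns [%#llx, %#llx)\n", start, end);
            return 0;
    }
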
392 if ((new->base_gfn + new->npages) > (kvm->arch.gpa_size >> PAGE_SHIFT)) in kvm_arch_prepare_memory_region()
393 return -ENOMEM; in kvm_arch_prepare_memory_region()
395 new->arch.flags = 0; in kvm_arch_prepare_memory_region()
396 size = new->npages * PAGE_SIZE; in kvm_arch_prepare_memory_region()
397 gpa_start = new->base_gfn << PAGE_SHIFT; in kvm_arch_prepare_memory_region()
398 hva_start = new->userspace_addr; in kvm_arch_prepare_memory_region()
401 new->arch.flags |= KVM_MEM_HUGEPAGE_CAPABLE; in kvm_arch_prepare_memory_region()
411 * memslot->userspace_addr: in kvm_arch_prepare_memory_region()
412 * +-----+--------------------+--------------------+---+ in kvm_arch_prepare_memory_region()
413 * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| in kvm_arch_prepare_memory_region()
414 * +-----+--------------------+--------------------+---+ in kvm_arch_prepare_memory_region()
416 * memslot->base_gfn << PAGE_SHIFT: in kvm_arch_prepare_memory_region()
417 * +---+--------------------+--------------------+-----+ in kvm_arch_prepare_memory_region()
418 * |abc|def Stage-2 block | Stage-2 block |tvxyz| in kvm_arch_prepare_memory_region()
419 * +---+--------------------+--------------------+-----+ in kvm_arch_prepare_memory_region()
421 * If we create those stage-2 blocks, we'll end up with this in kvm_arch_prepare_memory_region()
423 * d -> f in kvm_arch_prepare_memory_region()
424 * e -> g in kvm_arch_prepare_memory_region()
425 * f -> h in kvm_arch_prepare_memory_region()
427 gpa_offset = gpa_start & (PMD_SIZE - 1); in kvm_arch_prepare_memory_region()
428 hva_offset = hva_start & (PMD_SIZE - 1); in kvm_arch_prepare_memory_region()
430 new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE; in kvm_arch_prepare_memory_region()
435 new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE; in kvm_arch_prepare_memory_region()
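
The diagram and checks above boil down to a congruence rule: a memslot can only be mapped with huge stage-2 entries if its GPA base and HVA base have the same offset within a huge page (the surrounding code also considers the slot size, not shown in full here). The standalone demo below uses a 2 MiB stand-in for PMD_SIZE.

    #include <stdio.h>

    #define DEMO_PMD_SIZE (2UL << 20)   /* 2 MiB huge pages, demo value */

    static const char *hugepage_verdict(unsigned long gpa_start, unsigned long hva_start)
    {
            unsigned long gpa_offset = gpa_start & (DEMO_PMD_SIZE - 1);
            unsigned long hva_offset = hva_start & (DEMO_PMD_SIZE - 1);

            return (gpa_offset == hva_offset) ? "hugepage capable" : "hugepage incapable";
    }

    int main(void)
    {
            /* same offset into a 2 MiB block on both sides: capable */
            printf("%s\n", hugepage_verdict(0x80200000UL, 0x7f5a00200000UL));
            /* offsets differ: every huge GPA block would straddle two HVA blocks */
            printf("%s\n", hugepage_verdict(0x80200000UL, 0x7f5a00310000UL));
            return 0;
    }
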
448 u32 old_flags = old ? old->flags : 0; in kvm_arch_commit_memory_region()
449 u32 new_flags = new ? new->flags : 0; in kvm_arch_commit_memory_region()
456 /* Discard dirty page tracking on readonly memslot */ in kvm_arch_commit_memory_region()
461 * If dirty page logging is enabled, write protect all pages in the slot in kvm_arch_commit_memory_region()
471 * Initially-all-set does not require write protecting any page in kvm_arch_commit_memory_region()
477 spin_lock(&kvm->mmu_lock); in kvm_arch_commit_memory_region()
478 /* Write protect GPA page table entries */ in kvm_arch_commit_memory_region()
479 needs_flush = kvm_mkclean_gpa_pt(kvm, new->base_gfn, in kvm_arch_commit_memory_region()
480 new->base_gfn + new->npages); in kvm_arch_commit_memory_region()
481 spin_unlock(&kvm->mmu_lock); in kvm_arch_commit_memory_region()
489 kvm_flush_range(kvm, 0, kvm->arch.gpa_size >> PAGE_SHIFT, 0); in kvm_arch_flush_shadow_all()
498 kvm_flush_range(kvm, slot->base_gfn, slot->base_gfn + slot->npages, 1); in kvm_arch_flush_shadow_memslot()
510 return kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT, in kvm_unmap_gfn_range()
511 range->end << PAGE_SHIFT, &ctx); in kvm_unmap_gfn_range()
522 return kvm_ptw_top(kvm->arch.pgd, range->start << PAGE_SHIFT, in kvm_age_gfn()
523 range->end << PAGE_SHIFT, &ctx); in kvm_age_gfn()
528 gpa_t gpa = range->start << PAGE_SHIFT; in kvm_test_age_gfn()
538 * kvm_map_page_fast() - Fast path GPA fault handler.
544 * calling into KVM. This handles marking old pages young (for idle page
545 * tracking), and dirtying of clean pages (for dirty page logging).
549 * -EFAULT on failure due to absent GPA mapping or write to
550 * read-only page, in which case KVM must be consulted.
557 struct kvm *kvm = vcpu->kvm; in kvm_map_page_fast()
560 spin_lock(&kvm->mmu_lock); in kvm_map_page_fast()
562 /* Fast path - just check GPA page table for an existing entry */ in kvm_map_page_fast()
565 ret = -EFAULT; in kvm_map_page_fast()
573 ret = -EFAULT; in kvm_map_page_fast()
584 ret = -EFAULT; in kvm_map_page_fast()
597 spin_unlock(&kvm->mmu_lock); in kvm_map_page_fast()
604 spin_unlock(&kvm->mmu_lock); in kvm_map_page_fast()
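
As the kvm_map_page_fast() documentation above says, the fast path only updates young/dirty state on an existing mapping and returns -EFAULT when the full handler is needed. The sketch below shows that control-flow shape with stand-in pte bits and without the real locking, slot lookup or dirty-log bookkeeping; it is an illustration, not the kernel's implementation.

    #include <errno.h>
    #include <stdio.h>

    #define DEMO_PTE_PRESENT   (1u << 0)
    #define DEMO_PTE_WRITEABLE (1u << 1)  /* software bit: writes are permitted */
    #define DEMO_PTE_YOUNG     (1u << 2)
    #define DEMO_PTE_DIRTY     (1u << 3)

    static int demo_map_page_fast(unsigned int *pte, int write)
    {
            /* mmu_lock would be held around this lookup and update */
            if (!(*pte & DEMO_PTE_PRESENT))
                    return -EFAULT;                 /* no mapping: take the slow path */

            if (write && !(*pte & DEMO_PTE_WRITEABLE))
                    return -EFAULT;                 /* read-only region: slow path decides */

            *pte |= DEMO_PTE_YOUNG;                 /* idle tracking: mark the page young */
            if (write)
                    *pte |= DEMO_PTE_DIRTY;         /* dirty logging: mark the page dirty */
            return 0;
    }

    int main(void)
    {
            unsigned int pte = DEMO_PTE_PRESENT | DEMO_PTE_WRITEABLE;

            printf("read fault:  %d (pte=%#x)\n", demo_map_page_fast(&pte, 0), pte);
            printf("write fault: %d (pte=%#x)\n", demo_map_page_fast(&pte, 1), pte);
            pte = 0;
            printf("no mapping:  %d\n", demo_map_page_fast(&pte, 1));
            return 0;
    }
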
623 start = memslot->userspace_addr; in fault_supports_huge_mapping()
624 end = start + memslot->npages * PAGE_SIZE; in fault_supports_huge_mapping()
629 * for the beginning and end of a non-block aligned and non-block sized in fault_supports_huge_mapping()
642 * Lookup the mapping level for @gfn in the current mm.
649 * - Check mmu_invalidate_retry_gfn() after grabbing the mapping level, before
653 * - Hold mmu_lock AND ensure there is no in-progress MMU notifier invalidation
657 * - Do not use the result to install new mappings, e.g. use the host mapping
658 * level only to decide whether or not to zap an entry. In this case, it's
662 * Note! The lookup can still race with modifications to host page tables, but
669 int level = 0; in host_pfn_mapping_level() local
678 * Note, using the already-retrieved memslot and __gfn_to_hva_memslot() in host_pfn_mapping_level()
681 * read-only memslots due to gfn_to_hva() assuming writes. Earlier in host_pfn_mapping_level()
682 * page fault steps have already verified the guest isn't writing a in host_pfn_mapping_level()
683 * read-only memslot. in host_pfn_mapping_level()
688 * Disable IRQs to prevent concurrent tear down of host page tables, in host_pfn_mapping_level()
689 * e.g. if the primary MMU promotes a P*D to a huge page and then frees in host_pfn_mapping_level()
690 * the original page table. in host_pfn_mapping_level()
695 * Read each entry once. As above, a non-leaf entry can be promoted to in host_pfn_mapping_level()
696 * a huge page _during_ this walk. Re-reading the entry could send the in host_pfn_mapping_level()
698 * value) and then p*d_offset() walks into the target huge page instead in host_pfn_mapping_level()
699 * of the old page table (sees the new value). in host_pfn_mapping_level()
701 pgd = pgdp_get(pgd_offset(kvm->mm, hva)); in host_pfn_mapping_level()
718 level = 1; in host_pfn_mapping_level()
722 return level; in host_pfn_mapping_level()
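
The comment block above insists on reading each host table entry exactly once, because a concurrent promotion to a huge entry must be seen either entirely old or entirely new. The small illustration below applies that discipline with a volatile read standing in for READ_ONCE(); the one-level table and flag encoding are purely illustrative.

    #include <stdio.h>

    typedef unsigned long demo_entry_t;

    #define DEMO_LEAF_FLAG    1UL
    #define DEMO_READ_ONCE(x) (*(volatile demo_entry_t *)&(x))

    static int demo_mapping_level(demo_entry_t *dir, unsigned int idx)
    {
            demo_entry_t entry = DEMO_READ_ONCE(dir[idx]);   /* single snapshot */

            if (!entry)
                    return 0;        /* not mapped: only base pages possible */
            if (entry & DEMO_LEAF_FLAG)
                    return 1;        /* huge leaf at this level */

            /* a real walker would descend via the snapshot, never via dir[idx] again */
            return 0;
    }

    int main(void)
    {
            demo_entry_t dir[4] = { 0, 0x100000 | DEMO_LEAF_FLAG, 0x200000, 0 };

            printf("idx 0 -> level %d\n", demo_mapping_level(dir, 0));
            printf("idx 1 -> level %d\n", demo_mapping_level(dir, 1));
            printf("idx 2 -> level %d\n", demo_mapping_level(dir, 2));
            return 0;
    }
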
726 * Split huge page
732 struct kvm *kvm = vcpu->kvm; in kvm_split_huge()
735 memcache = &vcpu->arch.mmu_page_cache; in kvm_split_huge()
747 kvm->stat.hugepages--; in kvm_split_huge()
748 kvm->stat.pages += PTRS_PER_PTE; in kvm_split_huge()
750 return child + (gfn & (PTRS_PER_PTE - 1)); in kvm_split_huge()
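
The return expression at line 750 indexes the freshly created pte table with the low bits of the gfn: a huge mapping covers PTRS_PER_PTE base pages, so after the split the faulting gfn's pte sits at slot (gfn & (PTRS_PER_PTE - 1)). The inverse mask is what lines 881-882 below use to align gfn and pfn to the huge-page frame. Worked example with a stand-in value for PTRS_PER_PTE:

    #include <stdio.h>

    #define DEMO_PTRS_PER_PTE 512   /* 4 KiB pages under a 2 MiB huge mapping */

    int main(void)
    {
            unsigned long gfn = 0x1234f;
            unsigned long huge_base = gfn & ~(unsigned long)(DEMO_PTRS_PER_PTE - 1);
            unsigned long index = gfn & (DEMO_PTRS_PER_PTE - 1);

            printf("gfn %#lx lives in the huge page starting at gfn %#lx,\n", gfn, huge_base);
            printf("so after splitting it uses pte slot %lu of %d\n", index, DEMO_PTRS_PER_PTE);
            return 0;
    }
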
754 * kvm_map_page() - Map a guest physical page.
762 * This takes care of marking pages young or dirty (idle/dirty page tracking),
763 * asking KVM for the corresponding PFN, and creating a mapping in the GPA page
764 * tables. Derived mappings (GVA page tables and TLBs) must be handled by the
768 * -EFAULT if there is no memory region at @gpa or a write was
769 * attempted to a read-only memory region. This is usually handled
775 int srcu_idx, err, retry_no = 0, level; in kvm_map_page() local
780 struct kvm *kvm = vcpu->kvm; in kvm_map_page()
782 struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; in kvm_map_page()
783 struct page *page; in kvm_map_page() local
786 srcu_idx = srcu_read_lock(&kvm->srcu); in kvm_map_page()
794 err = -EFAULT; in kvm_map_page()
798 /* We need a minimum of cached pages ready for page table creation */ in kvm_map_page()
808 mmu_seq = kvm->mmu_invalidate_seq; in kvm_map_page()
812 * risk the page we get a reference to getting unmapped before we have a in kvm_map_page()
817 * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before in kvm_map_page()
822 /* Slow path - ask KVM core whether we can access this GPA */ in kvm_map_page()
823 pfn = kvm_faultin_pfn(vcpu, gfn, write, &writeable, &page); in kvm_map_page()
825 err = -EFAULT; in kvm_map_page()
830 spin_lock(&kvm->mmu_lock); in kvm_map_page()
837 spin_unlock(&kvm->mmu_lock); in kvm_map_page()
838 kvm_release_page_unused(page); in kvm_map_page()
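
Source lines 808-838 above implement the usual invalidate-sequence retry: sample the sequence count before resolving the pfn outside the lock, then re-check it under mmu_lock and retry if an invalidation ran in between. The skeleton below shows only that control flow; the counter, the fake faultin/release helpers and the single simulated invalidation are stand-ins.

    #include <stdio.h>

    static unsigned long demo_invalidate_seq;

    static long demo_faultin_pfn(int attempt)
    {
            /* pretend an MMU-notifier invalidation fires during the first attempt */
            if (attempt == 0)
                    demo_invalidate_seq++;
            return 0x8000 + attempt;
    }

    static int demo_retry(unsigned long seen_seq)
    {
            return seen_seq != demo_invalidate_seq;
    }

    int main(void)
    {
            int attempt = 0;
            long pfn;
            unsigned long seq;

    retry:
            seq = demo_invalidate_seq;         /* paired with memory barriers in the kernel */
            pfn = demo_faultin_pfn(attempt);   /* resolved without holding mmu_lock */

            /* mmu_lock would be taken here */
            if (demo_retry(seq)) {
                    /* drop the lock, release the page reference, then try again */
                    printf("attempt %d: stale pfn %#lx, retrying\n", attempt, pfn);
                    attempt++;
                    goto retry;
            }
            printf("attempt %d: pfn %#lx installed\n", attempt, pfn);
            return 0;
    }
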
865 level = 0; in kvm_map_page()
867 /* Check the mapping level in the host mmu */ in kvm_map_page()
868 level = host_pfn_mapping_level(kvm, gfn, memslot); in kvm_map_page()
869 if (level == 1) { in kvm_map_page()
871 * Check the mapping level in the secondary mmu in kvm_map_page()
872 * Disable hugepage if it is mapped as a normal page on in kvm_map_page()
877 level = 0; in kvm_map_page()
880 if (level == 1) { in kvm_map_page()
881 gfn = gfn & ~(PTRS_PER_PTE - 1); in kvm_map_page()
882 pfn = pfn & ~(PTRS_PER_PTE - 1); in kvm_map_page()
886 /* Ensure page tables are allocated */ in kvm_map_page()
887 ptep = kvm_populate_gpa(kvm, memcache, gpa, level); in kvm_map_page()
889 if (level == 1) { in kvm_map_page()
893 * there may be stale tlb entries for the small pages in kvm_map_page()
897 ++kvm->stat.hugepages; in kvm_map_page()
901 ++kvm->stat.pages; in kvm_map_page()
904 kvm_release_faultin_page(kvm, page, false, writeable); in kvm_map_page()
905 spin_unlock(&kvm->mmu_lock); in kvm_map_page()
911 srcu_read_unlock(&kvm->srcu, srcu_idx); in kvm_map_page()
924 vcpu->arch.flush_gpa = gpa; in kvm_handle_mm_fault()