1 // SPDX-License-Identifier: GPL-2.0-only
9 * demand-loading started 01.12.91 - seems it is high on the list of
10 * things wanted, and it should be easy to implement. - Linus
14 * Ok, demand-loading was easy, shared pages a little bit trickier. Shared
15 * pages started 02.12.91, seems to work. - Linus.
21 * Also corrected some "invalidate()"s - I wasn't doing enough of them.
27 * 19.12.91 - works, somewhat. Sometimes I get faults, don't know why.
29 * 20.12.91 - Ok, making the swap-device changeable like the root.
33 * 05.04.94 - Multi-page memory management added for v1.1.
36 * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG
69 #include <linux/memory-tiers.h>
89 #include "pgalloc-track.h"
94 #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
110 * Return true if the original pte was a uffd-wp pte marker (so the pte was
111 * wr-protected).
115 if (!userfaultfd_wp(vmf->vma)) in vmf_orig_pte_uffd_wp()
117 if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) in vmf_orig_pte_uffd_wp()
120 return pte_marker_uffd_wp(vmf->orig_pte); in vmf_orig_pte_uffd_wp()
193 mm_dec_nr_ptes(tlb->mm); in free_pte_range()
221 if (end - 1 > ceiling - 1) in free_pmd_range()
227 mm_dec_nr_pmds(tlb->mm); in free_pmd_range()
255 if (end - 1 > ceiling - 1) in free_pud_range()
261 mm_dec_nr_puds(tlb->mm); in free_pud_range()
289 if (end - 1 > ceiling - 1) in free_p4d_range()
298 * This function frees user-level page tables of a process.
314 * Why all these "- 1"s? Because 0 represents both the bottom in free_pgd_range()
315 * of the address space and the top of it (using -1 for the in free_pgd_range()
319 * Comparisons need to use "end - 1" and "ceiling - 1" (though in free_pgd_range()
330 * bother to round floor or end up - the tests don't need that. in free_pgd_range()
344 if (end - 1 > ceiling - 1) in free_pgd_range()
345 end -= PMD_SIZE; in free_pgd_range()
346 if (addr > end - 1) in free_pgd_range()
353 pgd = pgd_offset(tlb->mm, addr); in free_pgd_range()
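/*
 * Illustrative sketch (not part of mm/memory.c): the "- 1" comparisons
 * above rely on unsigned wraparound, because ceiling == 0 means "top of
 * the address space".  Then ceiling - 1 == ULONG_MAX, so the test is
 * false for every end and nothing gets clipped, while a non-zero
 * ceiling still catches an end that lies beyond it.
 */
static inline bool end_is_beyond_ceiling(unsigned long end, unsigned long ceiling)
{
	/* ceiling == 0: 0 - 1 wraps to ULONG_MAX, so this is always false */
	return end - 1 > ceiling - 1;
}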
369 unsigned long addr = vma->vm_start; in free_pgtables()
376 next = mas_find(mas, ceiling - 1); in free_pgtables()
390 hugetlb_free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
391 floor, next ? next->vm_start : ceiling); in free_pgtables()
399 while (next && next->vm_start <= vma->vm_end + PMD_SIZE in free_pgtables()
402 next = mas_find(mas, ceiling - 1); in free_pgtables()
411 free_pgd_range(tlb, addr, vma->vm_end, in free_pgtables()
412 floor, next ? next->vm_start : ceiling); in free_pgtables()
432 * of a chain of data-dependent loads, meaning most CPUs (alpha in pmd_install()
434 * seen in-order. See the alpha page table accessors for the in pmd_install()
448 return -ENOMEM; in __pte_alloc()
460 return -ENOMEM; in __pte_alloc_kernel()
490 * is found. For example, we might have a PFN-mapped pte in
498 pgd_t *pgd = pgd_offset(vma->vm_mm, addr); in print_bad_pte()
527 mapping = vma->vm_file ? vma->vm_file->f_mapping : NULL; in print_bad_pte()
531 current->comm, in print_bad_pte()
536 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index); in print_bad_pte()
538 vma->vm_file, in print_bad_pte()
539 vma->vm_ops ? vma->vm_ops->fault : NULL, in print_bad_pte()
540 vma->vm_file ? vma->vm_file->f_op->mmap : NULL, in print_bad_pte()
541 mapping ? mapping->a_ops->read_folio : NULL); in print_bad_pte()
547 * vm_normal_page -- This function gets the "struct page" associated with a pte.
567 * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
599 if (vma->vm_ops && vma->vm_ops->find_special_page) in vm_normal_page()
600 return vma->vm_ops->find_special_page(vma, addr); in vm_normal_page()
601 if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vm_normal_page()
622 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page()
623 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page()
631 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page()
632 if (pfn == vma->vm_pgoff + off) in vm_normal_page()
634 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page()
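/*
 * Hypothetical usage sketch (not part of mm/memory.c): a PTE walker
 * calls vm_normal_page() so that the zero page and the raw-PFN
 * "special" mappings described above are skipped instead of being
 * treated as ordinary pages.
 */
static void example_visit_pte(struct vm_area_struct *vma,
			      unsigned long addr, pte_t pte)
{
	struct page *page = vm_normal_page(vma, addr, pte);

	if (!page)		/* special mapping: no struct page to operate on */
		return;
	/* ... work with page / page_folio(page) here ... */
}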
677 if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) { in vm_normal_page_pmd()
678 if (vma->vm_flags & VM_MIXEDMAP) { in vm_normal_page_pmd()
684 off = (addr - vma->vm_start) >> PAGE_SHIFT; in vm_normal_page_pmd()
685 if (pfn == vma->vm_pgoff + off) in vm_normal_page_pmd()
687 if (!is_cow_mapping(vma->vm_flags)) in vm_normal_page_pmd()
728 pte = pte_mkold(mk_pte(page, READ_ONCE(vma->vm_page_prot))); in restore_exclusive_pte()
754 set_pte_at(vma->vm_mm, address, ptep, pte); in restore_exclusive_pte()
757 * No need to invalidate - it was non-present before. However in restore_exclusive_pte()
780 return -EBUSY; in try_restore_exclusive_pte()
794 unsigned long vm_flags = dst_vma->vm_flags; in copy_nonpresent_pte()
803 return -EIO; in copy_nonpresent_pte()
806 if (unlikely(list_empty(&dst_mm->mmlist))) { in copy_nonpresent_pte()
808 if (list_empty(&dst_mm->mmlist)) in copy_nonpresent_pte()
809 list_add(&dst_mm->mmlist, in copy_nonpresent_pte()
810 &src_mm->mmlist); in copy_nonpresent_pte()
859 * We do not preserve soft-dirty information, because so in copy_nonpresent_pte()
881 VM_BUG_ON(!is_cow_mapping(src_vma->vm_flags)); in copy_nonpresent_pte()
883 return -EBUSY; in copy_nonpresent_pte()
884 return -ENOENT; in copy_nonpresent_pte()
904 * and re-use the pte the traditional way.
906 * And if we need a pre-allocated page but don't yet have
921 return -EAGAIN; in copy_present_page()
928 if (copy_mc_user_highpage(&new_folio->page, page, addr, src_vma)) in copy_present_page()
929 return -EHWPOISON; in copy_present_page()
938 pte = mk_pte(&new_folio->page, dst_vma->vm_page_prot); in copy_present_page()
941 /* Uffd-wp needs to be delivered to dest pte as well */ in copy_present_page()
943 set_pte_at(dst_vma->vm_mm, addr, dst_pte, pte); in copy_present_page()
951 struct mm_struct *src_mm = src_vma->vm_mm; in __copy_present_ptes()
954 if (is_cow_mapping(src_vma->vm_flags) && pte_write(pte)) { in __copy_present_ptes()
960 if (src_vma->vm_flags & VM_SHARED) in __copy_present_ptes()
967 set_ptes(dst_vma->vm_mm, addr, dst_pte, pte, nr); in __copy_present_ptes()
971 * Copy one present PTE, trying to batch-process subsequent PTEs that map
974 * Returns -EAGAIN if one preallocated page is required to copy the next PTE.
1000 if (src_vma->vm_flags & VM_SHARED) in copy_present_ptes()
1012 return -EAGAIN; in copy_present_ptes()
1081 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pte_range()
1082 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pte_range()
1102 * protected by mmap_lock-less collapse skipping areas with anon_vma in copy_pte_range()
1108 ret = -ENOMEM; in copy_pte_range()
1114 * retract_page_tables() are using vma->anon_vma to be exclusive, so in copy_pte_range()
1134 * We are holding two locks at this point - either of them in copy_pte_range()
1153 if (ret == -EIO) { in copy_pte_range()
1156 } else if (ret == -EBUSY) { in copy_pte_range()
1169 WARN_ON_ONCE(ret != -ENOENT); in copy_pte_range()
1172 max_nr = (end - addr) / PAGE_SIZE; in copy_pte_range()
1176 * If we need a pre-allocated page for this pte, drop the in copy_pte_range()
1180 if (unlikely(ret == -EAGAIN || ret == -EHWPOISON)) in copy_pte_range()
1184 * pre-alloc page cannot be reused by next time so as in copy_pte_range()
1203 if (ret == -EIO) { in copy_pte_range()
1206 ret = -ENOMEM; in copy_pte_range()
1210 } else if (ret == -EBUSY || unlikely(ret == -EHWPOISON)) { in copy_pte_range()
1212 } else if (ret == -EAGAIN) { in copy_pte_range()
1215 return -ENOMEM; in copy_pte_range()
1236 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pmd_range()
1237 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pmd_range()
1243 return -ENOMEM; in copy_pmd_range()
1250 VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); in copy_pmd_range()
1253 if (err == -ENOMEM) in copy_pmd_range()
1254 return -ENOMEM; in copy_pmd_range()
1263 return -ENOMEM; in copy_pmd_range()
1273 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_pud_range()
1274 struct mm_struct *src_mm = src_vma->vm_mm; in copy_pud_range()
1280 return -ENOMEM; in copy_pud_range()
1287 VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); in copy_pud_range()
1290 if (err == -ENOMEM) in copy_pud_range()
1291 return -ENOMEM; in copy_pud_range()
1300 return -ENOMEM; in copy_pud_range()
1310 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_p4d_range()
1316 return -ENOMEM; in copy_p4d_range()
1324 return -ENOMEM; in copy_p4d_range()
1338 * Always copy pgtables when dst_vma has uffd-wp enabled even if it's in vma_needs_copy()
1339 * file-backed (e.g. shmem). Because when uffd-wp is enabled, pgtable in vma_needs_copy()
1340 * contains uffd-wp protection information, that's something we can't in vma_needs_copy()
1346 if (src_vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) in vma_needs_copy()
1349 if (src_vma->anon_vma) in vma_needs_copy()
1365 unsigned long addr = src_vma->vm_start; in copy_page_range()
1366 unsigned long end = src_vma->vm_end; in copy_page_range()
1367 struct mm_struct *dst_mm = dst_vma->vm_mm; in copy_page_range()
1368 struct mm_struct *src_mm = src_vma->vm_mm; in copy_page_range()
1380 if (unlikely(src_vma->vm_flags & VM_PFNMAP)) { in copy_page_range()
1392 is_cow = is_cow_mapping(src_vma->vm_flags); in copy_page_range()
1406 raw_write_seqcount_begin(&src_mm->write_protect_seq); in copy_page_range()
1418 ret = -ENOMEM; in copy_page_range()
1424 raw_write_seqcount_end(&src_mm->write_protect_seq); in copy_page_range()
1427 if (ret && unlikely(src_vma->vm_flags & VM_PFNMAP)) in copy_page_range()
1436 if (!details || details->reclaim_pt) in should_zap_cows()
1440 return details->even_cows; in should_zap_cows()
1451 /* Otherwise we should only zap non-anon folios */ in should_zap_folio()
1460 return details->zap_flags & ZAP_FLAG_DROP_MARKER; in zap_drop_markers()
1464 * This function makes sure that we'll replace the none pte with an uffd-wp
1467 * Returns true if uffd-wp ptes was installed, false otherwise.
1488 if (--nr == 0) in zap_install_uffd_wp_if_needed()
1503 struct mm_struct *mm = tlb->mm; in zap_present_folio_ptes()
1507 ptent = get_and_clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); in zap_present_folio_ptes()
1517 rss[mm_counter(folio)] -= nr; in zap_present_folio_ptes()
1519 /* We don't need up-to-date accessed/dirty bits. */ in zap_present_folio_ptes()
1520 clear_full_ptes(mm, addr, pte, nr, tlb->fullmm); in zap_present_folio_ptes()
1521 rss[MM_ANONPAGES] -= nr; in zap_present_folio_ptes()
1543 * Zap or skip at least one present PTE, trying to batch-process subsequent
1555 struct mm_struct *mm = tlb->mm; in zap_present_ptes()
1562 /* We don't need up-to-date accessed/dirty bits. */ in zap_present_ptes()
1563 ptep_get_and_clear_full(mm, addr, pte, tlb->fullmm); in zap_present_ptes()
1617 * consider uffd-wp bit when zap. For more information, in zap_nonpresent_ptes()
1621 rss[mm_counter(folio)]--; in zap_nonpresent_ptes()
1631 rss[MM_SWAPENTS] -= nr; in zap_nonpresent_ptes()
1638 rss[mm_counter(folio)]--; in zap_nonpresent_ptes()
1662 clear_not_present_full_ptes(vma->vm_mm, addr, pte, nr, tlb->fullmm); in zap_nonpresent_ptes()
1676 int max_nr = (end - addr) / PAGE_SIZE; in do_zap_pte_range()
1686 max_nr -= nr; in do_zap_pte_range()
1710 struct mm_struct *mm = tlb->mm; in zap_pte_range()
1808 if (next - addr != HPAGE_PMD_SIZE) in zap_pmd_range()
1815 } else if (details && details->single_folio && in zap_pmd_range()
1816 folio_test_pmd_mappable(details->single_folio) && in zap_pmd_range()
1817 next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { in zap_pmd_range()
1818 spinlock_t *ptl = pmd_lock(tlb->mm, pmd); in zap_pmd_range()
1832 pmd--; in zap_pmd_range()
1850 if (next - addr != HPAGE_PUD_SIZE) { in zap_pud_range()
1851 mmap_assert_locked(tlb->mm); in zap_pud_range()
1896 pgd = pgd_offset(vma->vm_mm, addr); in unmap_page_range()
1912 unsigned long start = max(vma->vm_start, start_addr); in unmap_single_vma()
1915 if (start >= vma->vm_end) in unmap_single_vma()
1917 end = min(vma->vm_end, end_addr); in unmap_single_vma()
1918 if (end <= vma->vm_start) in unmap_single_vma()
1921 if (vma->vm_file) in unmap_single_vma()
1924 if (unlikely(vma->vm_flags & VM_PFNMAP)) in unmap_single_vma()
1930 * It is undesirable to test vma->vm_file as it in unmap_single_vma()
1931 * should be non-null for valid hugetlb area. in unmap_single_vma()
1934 * hugetlbfs ->mmap method fails, in unmap_single_vma()
1935 * mmap_region() nullifies vma->vm_file in unmap_single_vma()
1940 if (vma->vm_file) { in unmap_single_vma()
1942 details->zap_flags : 0; in unmap_single_vma()
1952 * unmap_vmas - unmap a range of memory covered by a list of vma's
1969 * ensure that any thus-far unmapped pages are flushed before unmap_vmas()
1980 /* Careful - we need to zap private pages too! */ in unmap_vmas()
1984 mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma->vm_mm, in unmap_vmas()
1994 vma = mas_find(mas, tree_end - 1); in unmap_vmas()
2000 * zap_page_range_single - remove user pages in a given range
2015 mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma->vm_mm, in zap_page_range_single()
2018 tlb_gather_mmu(&tlb, vma->vm_mm); in zap_page_range_single()
2019 update_hiwater_rss(vma->vm_mm); in zap_page_range_single()
2022 * unmap 'address-end' not 'range.start-range.end' as range in zap_page_range_single()
2032 * zap_vma_ptes - remove ptes mapping the vma
2046 !(vma->vm_flags & VM_PFNMAP)) in zap_vma_ptes()
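/*
 * Hypothetical sketch (not part of mm/memory.c): a driver that earlier
 * populated this vma (e.g. with vm_insert_page()) revokes the mapping,
 * for instance on device reset, by zapping every PTE in the range.
 */
static void exdev_revoke_mapping(struct vm_area_struct *vma)
{
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}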
2087 VM_WARN_ON_ONCE(vma->vm_flags & VM_PFNMAP); in vm_mixed_zeropage_allowed()
2094 if (mm_forbids_zeropage(vma->vm_mm)) in vm_mixed_zeropage_allowed()
2097 if (is_cow_mapping(vma->vm_flags)) in vm_mixed_zeropage_allowed()
2100 if (!(vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) in vm_mixed_zeropage_allowed()
2103 * Why not allow any VMA that has vm_ops->pfn_mkwrite? GUP could in vm_mixed_zeropage_allowed()
2104 * find the shared zeropage and longterm-pin it, which would in vm_mixed_zeropage_allowed()
2106 * page due to vma->vm_ops->pfn_mkwrite, because what's mapped would in vm_mixed_zeropage_allowed()
2111 return vma->vm_ops && vma->vm_ops->pfn_mkwrite && in vm_mixed_zeropage_allowed()
2112 (vma_is_fsdax(vma) || vma->vm_flags & VM_IO); in vm_mixed_zeropage_allowed()
2121 return -EINVAL; in validate_page_before_insert()
2124 return -EINVAL; in validate_page_before_insert()
2129 return -EINVAL; in validate_page_before_insert()
2141 return -EBUSY; in insert_page_into_pte_locked()
2148 inc_mm_counter(vma->vm_mm, mm_counter_file(folio)); in insert_page_into_pte_locked()
2151 set_pte_at(vma->vm_mm, addr, pte, pteval); in insert_page_into_pte_locked()
2165 retval = -ENOMEM; in insert_page()
2166 pte = get_locked_pte(vma->vm_mm, addr, &ptl); in insert_page()
2195 struct mm_struct *const mm = vma->vm_mm; in insert_pages()
2201 ret = -EFAULT; in insert_pages()
2207 remaining_pages_total, PTRS_PER_PTE - pte_index(addr)); in insert_pages()
2210 ret = -ENOMEM; in insert_pages()
2220 ret = -EFAULT; in insert_pages()
2229 remaining_pages_total -= pte_idx; in insert_pages()
2236 pages_to_write_in_pmd -= batch_size; in insert_pages()
2237 remaining_pages_total -= batch_size; in insert_pages()
2248 * vm_insert_pages - insert multiple pages into user vma, batching the pmd lock.
2265 const unsigned long end_addr = addr + (*num * PAGE_SIZE) - 1; in vm_insert_pages()
2267 if (addr < vma->vm_start || end_addr >= vma->vm_end) in vm_insert_pages()
2268 return -EFAULT; in vm_insert_pages()
2269 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_pages()
2270 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_pages()
2271 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_pages()
2275 return insert_pages(vma, addr, pages, num, vma->vm_page_prot); in vm_insert_pages()
2280 * vm_insert_page - insert single page into user vma
2302 * Usually this function is called from f_op->mmap() handler
2303 * under mm->mmap_lock write-lock, so it can change vma->vm_flags.
2305 * function from other places, for example from page-fault handler.
2312 if (addr < vma->vm_start || addr >= vma->vm_end) in vm_insert_page()
2313 return -EFAULT; in vm_insert_page()
2314 if (!(vma->vm_flags & VM_MIXEDMAP)) { in vm_insert_page()
2315 BUG_ON(mmap_read_trylock(vma->vm_mm)); in vm_insert_page()
2316 BUG_ON(vma->vm_flags & VM_PFNMAP); in vm_insert_page()
2319 return insert_page(vma, addr, page, vma->vm_page_prot); in vm_insert_page()
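/*
 * Hypothetical sketch (not part of mm/memory.c): a driver's ->mmap
 * handler sharing one kernel-allocated page with userspace, called
 * under the mmap_lock write-lock as the kerneldoc above describes.
 * "struct exdev" and its shared_page field are assumptions made for
 * illustration only.
 */
static int exdev_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct exdev *dev = file->private_data;

	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
		return -EINVAL;
	return vm_insert_page(vma, vma->vm_start, dev->shared_page);
}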
2324 * __vm_map_pages - maps range of kernel pages into user vma
2340 unsigned long uaddr = vma->vm_start; in __vm_map_pages()
2345 return -ENXIO; in __vm_map_pages()
2348 if (count > num - offset) in __vm_map_pages()
2349 return -ENXIO; in __vm_map_pages()
2362 * vm_map_pages - maps range of kernel pages, starting at a non-zero offset
2382 return __vm_map_pages(vma, pages, num, vma->vm_pgoff); in vm_map_pages()
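/*
 * Hypothetical sketch (not part of mm/memory.c): a driver mapping an
 * array of preallocated pages in one call; vm_map_pages() honours the
 * user-supplied vm_pgoff and rejects ranges that do not fit.  The
 * "struct exdev" fields are assumptions for illustration.
 */
static int exdev_map_buffer(struct file *file, struct vm_area_struct *vma)
{
	struct exdev *dev = file->private_data;

	return vm_map_pages(vma, dev->pages, dev->nr_pages);
}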
2387 * vm_map_pages_zero - map range of kernel pages, starting at offset zero
2409 struct mm_struct *mm = vma->vm_mm; in insert_pfn()
2461 * vmf_insert_pfn_prot - insert single pfn into user vma with specified pgprot
2468 * to override pgprot on a per-page basis.
2475 * pgprot typically only differs from @vma->vm_page_prot when drivers set
2476 * caching- and encryption bits different than those of @vma->vm_page_prot,
2477 * because the caching- or encryption mode may not be known at mmap() time.
2479 * This is ok as long as @vma->vm_page_prot is not used by the core vm
2482 * functions that don't touch caching- or encryption bits, using pte_modify()
2485 * Also when new page-table entries are created, this is only done using the
2486 * fault() callback, and never using the value of vma->vm_page_prot,
2487 * except for page-table entries that point to anonymous pages as the result
2502 BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); in vmf_insert_pfn_prot()
2503 BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == in vmf_insert_pfn_prot()
2505 BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); in vmf_insert_pfn_prot()
2506 BUG_ON((vma->vm_flags & VM_MIXEDMAP) && pfn_valid(pfn)); in vmf_insert_pfn_prot()
2508 if (addr < vma->vm_start || addr >= vma->vm_end) in vmf_insert_pfn_prot()
2522 * vmf_insert_pfn - insert single pfn into user vma
2530 * This function should only be called from a vm_ops->fault handler, and
2544 return vmf_insert_pfn_prot(vma, addr, pfn, vma->vm_page_prot); in vmf_insert_pfn()
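/*
 * Hypothetical sketch (not part of mm/memory.c): a vm_ops->fault
 * handler for a VM_PFNMAP area backed by device memory, inserting the
 * faulting PFN with vmf_insert_pfn() as the kerneldoc above intends.
 * "struct exdev" and its base_pfn field are assumptions.
 */
static vm_fault_t exdev_fault(struct vm_fault *vmf)
{
	struct exdev *dev = vmf->vma->vm_private_data;

	return vmf_insert_pfn(vmf->vma, vmf->address, dev->base_pfn + vmf->pgoff);
}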
2554 if (vma->vm_flags & VM_MIXEDMAP) in vm_mixed_ok()
2568 pgprot_t pgprot = vma->vm_page_prot; in __vm_insert_mixed()
2574 if (addr < vma->vm_start || addr >= vma->vm_end) in __vm_insert_mixed()
2604 if (err == -ENOMEM) in __vm_insert_mixed()
2606 if (err < 0 && err != -EBUSY) in __vm_insert_mixed()
2633 * in null mappings (currently treated as "copy-on-access")
2645 return -ENOMEM; in remap_pte_range()
2650 err = -EACCES; in remap_pte_range()
2669 pfn -= addr >> PAGE_SHIFT; in remap_pmd_range()
2672 return -ENOMEM; in remap_pmd_range()
2692 pfn -= addr >> PAGE_SHIFT; in remap_pud_range()
2695 return -ENOMEM; in remap_pud_range()
2714 pfn -= addr >> PAGE_SHIFT; in remap_p4d_range()
2717 return -ENOMEM; in remap_p4d_range()
2734 struct mm_struct *mm = vma->vm_mm; in remap_pfn_range_internal()
2738 return -EINVAL; in remap_pfn_range_internal()
2753 * There's a horrible special case to handle copy-on-write in remap_pfn_range_internal()
2755 * un-COW'ed pages by matching them up with "vma->vm_pgoff". in remap_pfn_range_internal()
2758 if (is_cow_mapping(vma->vm_flags)) { in remap_pfn_range_internal()
2759 if (addr != vma->vm_start || end != vma->vm_end) in remap_pfn_range_internal()
2760 return -EINVAL; in remap_pfn_range_internal()
2761 vma->vm_pgoff = pfn; in remap_pfn_range_internal()
2767 pfn -= addr >> PAGE_SHIFT; in remap_pfn_range_internal()
2783 * must have pre-validated the caching bits of the pgprot_t.
2803 * remap_pfn_range - remap kernel memory to userspace
2821 return -EINVAL; in remap_pfn_range()
2831 * vm_iomap_memory - remap memory to userspace
2840 * NOTE! Some drivers might want to tweak vma->vm_page_prot first to get
2841 * whatever write-combining details or similar.
2851 return -EINVAL; in vm_iomap_memory()
2853 * You *really* shouldn't map things that aren't page-aligned, in vm_iomap_memory()
2861 return -EINVAL; in vm_iomap_memory()
2864 if (vma->vm_pgoff > pages) in vm_iomap_memory()
2865 return -EINVAL; in vm_iomap_memory()
2866 pfn += vma->vm_pgoff; in vm_iomap_memory()
2867 pages -= vma->vm_pgoff; in vm_iomap_memory()
2870 vm_len = vma->vm_end - vma->vm_start; in vm_iomap_memory()
2872 return -EINVAL; in vm_iomap_memory()
2875 return io_remap_pfn_range(vma, vma->vm_start, pfn, vm_len, vma->vm_page_prot); in vm_iomap_memory()
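/*
 * Hypothetical sketch (not part of mm/memory.c): mapping a whole MMIO
 * region in one call; vm_iomap_memory() performs the alignment, pgoff
 * and length checks shown above before handing off to
 * io_remap_pfn_range().  "struct exdev", bar_start and bar_len are
 * assumptions for illustration.
 */
static int exdev_iomap_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct exdev *dev = file->private_data;

	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
	return vm_iomap_memory(vma, dev->bar_start, dev->bar_len);
}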
2893 return -ENOMEM; in apply_to_pte_range()
2899 return -EINVAL; in apply_to_pte_range()
2936 return -ENOMEM; in apply_to_pmd_range()
2945 return -EINVAL; in apply_to_pmd_range()
2972 return -ENOMEM; in apply_to_pud_range()
2981 return -EINVAL; in apply_to_pud_range()
3008 return -ENOMEM; in apply_to_p4d_range()
3017 return -EINVAL; in apply_to_p4d_range()
3043 return -EINVAL; in __apply_to_page_range()
3051 err = -EINVAL; in __apply_to_page_range()
3097 * read non-atomically. Before making any commitment, on those architectures
3108 spin_lock(vmf->ptl); in pte_unmap_same()
3109 same = pte_same(ptep_get(vmf->pte), vmf->orig_pte); in pte_unmap_same()
3110 spin_unlock(vmf->ptl); in pte_unmap_same()
3113 pte_unmap(vmf->pte); in pte_unmap_same()
3114 vmf->pte = NULL; in pte_unmap_same()
3121 * -EHWPOISON: copy failed due to hwpoison in source page
3122 * -EAGAIN: copy failed (some other reason)
3130 struct vm_area_struct *vma = vmf->vma; in __wp_page_copy_user()
3131 struct mm_struct *mm = vma->vm_mm; in __wp_page_copy_user()
3132 unsigned long addr = vmf->address; in __wp_page_copy_user()
3136 return -EHWPOISON; in __wp_page_copy_user()
3142 * a "struct page" for it. We do a best-effort copy by in __wp_page_copy_user()
3144 * fails, we just zero-fill it. Live with it. in __wp_page_copy_user()
3154 vmf->pte = NULL; in __wp_page_copy_user()
3155 if (!arch_has_hw_pte_young() && !pte_young(vmf->orig_pte)) { in __wp_page_copy_user()
3158 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
3159 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
3164 if (vmf->pte) in __wp_page_copy_user()
3165 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
3166 ret = -EAGAIN; in __wp_page_copy_user()
3170 entry = pte_mkyoung(vmf->orig_pte); in __wp_page_copy_user()
3171 if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) in __wp_page_copy_user()
3172 update_mmu_cache_range(vmf, vma, addr, vmf->pte, 1); in __wp_page_copy_user()
3182 if (vmf->pte) in __wp_page_copy_user()
3185 /* Re-validate under PTL if the page is still mapped */ in __wp_page_copy_user()
3186 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); in __wp_page_copy_user()
3187 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in __wp_page_copy_user()
3189 if (vmf->pte) in __wp_page_copy_user()
3190 update_mmu_tlb(vma, addr, vmf->pte); in __wp_page_copy_user()
3191 ret = -EAGAIN; in __wp_page_copy_user()
3202 * use-case in __wp_page_copy_user()
3213 if (vmf->pte) in __wp_page_copy_user()
3214 pte_unmap_unlock(vmf->pte, vmf->ptl); in __wp_page_copy_user()
3224 struct file *vm_file = vma->vm_file; in __get_fault_gfp_mask()
3227 return mapping_gfp_mask(vm_file->f_mapping) | __GFP_FS | __GFP_IO; in __get_fault_gfp_mask()
3245 unsigned int old_flags = vmf->flags; in do_page_mkwrite()
3247 vmf->flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE; in do_page_mkwrite()
3249 if (vmf->vma->vm_file && in do_page_mkwrite()
3250 IS_SWAPFILE(vmf->vma->vm_file->f_mapping->host)) in do_page_mkwrite()
3253 ret = vmf->vma->vm_ops->page_mkwrite(vmf); in do_page_mkwrite()
3255 vmf->flags = old_flags; in do_page_mkwrite()
3260 if (!folio->mapping) { in do_page_mkwrite()
3277 struct vm_area_struct *vma = vmf->vma; in fault_dirty_shared_page()
3279 struct folio *folio = page_folio(vmf->page); in fault_dirty_shared_page()
3281 bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite; in fault_dirty_shared_page()
3286 * Take a local copy of the address_space - folio.mapping may be zeroed in fault_dirty_shared_page()
3288 * pinned by vma->vm_file's reference. We rely on folio_unlock()'s in fault_dirty_shared_page()
3295 file_update_time(vma->vm_file); in fault_dirty_shared_page()
3326 * any related book-keeping.
3329 __releases(vmf->ptl) in wp_page_reuse()
3331 struct vm_area_struct *vma = vmf->vma; in wp_page_reuse()
3334 VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE)); in wp_page_reuse()
3335 VM_WARN_ON(is_zero_pfn(pte_pfn(vmf->orig_pte))); in wp_page_reuse()
3339 !PageAnonExclusive(vmf->page)); in wp_page_reuse()
3345 folio_xchg_last_cpupid(folio, (1 << LAST_CPUPID_SHIFT) - 1); in wp_page_reuse()
3348 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_reuse()
3349 entry = pte_mkyoung(vmf->orig_pte); in wp_page_reuse()
3351 if (ptep_set_access_flags(vma, vmf->address, vmf->pte, entry, 1)) in wp_page_reuse()
3352 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_reuse()
3353 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_reuse()
3359 * vm_ops that have a ->map_pages have been audited and don't need
3364 struct vm_area_struct *vma = vmf->vma; in vmf_can_call_fault()
3366 if (vma->vm_ops->map_pages || !(vmf->flags & FAULT_FLAG_VMA_LOCK)) in vmf_can_call_fault()
3373 * __vmf_anon_prepare - Prepare to handle an anonymous fault.
3379 * only protected by the per-VMA lock, the caller must retry with the
3382 * do with only the per-VMA lock held for this VMA.
3389 struct vm_area_struct *vma = vmf->vma; in __vmf_anon_prepare()
3392 if (likely(vma->anon_vma)) in __vmf_anon_prepare()
3394 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in __vmf_anon_prepare()
3395 if (!mmap_read_trylock(vma->vm_mm)) in __vmf_anon_prepare()
3400 if (vmf->flags & FAULT_FLAG_VMA_LOCK) in __vmf_anon_prepare()
3401 mmap_read_unlock(vma->vm_mm); in __vmf_anon_prepare()
3412 * High level logic flow:
3414 * - Allocate a page, copy the content of the old page to the new one.
3415 * - Handle bookkeeping and accounting - cgroups, mmu-notifiers, etc.
3416 * - Take the PTL. If the pte changed, bail out and release the allocated page
3417 * - If the pte is still the way we remember it, update the page table and all
3418 * relevant references. This includes dropping the reference the page-table
3420 * - In any case, unlock the PTL and drop the reference we took to the old page.
3424 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_page_copy()
3425 struct vm_area_struct *vma = vmf->vma; in wp_page_copy()
3426 struct mm_struct *mm = vma->vm_mm; in wp_page_copy()
3437 if (vmf->page) in wp_page_copy()
3438 old_folio = page_folio(vmf->page); in wp_page_copy()
3443 pfn_is_zero = is_zero_pfn(pte_pfn(vmf->orig_pte)); in wp_page_copy()
3444 new_folio = folio_prealloc(mm, vma, vmf->address, pfn_is_zero); in wp_page_copy()
3451 err = __wp_page_copy_user(&new_folio->page, vmf->page, vmf); in wp_page_copy()
3455 * it's fine. If not, userspace would re-fault on in wp_page_copy()
3458 * The -EHWPOISON case will not be retried. in wp_page_copy()
3465 return err == -EHWPOISON ? VM_FAULT_HWPOISON : 0; in wp_page_copy()
3467 kmsan_copy_page_meta(&new_folio->page, vmf->page); in wp_page_copy()
3473 vmf->address & PAGE_MASK, in wp_page_copy()
3474 (vmf->address & PAGE_MASK) + PAGE_SIZE); in wp_page_copy()
3478 * Re-check the pte - we dropped the lock in wp_page_copy()
3480 vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl); in wp_page_copy()
3481 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in wp_page_copy()
3488 ksm_might_unmap_zero_page(mm, vmf->orig_pte); in wp_page_copy()
3491 flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte)); in wp_page_copy()
3492 entry = mk_pte(&new_folio->page, vma->vm_page_prot); in wp_page_copy()
3495 if (pte_soft_dirty(vmf->orig_pte)) in wp_page_copy()
3497 if (pte_uffd_wp(vmf->orig_pte)) in wp_page_copy()
3510 ptep_clear_flush(vma, vmf->address, vmf->pte); in wp_page_copy()
3511 folio_add_new_anon_rmap(new_folio, vma, vmf->address, RMAP_EXCLUSIVE); in wp_page_copy()
3514 set_pte_at(mm, vmf->address, vmf->pte, entry); in wp_page_copy()
3515 update_mmu_cache_range(vmf, vma, vmf->address, vmf->pte, 1); in wp_page_copy()
3539 folio_remove_rmap_pte(old_folio, vmf->page, vma); in wp_page_copy()
3545 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3546 } else if (vmf->pte) { in wp_page_copy()
3547 update_mmu_tlb(vma, vmf->address, vmf->pte); in wp_page_copy()
3548 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_copy()
3574 * finish_mkwrite_fault - finish page fault for a shared mapping, making PTE
3578 * @folio: the folio of vmf->page
3581 * shared mapping due to PTE being read-only once the mapped page is prepared.
3592 WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED)); in finish_mkwrite_fault()
3593 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, in finish_mkwrite_fault()
3594 &vmf->ptl); in finish_mkwrite_fault()
3595 if (!vmf->pte) in finish_mkwrite_fault()
3601 if (!pte_same(ptep_get(vmf->pte), vmf->orig_pte)) { in finish_mkwrite_fault()
3602 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in finish_mkwrite_fault()
3603 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_mkwrite_fault()
3616 struct vm_area_struct *vma = vmf->vma; in wp_pfn_shared()
3618 if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) { in wp_pfn_shared()
3621 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_pfn_shared()
3626 vmf->flags |= FAULT_FLAG_MKWRITE; in wp_pfn_shared()
3627 ret = vma->vm_ops->pfn_mkwrite(vmf); in wp_pfn_shared()
3637 __releases(vmf->ptl) in wp_page_shared()
3639 struct vm_area_struct *vma = vmf->vma; in wp_page_shared()
3644 if (vma->vm_ops && vma->vm_ops->page_mkwrite) { in wp_page_shared()
3647 pte_unmap_unlock(vmf->pte, vmf->ptl); in wp_page_shared()
3731 * shared-page counter for the old page.
3734 * done by the caller (the low-level page fault routine in most cases).
3742 * We enter with non-exclusive mmap_lock (to exclude vma changes,
3747 __releases(vmf->ptl) in do_wp_page()
3749 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in do_wp_page()
3750 struct vm_area_struct *vma = vmf->vma; in do_wp_page()
3755 if (userfaultfd_pte_wp(vma, ptep_get(vmf->pte))) { in do_wp_page()
3757 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3763 * etc.) because we're only removing the uffd-wp bit, in do_wp_page()
3766 pte = pte_clear_uffd_wp(ptep_get(vmf->pte)); in do_wp_page()
3768 set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); in do_wp_page()
3773 vmf->orig_pte = pte; in do_wp_page()
3777 * Userfaultfd write-protect can defer flushes. Ensure the TLB in do_wp_page()
3780 if (unlikely(userfaultfd_wp(vmf->vma) && in do_wp_page()
3781 mm_tlb_flush_pending(vmf->vma->vm_mm))) in do_wp_page()
3782 flush_tlb_page(vmf->vma, vmf->address); in do_wp_page()
3785 vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); in do_wp_page()
3787 if (vmf->page) in do_wp_page()
3788 folio = page_folio(vmf->page); in do_wp_page()
3794 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in do_wp_page()
3800 * Just mark the pages writable and/or call ops->pfn_mkwrite. in do_wp_page()
3802 if (!vmf->page) in do_wp_page()
3815 (PageAnonExclusive(vmf->page) || wp_can_reuse_anon_folio(folio, vma))) { in do_wp_page()
3816 if (!PageAnonExclusive(vmf->page)) in do_wp_page()
3817 SetPageAnonExclusive(vmf->page); in do_wp_page()
3819 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3831 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_wp_page()
3843 zap_page_range_single(vma, start_addr, end_addr - start_addr, details); in unmap_mapping_range_vma()
3855 vba = vma->vm_pgoff; in unmap_mapping_range_tree()
3856 vea = vba + vma_pages(vma) - 1; in unmap_mapping_range_tree()
3861 ((zba - vba) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3862 ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, in unmap_mapping_range_tree()
3868 * unmap_mapping_folio() - Unmap single folio from processes.
3880 struct address_space *mapping = folio->mapping; in unmap_mapping_folio()
3887 first_index = folio->index; in unmap_mapping_folio()
3888 last_index = folio_next_index(folio) - 1; in unmap_mapping_folio()
3895 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_folio()
3896 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_folio()
3902 * unmap_mapping_pages() - Unmap pages from processes.
3918 pgoff_t last_index = start + nr - 1; in unmap_mapping_pages()
3925 if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))) in unmap_mapping_pages()
3926 unmap_mapping_range_tree(&mapping->i_mmap, first_index, in unmap_mapping_pages()
3933 * unmap_mapping_range - unmap the portion of all mmaps in the specified
3953 pgoff_t hlen = ((pgoff_t)(holelen) + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3958 (holebegin + holelen + PAGE_SIZE - 1) >> PAGE_SHIFT; in unmap_mapping_range()
3960 hlen = ULONG_MAX - hba + 1; in unmap_mapping_range()
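/*
 * Hypothetical sketch (not part of mm/memory.c): a filesystem punching
 * a hole first unmaps the byte range from every process that has the
 * file mapped, then drops the page cache.  even_cows == 1 also removes
 * private COW copies of the affected pages.
 */
static void example_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
	unmap_mapping_range(inode->i_mapping, offset, len, 1);
	truncate_inode_pages_range(inode->i_mapping, offset, offset + len - 1);
}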
3972 struct folio *folio = page_folio(vmf->page); in remove_device_exclusive_entry()
3973 struct vm_area_struct *vma = vmf->vma; in remove_device_exclusive_entry()
3979 * the PTL so a racing thread can remove the device-exclusive in remove_device_exclusive_entry()
3982 * been re-allocated after being freed all we do is lock and in remove_device_exclusive_entry()
3994 vma->vm_mm, vmf->address & PAGE_MASK, in remove_device_exclusive_entry()
3995 (vmf->address & PAGE_MASK) + PAGE_SIZE, NULL); in remove_device_exclusive_entry()
3998 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in remove_device_exclusive_entry()
3999 &vmf->ptl); in remove_device_exclusive_entry()
4000 if (likely(vmf->pte && pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in remove_device_exclusive_entry()
4001 restore_exclusive_pte(vma, vmf->page, vmf->address, vmf->pte); in remove_device_exclusive_entry()
4003 if (vmf->pte) in remove_device_exclusive_entry()
4004 pte_unmap_unlock(vmf->pte, vmf->ptl); in remove_device_exclusive_entry()
4018 if (mem_cgroup_swap_full(folio) || (vma->vm_flags & VM_LOCKED) || in should_try_to_free_swap()
4033 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in pte_marker_clear()
4034 vmf->address, &vmf->ptl); in pte_marker_clear()
4035 if (!vmf->pte) in pte_marker_clear()
4038 * Be careful so that we will only recover a special uffd-wp pte into a in pte_marker_clear()
4045 if (pte_same(vmf->orig_pte, ptep_get(vmf->pte))) in pte_marker_clear()
4046 pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); in pte_marker_clear()
4047 pte_unmap_unlock(vmf->pte, vmf->ptl); in pte_marker_clear()
4053 if (vma_is_anonymous(vmf->vma)) in do_pte_missing()
4060 * This is actually a page-missing access, but with uffd-wp special pte
4061 * installed. It means this pte was wr-protected before being unmapped.
4067 * got unregistered - we can simply clear them. in pte_marker_handle_uffd_wp()
4069 if (unlikely(!userfaultfd_wp(vmf->vma))) in pte_marker_handle_uffd_wp()
4077 swp_entry_t entry = pte_to_swp_entry(vmf->orig_pte); in handle_pte_marker()
4087 /* Higher priority than uffd-wp when data corrupted */ in handle_pte_marker()
4104 struct vm_area_struct *vma = vmf->vma; in __alloc_swap_folio()
4108 folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address); in __alloc_swap_folio()
4112 entry = pte_to_swp_entry(vmf->orig_pte); in __alloc_swap_folio()
4113 if (mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, in __alloc_swap_folio()
4136 if ((si->swap_map[offset + i] & SWAP_HAS_CACHE)) in non_swapcache_batch()
4154 addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); in can_swapin_thp()
4155 idx = (vmf->address - addr) / PAGE_SIZE; in can_swapin_thp()
4158 if (!pte_same(pte, pte_move_swp_offset(vmf->orig_pte, -idx))) in can_swapin_thp()
4202 struct vm_area_struct *vma = vmf->vma; in alloc_swap_folio()
4213 * If uffd is active for the vma we need per-page fault fidelity to in alloc_swap_folio()
4221 * lack handling for such cases, so fallback to swapping in order-0 in alloc_swap_folio()
4227 entry = pte_to_swp_entry(vmf->orig_pte); in alloc_swap_folio()
4232 orders = thp_vma_allowable_orders(vma, vma->vm_flags, in alloc_swap_folio()
4233 TVA_IN_PF | TVA_ENFORCE_SYSFS, BIT(PMD_ORDER) - 1); in alloc_swap_folio()
4234 orders = thp_vma_suitable_orders(vma, vmf->address, orders); in alloc_swap_folio()
4236 vmf->address, orders); in alloc_swap_folio()
4241 pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in alloc_swap_folio()
4242 vmf->address & PMD_MASK, &ptl); in alloc_swap_folio()
4252 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_swap_folio()
4263 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_swap_folio()
4266 if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, in alloc_swap_folio()
4289 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4298 struct vm_area_struct *vma = vmf->vma; in do_swap_page()
4318 entry = pte_to_swp_entry(vmf->orig_pte); in do_swap_page()
4321 migration_entry_wait(vma->vm_mm, vmf->pmd, in do_swap_page()
4322 vmf->address); in do_swap_page()
4324 vmf->page = pfn_swap_entry_to_page(entry); in do_swap_page()
4327 if (vmf->flags & FAULT_FLAG_VMA_LOCK) { in do_swap_page()
4337 vmf->page = pfn_swap_entry_to_page(entry); in do_swap_page()
4338 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
4339 vmf->address, &vmf->ptl); in do_swap_page()
4340 if (unlikely(!vmf->pte || in do_swap_page()
4341 !pte_same(ptep_get(vmf->pte), in do_swap_page()
4342 vmf->orig_pte))) in do_swap_page()
4349 get_page(vmf->page); in do_swap_page()
4350 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4351 ret = vmf->page->pgmap->ops->migrate_to_ram(vmf); in do_swap_page()
4352 put_page(vmf->page); in do_swap_page()
4358 print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); in do_swap_page()
4369 folio = swap_cache_get_folio(entry, vma, vmf->address); in do_swap_page()
4375 if (data_race(si->flags & SWP_SYNCHRONOUS_IO) && in do_swap_page()
4415 folio->swap = entry; in do_swap_page()
4417 folio->private = NULL; in do_swap_page()
4430 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_swap_page()
4431 vmf->address, &vmf->ptl); in do_swap_page()
4432 if (likely(vmf->pte && in do_swap_page()
4433 pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
4441 count_memcg_event_mm(vma->vm_mm, PGMAJFAULT); in do_swap_page()
4470 * page->index of !PageKSM() pages would be nonlinear inside the in do_swap_page()
4471 * anon VMA -- PageKSM() is lost on actual swapout. in do_swap_page()
4473 folio = ksm_might_need_to_copy(folio, vma, vmf->address); in do_swap_page()
4478 } else if (unlikely(folio == ERR_PTR(-EHWPOISON))) { in do_swap_page()
4492 if ((vmf->flags & FAULT_FLAG_WRITE) && folio == swapcache && in do_swap_page()
4502 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, in do_swap_page()
4503 &vmf->ptl); in do_swap_page()
4504 if (unlikely(!vmf->pte || !pte_same(ptep_get(vmf->pte), vmf->orig_pte))) in do_swap_page()
4515 unsigned long folio_start = ALIGN_DOWN(vmf->address, nr * PAGE_SIZE); in do_swap_page()
4516 unsigned long idx = (vmf->address - folio_start) / PAGE_SIZE; in do_swap_page()
4517 pte_t *folio_ptep = vmf->pte - idx; in do_swap_page()
4520 if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) || in do_swap_page()
4532 address = vmf->address; in do_swap_page()
4533 ptep = vmf->pte; in do_swap_page()
4537 unsigned long folio_start = address - idx * PAGE_SIZE; in do_swap_page()
4542 if (unlikely(folio_start < max(address & PMD_MASK, vma->vm_start))) in do_swap_page()
4544 if (unlikely(folio_end > pmd_addr_end(address, vma->vm_end))) in do_swap_page()
4547 folio_ptep = vmf->pte - idx; in do_swap_page()
4549 if (!pte_same(folio_pte, pte_move_swp_offset(vmf->orig_pte, -idx)) || in do_swap_page()
4557 entry = folio->swap; in do_swap_page()
4558 page = &folio->page; in do_swap_page()
4578 exclusive = pte_swp_exclusive(vmf->orig_pte); in do_swap_page()
4582 * swapcache -> certainly exclusive. in do_swap_page()
4586 data_race(si->flags & SWP_STABLE_WRITES)) { in do_swap_page()
4622 if (should_try_to_free_swap(folio, vma, vmf->flags)) in do_swap_page()
4625 add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages); in do_swap_page()
4626 add_mm_counter(vma->vm_mm, MM_SWAPENTS, -nr_pages); in do_swap_page()
4627 pte = mk_pte(page, vma->vm_page_prot); in do_swap_page()
4628 if (pte_swp_soft_dirty(vmf->orig_pte)) in do_swap_page()
4630 if (pte_swp_uffd_wp(vmf->orig_pte)) in do_swap_page()
4641 if ((vma->vm_flags & VM_WRITE) && !userfaultfd_pte_wp(vma, pte) && in do_swap_page()
4644 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
4646 vmf->flags &= ~FAULT_FLAG_WRITE; in do_swap_page()
4651 folio_ref_add(folio, nr_pages - 1); in do_swap_page()
4653 vmf->orig_pte = pte_advance_pfn(pte, page_idx); in do_swap_page()
4676 set_ptes(vma->vm_mm, address, ptep, pte, nr_pages); in do_swap_page()
4677 arch_do_swap_page_nr(vma->vm_mm, vma, address, in do_swap_page()
4694 if (vmf->flags & FAULT_FLAG_WRITE) { in do_swap_page()
4701 /* No need to invalidate - it was non-present before */ in do_swap_page()
4704 if (vmf->pte) in do_swap_page()
4705 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4717 if (vmf->pte) in do_swap_page()
4718 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_swap_page()
4751 struct vm_area_struct *vma = vmf->vma; in alloc_anon_folio()
4761 * If uffd is active for the vma we need per-page fault fidelity to in alloc_anon_folio()
4772 orders = thp_vma_allowable_orders(vma, vma->vm_flags, in alloc_anon_folio()
4773 TVA_IN_PF | TVA_ENFORCE_SYSFS, BIT(PMD_ORDER) - 1); in alloc_anon_folio()
4774 orders = thp_vma_suitable_orders(vma, vmf->address, orders); in alloc_anon_folio()
4779 pte = pte_offset_map(vmf->pmd, vmf->address & PMD_MASK); in alloc_anon_folio()
4781 return ERR_PTR(-EAGAIN); in alloc_anon_folio()
4790 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_anon_folio()
4804 addr = ALIGN_DOWN(vmf->address, PAGE_SIZE << order); in alloc_anon_folio()
4807 if (mem_cgroup_charge(folio, vma->vm_mm, gfp)) { in alloc_anon_folio()
4821 folio_zero_user(folio, vmf->address); in alloc_anon_folio()
4831 return folio_prealloc(vma->vm_mm, vma, vmf->address, true); in alloc_anon_folio()
4835 * We enter with non-exclusive mmap_lock (to exclude vma changes,
4841 struct vm_area_struct *vma = vmf->vma; in do_anonymous_page()
4842 unsigned long addr = vmf->address; in do_anonymous_page()
4848 /* File mapping without ->vm_ops ? */ in do_anonymous_page()
4849 if (vma->vm_flags & VM_SHARED) in do_anonymous_page()
4856 if (pte_alloc(vma->vm_mm, vmf->pmd)) in do_anonymous_page()
4859 /* Use the zero-page for reads */ in do_anonymous_page()
4860 if (!(vmf->flags & FAULT_FLAG_WRITE) && in do_anonymous_page()
4861 !mm_forbids_zeropage(vma->vm_mm)) { in do_anonymous_page()
4862 entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address), in do_anonymous_page()
4863 vma->vm_page_prot)); in do_anonymous_page()
4864 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_anonymous_page()
4865 vmf->address, &vmf->ptl); in do_anonymous_page()
4866 if (!vmf->pte) in do_anonymous_page()
4869 update_mmu_tlb(vma, vmf->address, vmf->pte); in do_anonymous_page()
4872 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
4877 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4887 /* Returns NULL on OOM or ERR_PTR(-EAGAIN) if we must retry the fault */ in do_anonymous_page()
4895 addr = ALIGN_DOWN(vmf->address, nr_pages * PAGE_SIZE); in do_anonymous_page()
4904 entry = mk_pte(&folio->page, vma->vm_page_prot); in do_anonymous_page()
4906 if (vma->vm_flags & VM_WRITE) in do_anonymous_page()
4909 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, addr, &vmf->ptl); in do_anonymous_page()
4910 if (!vmf->pte) in do_anonymous_page()
4913 update_mmu_tlb(vma, addr, vmf->pte); in do_anonymous_page()
4915 } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { in do_anonymous_page()
4916 update_mmu_tlb_range(vma, addr, vmf->pte, nr_pages); in do_anonymous_page()
4920 ret = check_stable_address_space(vma->vm_mm); in do_anonymous_page()
4926 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4931 folio_ref_add(folio, nr_pages - 1); in do_anonymous_page()
4932 add_mm_counter(vma->vm_mm, MM_ANONPAGES, nr_pages); in do_anonymous_page()
4939 set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr_pages); in do_anonymous_page()
4941 /* No need to invalidate - it was non-present before */ in do_anonymous_page()
4942 update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr_pages); in do_anonymous_page()
4944 if (vmf->pte) in do_anonymous_page()
4945 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_anonymous_page()
4956 * released depending on flags and vma->vm_ops->fault() return value.
4961 struct vm_area_struct *vma = vmf->vma; in __do_fault()
4980 if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { in __do_fault()
4981 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in __do_fault()
4982 if (!vmf->prealloc_pte) in __do_fault()
4986 ret = vma->vm_ops->fault(vmf); in __do_fault()
4991 folio = page_folio(vmf->page); in __do_fault()
4992 if (unlikely(PageHWPoison(vmf->page))) { in __do_fault()
4995 if (page_mapped(vmf->page)) in __do_fault()
4998 if (mapping_evict_folio(folio->mapping, folio)) in __do_fault()
5003 vmf->page = NULL; in __do_fault()
5010 VM_BUG_ON_PAGE(!folio_test_locked(folio), vmf->page); in __do_fault()
5018 struct vm_area_struct *vma = vmf->vma; in deposit_prealloc_pte()
5020 pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, vmf->prealloc_pte); in deposit_prealloc_pte()
5025 mm_inc_nr_ptes(vma->vm_mm); in deposit_prealloc_pte()
5026 vmf->prealloc_pte = NULL; in deposit_prealloc_pte()
5032 struct vm_area_struct *vma = vmf->vma; in do_set_pmd()
5033 bool write = vmf->flags & FAULT_FLAG_WRITE; in do_set_pmd()
5034 unsigned long haddr = vmf->address & HPAGE_PMD_MASK; in do_set_pmd()
5041 * PMD mappings, but PTE-mapped THP are fine. So let's simply refuse any in do_set_pmd()
5044 if (thp_disabled_by_hw() || vma_thp_disabled(vma, vma->vm_flags)) in do_set_pmd()
5052 page = &folio->page; in do_set_pmd()
5067 if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) { in do_set_pmd()
5068 vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); in do_set_pmd()
5069 if (!vmf->prealloc_pte) in do_set_pmd()
5073 vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); in do_set_pmd()
5074 if (unlikely(!pmd_none(*vmf->pmd))) in do_set_pmd()
5079 entry = mk_huge_pmd(page, vma->vm_page_prot); in do_set_pmd()
5083 add_mm_counter(vma->vm_mm, mm_counter_file(folio), HPAGE_PMD_NR); in do_set_pmd()
5092 set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry); in do_set_pmd()
5094 update_mmu_cache_pmd(vma, haddr, vmf->pmd); in do_set_pmd()
5100 spin_unlock(vmf->ptl); in do_set_pmd()
5111 * set_pte_range - Set a range of PTEs to point to pages in a folio.
5121 struct vm_area_struct *vma = vmf->vma; in set_pte_range()
5122 bool write = vmf->flags & FAULT_FLAG_WRITE; in set_pte_range()
5123 bool prefault = !in_range(vmf->address, addr, nr * PAGE_SIZE); in set_pte_range()
5127 entry = mk_pte(page, vma->vm_page_prot); in set_pte_range()
5138 /* copy-on-write page */ in set_pte_range()
5139 if (write && !(vma->vm_flags & VM_SHARED)) { in set_pte_range()
5146 set_ptes(vma->vm_mm, addr, vmf->pte, entry, nr); in set_pte_range()
5148 /* no need to invalidate: a not-present page won't be cached */ in set_pte_range()
5149 update_mmu_cache_range(vmf, vma, addr, vmf->pte, nr); in set_pte_range()
5154 if (vmf->flags & FAULT_FLAG_ORIG_PTE_VALID) in vmf_pte_changed()
5155 return !pte_same(ptep_get(vmf->pte), vmf->orig_pte); in vmf_pte_changed()
5157 return !pte_none(ptep_get(vmf->pte)); in vmf_pte_changed()
5161 * finish_fault - finish page fault once we have prepared the page to fault
5177 struct vm_area_struct *vma = vmf->vma; in finish_fault()
5181 bool is_cow = (vmf->flags & FAULT_FLAG_WRITE) && in finish_fault()
5182 !(vma->vm_flags & VM_SHARED); in finish_fault()
5188 addr = vmf->address; in finish_fault()
5192 page = vmf->cow_page; in finish_fault()
5194 page = vmf->page; in finish_fault()
5200 if (!(vma->vm_flags & VM_SHARED)) { in finish_fault()
5201 ret = check_stable_address_space(vma->vm_mm); in finish_fault()
5206 if (pmd_none(*vmf->pmd)) { in finish_fault()
5213 if (vmf->prealloc_pte) in finish_fault()
5214 pmd_install(vma->vm_mm, vmf->pmd, &vmf->prealloc_pte); in finish_fault()
5215 else if (unlikely(pte_alloc(vma->vm_mm, vmf->pmd))) in finish_fault()
5223 * Using per-page fault to maintain the uffd semantics, and same in finish_fault()
5224 * approach also applies to non-anonymous-shmem faults to avoid in finish_fault()
5232 /* The page offset of vmf->address within the VMA. */ in finish_fault()
5233 pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; in finish_fault()
5235 pgoff_t pte_off = pte_index(vmf->address); in finish_fault()
5238 * Fallback to per-page fault in case the folio size in page in finish_fault()
5242 vma_off + (nr_pages - idx) > vma_pages(vma) || in finish_fault()
5244 pte_off + (nr_pages - idx) > PTRS_PER_PTE)) { in finish_fault()
5248 addr = vmf->address - idx * PAGE_SIZE; in finish_fault()
5249 page = &folio->page; in finish_fault()
5253 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in finish_fault()
5254 addr, &vmf->ptl); in finish_fault()
5255 if (!vmf->pte) in finish_fault()
5258 /* Re-check under ptl */ in finish_fault()
5260 update_mmu_tlb(vma, addr, vmf->pte); in finish_fault()
5263 } else if (nr_pages > 1 && !pte_range_none(vmf->pte, nr_pages)) { in finish_fault()
5265 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
5269 folio_ref_add(folio, nr_pages - 1); in finish_fault()
5272 add_mm_counter(vma->vm_mm, type, nr_pages); in finish_fault()
5276 pte_unmap_unlock(vmf->pte, vmf->ptl); in finish_fault()
5297 return -EINVAL; in fault_around_bytes_set()
5300 * The minimum value is 1 page, however this results in no fault-around in fault_around_bytes_set()
5325 * It uses vm_ops->map_pages() to map the pages, which skips the page if it's
5326 * not ready to be mapped: not up-to-date, locked, etc.
5343 pgoff_t pte_off = pte_index(vmf->address); in do_fault_around()
5344 /* The page offset of vmf->address within the VMA. */ in do_fault_around()
5345 pgoff_t vma_off = vmf->pgoff - vmf->vma->vm_pgoff; in do_fault_around()
5351 pte_off - min(pte_off, vma_off)); in do_fault_around()
5355 pte_off + vma_pages(vmf->vma) - vma_off) - 1; in do_fault_around()
5357 if (pmd_none(*vmf->pmd)) { in do_fault_around()
5358 vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); in do_fault_around()
5359 if (!vmf->prealloc_pte) in do_fault_around()
5364 ret = vmf->vma->vm_ops->map_pages(vmf, in do_fault_around()
5365 vmf->pgoff + from_pte - pte_off, in do_fault_around()
5366 vmf->pgoff + to_pte - pte_off); in do_fault_around()
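/*
 * Worked example (illustrative, not part of mm/memory.c): with a
 * fault-around window of 16 pages (the default 65536 bytes on 4K
 * pages), a fault at pte_off = 5 in a VMA that starts at vma_off = 3
 * and spans 8 pages gives
 *	from_pte = max(ALIGN_DOWN(5, 16), 5 - min(5, 3)) = max(0, 2) = 2
 *	to_pte   = min3(ALIGN(5 + 1, 16), PTRS_PER_PTE, 5 + 8 - 3) - 1
 *	         = min3(16, 512, 10) - 1 = 9
 * so ->map_pages() is asked for PTEs 2..9 and the window never reaches
 * outside the VMA or the current page table.
 */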
5372 /* Return true if we should do read fault-around, false otherwise */
5375 /* No ->map_pages? No way to fault around... */ in should_fault_around()
5376 if (!vmf->vma->vm_ops->map_pages) in should_fault_around()
5379 if (uffd_disable_fault_around(vmf->vma)) in should_fault_around()
5392 * Let's call ->map_pages() first and use ->fault() as fallback in do_read_fault()
5411 folio = page_folio(vmf->page); in do_read_fault()
5420 struct vm_area_struct *vma = vmf->vma; in do_cow_fault()
5430 folio = folio_prealloc(vma->vm_mm, vma, vmf->address, false); in do_cow_fault()
5434 vmf->cow_page = &folio->page; in do_cow_fault()
5442 if (copy_mc_user_highpage(vmf->cow_page, vmf->page, vmf->address, vma)) { in do_cow_fault()
5450 unlock_page(vmf->page); in do_cow_fault()
5451 put_page(vmf->page); in do_cow_fault()
5462 struct vm_area_struct *vma = vmf->vma; in do_shared_fault()
5474 folio = page_folio(vmf->page); in do_shared_fault()
5480 if (vma->vm_ops->page_mkwrite) { in do_shared_fault()
5503 * We enter with non-exclusive mmap_lock (to exclude vma changes,
5512 struct vm_area_struct *vma = vmf->vma; in do_fault()
5513 struct mm_struct *vm_mm = vma->vm_mm; in do_fault()
5519 if (!vma->vm_ops->fault) { in do_fault()
5520 vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, in do_fault()
5521 vmf->address, &vmf->ptl); in do_fault()
5522 if (unlikely(!vmf->pte)) in do_fault()
5532 if (unlikely(pte_none(ptep_get(vmf->pte)))) in do_fault()
5537 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_fault()
5539 } else if (!(vmf->flags & FAULT_FLAG_WRITE)) in do_fault()
5541 else if (!(vma->vm_flags & VM_SHARED)) in do_fault()
5547 if (vmf->prealloc_pte) { in do_fault()
5548 pte_free(vm_mm, vmf->prealloc_pte); in do_fault()
5549 vmf->prealloc_pte = NULL; in do_fault()
5558 struct vm_area_struct *vma = vmf->vma; in numa_migrate_check()
5575 if (folio_likely_mapped_shared(folio) && (vma->vm_flags & VM_SHARED)) in numa_migrate_check()
5582 *last_cpupid = (-1 & LAST_CPUPID_MASK); in numa_migrate_check()
5608 pte = pte_modify(old_pte, vma->vm_page_prot); in numa_rebuild_single_mapping()
5620 int nr = pte_pfn(fault_pte) - folio_pfn(folio); in numa_rebuild_large_mapping()
5621 unsigned long start, end, addr = vmf->address; in numa_rebuild_large_mapping()
5622 unsigned long addr_start = addr - (nr << PAGE_SHIFT); in numa_rebuild_large_mapping()
5627 start = max3(addr_start, pt_start, vma->vm_start); in numa_rebuild_large_mapping()
5629 vma->vm_end); in numa_rebuild_large_mapping()
5630 start_ptep = vmf->pte - ((addr - start) >> PAGE_SHIFT); in numa_rebuild_large_mapping()
5644 ptent = pte_modify(ptent, vma->vm_page_prot); in numa_rebuild_large_mapping()
5657 struct vm_area_struct *vma = vmf->vma; in do_numa_page()
5671 spin_lock(vmf->ptl); in do_numa_page()
5673 old_pte = ptep_get(vmf->pte); in do_numa_page()
5675 if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { in do_numa_page()
5676 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
5680 pte = pte_modify(old_pte, vma->vm_page_prot); in do_numa_page()
5688 can_change_pte_writable(vma, vmf->address, pte)) in do_numa_page()
5691 folio = vm_normal_folio(vma, vmf->address, pte); in do_numa_page()
5698 target_nid = numa_migrate_check(folio, vmf, vmf->address, &flags, in do_numa_page()
5707 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
5720 vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, in do_numa_page()
5721 vmf->address, &vmf->ptl); in do_numa_page()
5722 if (unlikely(!vmf->pte)) in do_numa_page()
5724 if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { in do_numa_page()
5725 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
5731 * non-accessible ptes, some can allow access by kernel mode. in do_numa_page()
5737 numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, in do_numa_page()
5739 pte_unmap_unlock(vmf->pte, vmf->ptl); in do_numa_page()
5748 struct vm_area_struct *vma = vmf->vma; in create_huge_pmd()
5751 if (vma->vm_ops->huge_fault) in create_huge_pmd()
5752 return vma->vm_ops->huge_fault(vmf, PMD_ORDER); in create_huge_pmd()
5759 struct vm_area_struct *vma = vmf->vma; in wp_huge_pmd()
5760 const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE; in wp_huge_pmd()
5765 userfaultfd_huge_pmd_wp(vma, vmf->orig_pmd)) { in wp_huge_pmd()
5766 if (userfaultfd_wp_async(vmf->vma)) in wp_huge_pmd()
5773 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pmd()
5774 if (vma->vm_ops->huge_fault) { in wp_huge_pmd()
5775 ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER); in wp_huge_pmd()
5782 /* COW or write-notify handled on pte level: split pmd. */ in wp_huge_pmd()
5783 __split_huge_pmd(vma, vmf->pmd, vmf->address, false, NULL); in wp_huge_pmd()
5792 struct vm_area_struct *vma = vmf->vma; in create_huge_pud()
5796 if (vma->vm_ops->huge_fault) in create_huge_pud()
5797 return vma->vm_ops->huge_fault(vmf, PUD_ORDER); in create_huge_pud()
5806 struct vm_area_struct *vma = vmf->vma; in wp_huge_pud()
5812 if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) { in wp_huge_pud()
5813 if (vma->vm_ops->huge_fault) { in wp_huge_pud()
5814 ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER); in wp_huge_pud()
5820 /* COW or write-notify not handled on PUD level: split pud.*/ in wp_huge_pud()
5821 __split_huge_pud(vma, vmf->pud, vmf->address); in wp_huge_pud()
5835 * We enter with non-exclusive mmap_lock (to exclude vma changes, but allow
5845 if (unlikely(pmd_none(*vmf->pmd))) { in handle_pte_fault()
5847 * Leave __pte_alloc() until later: because vm_ops->fault may in handle_pte_fault()
5852 vmf->pte = NULL; in handle_pte_fault()
5853 vmf->flags &= ~FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
5863 * Use the maywrite version to indicate that vmf->pte may be in handle_pte_fault()
5870 vmf->pte = pte_offset_map_rw_nolock(vmf->vma->vm_mm, vmf->pmd, in handle_pte_fault()
5871 vmf->address, &dummy_pmdval, in handle_pte_fault()
5872 &vmf->ptl); in handle_pte_fault()
5873 if (unlikely(!vmf->pte)) in handle_pte_fault()
5875 vmf->orig_pte = ptep_get_lockless(vmf->pte); in handle_pte_fault()
5876 vmf->flags |= FAULT_FLAG_ORIG_PTE_VALID; in handle_pte_fault()
5878 if (pte_none(vmf->orig_pte)) { in handle_pte_fault()
5879 pte_unmap(vmf->pte); in handle_pte_fault()
5880 vmf->pte = NULL; in handle_pte_fault()
5884 if (!vmf->pte) in handle_pte_fault()
5887 if (!pte_present(vmf->orig_pte)) in handle_pte_fault()
5890 if (pte_protnone(vmf->orig_pte) && vma_is_accessible(vmf->vma)) in handle_pte_fault()
5893 spin_lock(vmf->ptl); in handle_pte_fault()
5894 entry = vmf->orig_pte; in handle_pte_fault()
5895 if (unlikely(!pte_same(ptep_get(vmf->pte), entry))) { in handle_pte_fault()
5896 update_mmu_tlb(vmf->vma, vmf->address, vmf->pte); in handle_pte_fault()
5899 if (vmf->flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) { in handle_pte_fault()
5902 else if (likely(vmf->flags & FAULT_FLAG_WRITE)) in handle_pte_fault()
5906 if (ptep_set_access_flags(vmf->vma, vmf->address, vmf->pte, entry, in handle_pte_fault()
5907 vmf->flags & FAULT_FLAG_WRITE)) { in handle_pte_fault()
5908 update_mmu_cache_range(vmf, vmf->vma, vmf->address, in handle_pte_fault()
5909 vmf->pte, 1); in handle_pte_fault()
5912 if (vmf->flags & FAULT_FLAG_TRIED) in handle_pte_fault()
5920 if (vmf->flags & FAULT_FLAG_WRITE) in handle_pte_fault()
5921 flush_tlb_fix_spurious_fault(vmf->vma, vmf->address, in handle_pte_fault()
5922 vmf->pte); in handle_pte_fault()
5925 pte_unmap_unlock(vmf->pte, vmf->ptl); in handle_pte_fault()
5946 struct mm_struct *mm = vma->vm_mm; in __handle_mm_fault()
5947 unsigned long vm_flags = vma->vm_flags; in __handle_mm_fault()
6032 * mm_account_fault - Do page fault accounting
6035 * of perf event counters, but we'll still do the per-task accounting to
6044 * still be in per-arch page fault handlers at the entry of page fault.
6081 current->maj_flt++; in mm_account_fault()
6083 current->min_flt++; in mm_account_fault()
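The maj_flt/min_flt bumps above are the per-task accounting that the mm_account_fault() comment refers to; userspace reads the same counters back through getrusage(). A minimal userspace illustration (not kernel code), assuming only the standard ru_minflt/ru_majflt reporting:

#include <stdio.h>
#include <sys/resource.h>

int main(void)
{
        struct rusage ru;

        /* Report the counters that mm_account_fault() maintains per task. */
        if (getrusage(RUSAGE_SELF, &ru))
                return 1;
        printf("minor faults: %ld, major faults: %ld\n",
               ru.ru_minflt, ru.ru_majflt);
        return 0;
}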
6103 current->in_lru_fault = vma_has_recency(vma); in lru_gen_enter_fault()
6108 current->in_lru_fault = false; in lru_gen_exit_fault()
6128 * just treat it like an ordinary read-fault otherwise. in sanitize_fault_flags()
6130 if (!is_cow_mapping(vma->vm_flags)) in sanitize_fault_flags()
6133 /* Write faults on read-only mappings are impossible ... */ in sanitize_fault_flags()
6134 if (WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE))) in sanitize_fault_flags()
6137 if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE) && in sanitize_fault_flags()
6138 !is_cow_mapping(vma->vm_flags))) in sanitize_fault_flags()
6143 * Per-VMA locks can't be used with FAULT_FLAG_RETRY_NOWAIT because of in sanitize_fault_flags()
6166 struct mm_struct *mm = vma->vm_mm; in handle_mm_fault()
6183 is_droppable = !!(vma->vm_flags & VM_DROPPABLE); in handle_mm_fault()
6195 ret = hugetlb_fault(vma->vm_mm, vma, address, flags); in handle_mm_fault()
6200 * Warning: It is no longer safe to dereference vma-> after this point, in handle_mm_fault()
6253 * from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but in mmap_upgrade_trylock()
6298 if (likely(vma && (vma->vm_start <= addr))) in lock_mm_and_find_vma()
6305 if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) { in lock_mm_and_find_vma()
6316 * re-take it, and also look up the vma again, in lock_mm_and_find_vma()
6317 * re-checking it. in lock_mm_and_find_vma()
6326 if (vma->vm_start <= addr) in lock_mm_and_find_vma()
6328 if (!(vma->vm_flags & VM_GROWSDOWN)) in lock_mm_and_find_vma()
6354 MA_STATE(mas, &mm->mm_mt, address, address); in lock_vma_under_rcu()
6367 if (vma->detached) { in lock_vma_under_rcu()
6381 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) in lock_vma_under_rcu()
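The fragments above (handle_mm_fault, lock_mm_and_find_vma, lock_vma_under_rcu) are the pieces an architecture fault handler strings together. A minimal sketch of that pattern, assuming an arch that supports the per-VMA lock fast path; do_user_fault_sketch() is an illustrative name, and the bad-address, signal and retry handling of real handlers is abbreviated:

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/ptrace.h>

static void do_user_fault_sketch(struct pt_regs *regs, unsigned long address,
                                 unsigned int flags)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        vm_fault_t fault;

        /* Fast path: per-VMA read lock, no mmap_lock taken. */
        vma = lock_vma_under_rcu(mm, address);
        if (vma) {
                fault = handle_mm_fault(vma, address,
                                        flags | FAULT_FLAG_VMA_LOCK, regs);
                if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
                        vma_end_read(vma);
                if (!(fault & VM_FAULT_RETRY))
                        return;
                /* Fall back to the mmap_lock path below. */
        }

        /* Slow path: take mmap_lock for read and look the VMA up again. */
        vma = lock_mm_and_find_vma(mm, address, regs);
        if (!vma)
                return; /* bad address: mmap_lock already dropped; real handlers raise SIGSEGV here */
        fault = handle_mm_fault(vma, address, flags, regs);
        /* VM_FAULT_COMPLETED means the fault handler already dropped the lock. */
        if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
                mmap_read_unlock(mm);
}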
6399 * We've already handled the fast-path in-line.
6405 return -ENOMEM; in __p4d_alloc()
6407 spin_lock(&mm->page_table_lock); in __p4d_alloc()
6414 spin_unlock(&mm->page_table_lock); in __p4d_alloc()
6422 * We've already handled the fast-path in-line.
6428 return -ENOMEM; in __pud_alloc()
6430 spin_lock(&mm->page_table_lock); in __pud_alloc()
6437 spin_unlock(&mm->page_table_lock); in __pud_alloc()
6445 * We've already handled the fast-path in-line.
6452 return -ENOMEM; in __pmd_alloc()
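The "fast-path in-line" comments refer to the p4d_alloc()/pud_alloc()/pmd_alloc() wrappers, which test for an empty entry inline and only drop into the __p4d_alloc()/__pud_alloc()/__pmd_alloc() slow paths shown above. A minimal sketch of the walk-and-allocate chain that __handle_mm_fault() performs down to the PMD level; walk_alloc_pmd_sketch() is an illustrative name:

#include <linux/mm.h>

static pmd_t *walk_alloc_pmd_sketch(struct mm_struct *mm, unsigned long addr)
{
        pgd_t *pgd = pgd_offset(mm, addr);
        p4d_t *p4d = p4d_alloc(mm, pgd, addr);
        pud_t *pud;

        if (!p4d)
                return NULL;
        pud = pud_alloc(mm, p4d, addr);
        if (!pud)
                return NULL;
        /* Returns NULL on -ENOMEM, otherwise a PMD entry ready for use. */
        return pmd_alloc(mm, pud, addr);
}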
6473 args->lock = lock; in pfnmap_args_setup()
6474 args->ptep = ptep; in pfnmap_args_setup()
6475 args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT); in pfnmap_args_setup()
6476 args->pgprot = pgprot; in pfnmap_args_setup()
6477 args->writable = writable; in pfnmap_args_setup()
6478 args->special = special; in pfnmap_args_setup()
6484 struct file *file = vma->vm_file; in pfnmap_lockdep_assert()
6485 struct address_space *mapping = file ? file->f_mapping : NULL; in pfnmap_lockdep_assert()
6488 lockdep_assert(lockdep_is_held(&mapping->i_mmap_rwsem) || in pfnmap_lockdep_assert()
6489 lockdep_is_held(&vma->vm_mm->mmap_lock)); in pfnmap_lockdep_assert()
6491 lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock)); in pfnmap_lockdep_assert()
6496 * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
6499 * The caller needs to setup args->vma and args->address to point to the
6516 * a later point in time can trigger use-after-free.
6528 struct vm_area_struct *vma = args->vma; in follow_pfnmap_start()
6529 unsigned long address = args->address; in follow_pfnmap_start()
6530 struct mm_struct *mm = vma->vm_mm; in follow_pfnmap_start()
6540 if (unlikely(address < vma->vm_start || address >= vma->vm_end)) in follow_pfnmap_start()
6543 if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) in follow_pfnmap_start()
6598 return -EINVAL; in follow_pfnmap_start()
6611 if (args->lock) in follow_pfnmap_end()
6612 spin_unlock(args->lock); in follow_pfnmap_end()
6613 if (args->ptep) in follow_pfnmap_end()
6614 pte_unmap(args->ptep); in follow_pfnmap_end()
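Per the follow_pfnmap_start() comment above, the caller fills in args->vma and args->address and must pair the lookup with follow_pfnmap_end() before the result goes stale. A minimal usage sketch; lookup_pfn_sketch() is an illustrative wrapper, not an existing helper:

#include <linux/mm.h>

static int lookup_pfn_sketch(struct vm_area_struct *vma, unsigned long addr,
                             unsigned long *pfn)
{
        struct follow_pfnmap_args args = {
                .vma = vma,
                .address = addr,
        };
        int ret;

        ret = follow_pfnmap_start(&args);
        if (ret)
                return ret;
        /* The mapping backing this pfn is only guaranteed stable until _end(). */
        *pfn = args.pfn;
        follow_pfnmap_end(&args);
        return 0;
}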
6620 * generic_access_phys - generic implementation for iomem mmap access
6638 int ret = -EINVAL; in generic_access_phys()
6644 return -EINVAL; in generic_access_phys()
6651 return -EINVAL; in generic_access_phys()
6655 return -ENOMEM; in generic_access_phys()
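generic_access_phys() is meant to be wired up as a driver's vm_operations_struct ->access method for VM_IO/VM_PFNMAP mappings, which is the vma->vm_ops->access path __access_remote_vm() takes below. A minimal sketch, assuming CONFIG_HAVE_IOREMAP_PROT; iomem_vm_ops_sketch is an illustrative name:

#include <linux/mm.h>

static const struct vm_operations_struct iomem_vm_ops_sketch = {
#ifdef CONFIG_HAVE_IOREMAP_PROT
        /* Lets ptrace/access_remote_vm() read and write the mapped registers. */
        .access = generic_access_phys,
#endif
};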
6717 return buf - old_buf; in __access_remote_vm()
6729 if (vma->vm_ops && vma->vm_ops->access) in __access_remote_vm()
6730 bytes = vma->vm_ops->access(vma, addr, buf, in __access_remote_vm()
6737 offset = addr & (PAGE_SIZE-1); in __access_remote_vm()
6738 if (bytes > PAGE_SIZE-offset) in __access_remote_vm()
6739 bytes = PAGE_SIZE-offset; in __access_remote_vm()
6752 len -= bytes; in __access_remote_vm()
6758 return buf - old_buf; in __access_remote_vm()
6762 * access_remote_vm - access another process' address space
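A minimal, ptrace-style usage sketch for access_remote_vm(): take a reference on the target mm with get_task_mm(), read, and drop it with mmput(). peek_remote_sketch() is an illustrative name, and FOLL_FORCE mirrors what ptrace passes:

#include <linux/mm.h>
#include <linux/sched/mm.h>

static int peek_remote_sketch(struct task_struct *task, unsigned long addr,
                              void *buf, int len)
{
        struct mm_struct *mm = get_task_mm(task);
        int copied;

        if (!mm)
                return -ESRCH;
        copied = access_remote_vm(mm, addr, buf, len, FOLL_FORCE);
        mmput(mm);
        return copied;  /* number of bytes actually read */
}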
6807 struct mm_struct *mm = current->mm; in print_vma_addr()
6817 if (vma && vma->vm_file) { in print_vma_addr()
6818 struct file *f = vma->vm_file; in print_vma_addr()
6819 ip -= vma->vm_start; in print_vma_addr()
6820 ip += vma->vm_pgoff << PAGE_SHIFT; in print_vma_addr()
6822 vma->vm_start, in print_vma_addr()
6823 vma->vm_end - vma->vm_start); in print_vma_addr()
6834 if (current->mm) in __might_fault()
6835 might_lock_read(&current->mm->mmap_lock); in __might_fault()
6853 ~(((unsigned long)nr_pages << PAGE_SHIFT) - 1); in process_huge_page()
6857 n = (addr_hint - addr) / PAGE_SIZE; in process_huge_page()
6863 for (i = nr_pages - 1; i >= 2 * n; i--) { in process_huge_page()
6871 base = nr_pages - 2 * (nr_pages - n); in process_huge_page()
6872 l = nr_pages - n; in process_huge_page()
6882 * Process remaining subpages in left-right-left-right pattern in process_huge_page()
6887 int right_idx = base + 2 * l - 1 - i; in process_huge_page()
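A worked example of the index math above, assuming nr_pages = 16 and a target subpage n = 10 in the second half of the huge page (so base = 16 - 2 * (16 - 10) = 4 and l = 16 - 10 = 6): subpages 0..3 are processed first, then the left/right pairs (4,15), (5,14), (6,13), (7,12), (8,11), (9,10) converge on the target. The standalone illustration below just prints that order:

#include <stdio.h>

int main(void)
{
        int nr_pages = 16, n = 10;              /* assumed example values */
        int base = nr_pages - 2 * (nr_pages - n);
        int l = nr_pages - n;
        int i;

        for (i = 0; i < base; i++)
                printf("cold: %d\n", i);        /* subpages far from target */
        for (i = 0; i < l; i++)                 /* left-right pairs towards n */
                printf("pair: %d %d\n", base + i, base + 2 * l - 1 - i);
        return 0;
}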
6923 * folio_zero_user - Zero a folio which will be mapped to userspace.
6954 return -EHWPOISON; in copy_user_gigantic_page()
6968 struct page *dst = folio_page(copy_arg->dst, idx); in copy_subpage()
6969 struct page *src = folio_page(copy_arg->src, idx); in copy_subpage()
6971 if (copy_mc_user_highpage(dst, src, addr, copy_arg->vma)) in copy_subpage()
6972 return -EHWPOISON; in copy_subpage()
7012 ret_val -= (PAGE_SIZE - rc); in copy_folio_from_user()
7030 page_ptl_cachep = kmem_cache_create("page->ptl", sizeof(spinlock_t), 0, in ptlock_cache_init()
7041 ptdesc->ptl = ptl; in ptlock_alloc()
7047 if (ptdesc->ptl) in ptlock_free()
7048 kmem_cache_free(page_ptl_cachep, ptdesc->ptl); in ptlock_free()