1 // SPDX-License-Identifier: GPL-2.0
2 
3 #ifndef __KVM_X86_MMU_TDP_ITER_H
4 #define __KVM_X86_MMU_TDP_ITER_H
5 
6 #include <linux/kvm_host.h>
7 
8 #include "mmu.h"
9 #include "spte.h"
10 
11 /*
12  * TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
13  * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be
14  * batched without having to collect the list of zapped SPs.  Flows that can
15  * remove SPs must service pending TLB flushes prior to dropping RCU protection.
16  */
kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)17 static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
18 {
19 	return READ_ONCE(*rcu_dereference(sptep));
20 }
21 
/*
 * Atomically exchange the SPTE at @sptep with @new_spte and return the value
 * that was previously stored there.  Warns (via KVM_MMU_WARN_ON) when
 * is_ept_ve_possible() is true for @new_spte.
 */
static inline u64 kvm_tdp_mmu_write_spte_atomic(tdp_ptep_t sptep, u64 new_spte)
{
	u64 *raw_sptep;

	KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));

	raw_sptep = rcu_dereference(sptep);
	return xchg(raw_sptep, new_spte);
}
27 
/*
 * Store @new_spte into the SPTE at @sptep with a plain WRITE_ONCE(), i.e.
 * without an atomic read-modify-write.  Warns (via KVM_MMU_WARN_ON) when
 * is_ept_ve_possible() is true for @new_spte.
 */
static inline void __kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 new_spte)
{
	u64 *raw_sptep;

	KVM_MMU_WARN_ON(is_ept_ve_possible(new_spte));

	raw_sptep = rcu_dereference(sptep);
	WRITE_ONCE(*raw_sptep, new_spte);
}
33 
34 /*
35  * SPTEs must be modified atomically if they are shadow-present, leaf
36  * SPTEs, and have volatile bits, i.e. has bits that can be set outside
37  * of mmu_lock.  The Writable bit can be set by KVM's fast page fault
38  * handler, and Accessed and Dirty bits can be set by the CPU.
39  *
40  * Note, non-leaf SPTEs do have Accessed bits and those bits are
41  * technically volatile, but KVM doesn't consume the Accessed bit of
42  * non-leaf SPTEs, i.e. KVM doesn't care if it clobbers the bit.  This
43  * logic needs to be reassessed if KVM were to use non-leaf Accessed
44  * bits, e.g. to skip stepping down into child SPTEs when aging SPTEs.
45  */
kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte,int level)46 static inline bool kvm_tdp_mmu_spte_need_atomic_write(u64 old_spte, int level)
47 {
48 	return is_shadow_present_pte(old_spte) &&
49 	       is_last_spte(old_spte, level) &&
50 	       spte_has_volatile_bits(old_spte);
51 }
52 
/*
 * Write @new_spte to the SPTE at @sptep, choosing between a plain store and
 * an atomic exchange based on @old_spte and @level.
 *
 * Returns the SPTE's previous value: the caller-provided @old_spte when a
 * plain store is used, or the value returned by the atomic exchange
 * otherwise.
 */
static inline u64 kvm_tdp_mmu_write_spte(tdp_ptep_t sptep, u64 old_spte,
					 u64 new_spte, int level)
{
	if (!kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) {
		__kvm_tdp_mmu_write_spte(sptep, new_spte);
		return old_spte;
	}

	return kvm_tdp_mmu_write_spte_atomic(sptep, new_spte);
}
62 
/*
 * Clear the bits in @mask from the SPTE at @sptep.
 *
 * When the SPTE does not need an atomic write, the new value is computed
 * from the caller's @old_spte snapshot and stored with a plain write, and
 * @old_spte is returned.  Otherwise the bits are cleared with an atomic
 * fetch-and-AND on the SPTE itself, and the value observed by that operation
 * is returned.
 */
static inline u64 tdp_mmu_clear_spte_bits(tdp_ptep_t sptep, u64 old_spte,
					  u64 mask, int level)
{
	if (!kvm_tdp_mmu_spte_need_atomic_write(old_spte, level)) {
		__kvm_tdp_mmu_write_spte(sptep, old_spte & ~mask);
		return old_spte;
	}

	return (u64)atomic64_fetch_and(~mask,
				       (atomic64_t *)rcu_dereference(sptep));
}
76 
77 /*
78  * A TDP iterator performs a pre-order walk over a TDP paging structure.
79  */
80 struct tdp_iter {
81 	/*
82 	 * The iterator will traverse the paging structure towards the mapping
83 	 * for this GFN.
84 	 */
85 	gfn_t next_last_level_gfn;
86 	/*
87 	 * The next_last_level_gfn at the time when the thread last
88 	 * yielded. Only yielding when the next_last_level_gfn !=
89 	 * yielded_gfn helps ensure forward progress.
90 	 */
91 	gfn_t yielded_gfn;
92 	/* Pointers to the page tables traversed to reach the current SPTE */
93 	tdp_ptep_t pt_path[PT64_ROOT_MAX_LEVEL];
94 	/* A pointer to the current SPTE */
95 	tdp_ptep_t sptep;
96 	/* The lowest GFN (mask bits excluded) mapped by the current SPTE */
97 	gfn_t gfn;
98 	/* Mask applied to convert the GFN to the mapping GPA */
99 	gfn_t gfn_bits;
100 	/* The level of the root page given to the iterator */
101 	int root_level;
102 	/* The lowest level the iterator should traverse to */
103 	int min_level;
104 	/* The iterator's current level within the paging structure */
105 	int level;
106 	/* The address space ID, i.e. SMM vs. regular. */
107 	int as_id;
108 	/* A snapshot of the value at sptep */
109 	u64 old_spte;
110 	/*
111 	 * Whether the iterator has a valid state. This will be false if the
112 	 * iterator walks off the end of the paging structure.
113 	 */
114 	bool valid;
115 	/*
116 	 * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
117 	 * which case tdp_iter_next() needs to restart the walk at the root
118 	 * level instead of advancing to the next entry.
119 	 */
120 	bool yielded;
121 };
122 
/*
 * Iterates over every SPTE mapping the GFN range [start, end) in a
 * preorder traversal, using @min_level as the iterator's minimum level.
 *
 * @iter must be a struct tdp_iter lvalue.  @iter and @end are parenthesized
 * in the expansion so that expressions such as "*iterp" or "a ? b : c" can be
 * passed without changing how the loop condition is parsed.
 */
#define for_each_tdp_pte_min_level(iter, kvm, root, min_level, start, end)		\
	for (tdp_iter_start(&(iter), root, min_level, start,				\
			    kvm_gfn_root_bits(kvm, root));				\
	     (iter).valid && (iter).gfn < (end);					\
	     tdp_iter_next(&(iter)))

/*
 * Same walk, but starting at GFN 0 with no GFN bits and continuing until the
 * iterator becomes invalid or reaches tdp_mmu_max_gfn_exclusive().
 */
#define for_each_tdp_pte_min_level_all(iter, root, min_level)		\
	for (tdp_iter_start(&(iter), root, min_level, 0, 0);		\
	     (iter).valid && (iter).gfn < tdp_mmu_max_gfn_exclusive();	\
	     tdp_iter_next(&(iter)))

/* Walk the GFN range [start, end) with PG_LEVEL_4K as the minimum level. */
#define for_each_tdp_pte(iter, kvm, root, start, end)				\
	for_each_tdp_pte_min_level(iter, kvm, root, PG_LEVEL_4K, start, end)
139 
140 tdp_ptep_t spte_to_child_pt(u64 pte, int level);
141 
142 void tdp_iter_start(struct tdp_iter *iter, struct kvm_mmu_page *root,
143 		    int min_level, gfn_t next_last_level_gfn, gfn_t gfn_bits);
144 void tdp_iter_next(struct tdp_iter *iter);
145 void tdp_iter_restart(struct tdp_iter *iter);
146 
147 #endif /* __KVM_X86_MMU_TDP_ITER_H */
148