1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
4 * Author: Darrick J. Wong <[email protected]>
5 */
6 #include "xfs.h"
7 #include "xfs_fs.h"
8 #include "xfs_shared.h"
9 #include "xfs_format.h"
10 #include "xfs_log_format.h"
11 #include "xfs_trans_resv.h"
12 #include "xfs_bit.h"
13 #include "xfs_sb.h"
14 #include "xfs_mount.h"
15 #include "xfs_defer.h"
16 #include "xfs_trans.h"
17 #include "xfs_metafile.h"
18 #include "xfs_trace.h"
19 #include "xfs_inode.h"
20 #include "xfs_quota.h"
21 #include "xfs_errortag.h"
22 #include "xfs_error.h"
23 #include "xfs_alloc.h"
24
25 static const struct {
26 enum xfs_metafile_type mtype;
27 const char *name;
28 } xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };
29
30 const char *
xfs_metafile_type_str(enum xfs_metafile_type metatype)31 xfs_metafile_type_str(enum xfs_metafile_type metatype)
32 {
33 unsigned int i;
34
35 for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) {
36 if (xfs_metafile_type_strs[i].mtype == metatype)
37 return xfs_metafile_type_strs[i].name;
38 }
39
40 return NULL;
41 }
42
43 /* Set up an inode to be recognized as a metadata directory inode. */
44 void
xfs_metafile_set_iflag(struct xfs_trans * tp,struct xfs_inode * ip,enum xfs_metafile_type metafile_type)45 xfs_metafile_set_iflag(
46 struct xfs_trans *tp,
47 struct xfs_inode *ip,
48 enum xfs_metafile_type metafile_type)
49 {
50 VFS_I(ip)->i_mode &= ~0777;
51 VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
52 VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
53 if (S_ISDIR(VFS_I(ip)->i_mode))
54 ip->i_diflags |= XFS_METADIR_DIFLAGS;
55 else
56 ip->i_diflags |= XFS_METAFILE_DIFLAGS;
57 ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
58 ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
59 ip->i_metatype = metafile_type;
60 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
61 }
62
63 /* Clear the metadata directory inode flag. */
64 void
xfs_metafile_clear_iflag(struct xfs_trans * tp,struct xfs_inode * ip)65 xfs_metafile_clear_iflag(
66 struct xfs_trans *tp,
67 struct xfs_inode *ip)
68 {
69 ASSERT(xfs_is_metadir_inode(ip));
70 ASSERT(VFS_I(ip)->i_nlink == 0);
71
72 ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
73 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
74 }
75
76 /*
77 * Is the amount of space that could be allocated towards a given metadata
78 * file at or beneath a certain threshold?
79 */
80 static inline bool
xfs_metafile_resv_can_cover(struct xfs_inode * ip,int64_t rhs)81 xfs_metafile_resv_can_cover(
82 struct xfs_inode *ip,
83 int64_t rhs)
84 {
85 /*
86 * The amount of space that can be allocated to this metadata file is
87 * the remaining reservation for the particular metadata file + the
88 * global free block count. Take care of the first case to avoid
89 * touching the per-cpu counter.
90 */
91 if (ip->i_delayed_blks >= rhs)
92 return true;
93
94 /*
95 * There aren't enough blocks left in the inode's reservation, but it
96 * isn't critical unless there also isn't enough free space.
97 */
98 return __percpu_counter_compare(&ip->i_mount->m_fdblocks,
99 rhs - ip->i_delayed_blks, 2048) >= 0;
100 }
101
102 /*
103 * Is this metadata file critically low on blocks? For now we'll define that
104 * as the number of blocks we can get our hands on being less than 10% of what
105 * we reserved or less than some arbitrary number (maximum btree height).
106 */
107 bool
xfs_metafile_resv_critical(struct xfs_inode * ip)108 xfs_metafile_resv_critical(
109 struct xfs_inode *ip)
110 {
111 uint64_t asked_low_water;
112
113 if (!ip)
114 return false;
115
116 ASSERT(xfs_is_metadir_inode(ip));
117 trace_xfs_metafile_resv_critical(ip, 0);
118
119 if (!xfs_metafile_resv_can_cover(ip, ip->i_mount->m_rtbtree_maxlevels))
120 return true;
121
122 asked_low_water = div_u64(ip->i_meta_resv_asked, 10);
123 if (!xfs_metafile_resv_can_cover(ip, asked_low_water))
124 return true;
125
126 return XFS_TEST_ERROR(false, ip->i_mount,
127 XFS_ERRTAG_METAFILE_RESV_CRITICAL);
128 }
129
130 /* Allocate a block from the metadata file's reservation. */
131 void
xfs_metafile_resv_alloc_space(struct xfs_inode * ip,struct xfs_alloc_arg * args)132 xfs_metafile_resv_alloc_space(
133 struct xfs_inode *ip,
134 struct xfs_alloc_arg *args)
135 {
136 int64_t len = args->len;
137
138 ASSERT(xfs_is_metadir_inode(ip));
139 ASSERT(args->resv == XFS_AG_RESV_METAFILE);
140
141 trace_xfs_metafile_resv_alloc_space(ip, args->len);
142
143 /*
144 * Allocate the blocks from the metadata inode's block reservation
145 * and update the ondisk sb counter.
146 */
147 if (ip->i_delayed_blks > 0) {
148 int64_t from_resv;
149
150 from_resv = min_t(int64_t, len, ip->i_delayed_blks);
151 ip->i_delayed_blks -= from_resv;
152 xfs_mod_delalloc(ip, 0, -from_resv);
153 xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS,
154 -from_resv);
155 len -= from_resv;
156 }
157
158 /*
159 * Any allocation in excess of the reservation requires in-core and
160 * on-disk fdblocks updates. If we can grab @len blocks from the
161 * in-core fdblocks then all we need to do is update the on-disk
162 * superblock; if not, then try to steal some from the transaction's
163 * block reservation. Overruns are only expected for rmap btrees.
164 */
165 if (len) {
166 unsigned int field;
167 int error;
168
169 error = xfs_dec_fdblocks(ip->i_mount, len, true);
170 if (error)
171 field = XFS_TRANS_SB_FDBLOCKS;
172 else
173 field = XFS_TRANS_SB_RES_FDBLOCKS;
174
175 xfs_trans_mod_sb(args->tp, field, -len);
176 }
177
178 ip->i_nblocks += args->len;
179 xfs_trans_log_inode(args->tp, ip, XFS_ILOG_CORE);
180 }
181
182 /* Free a block to the metadata file's reservation. */
183 void
xfs_metafile_resv_free_space(struct xfs_inode * ip,struct xfs_trans * tp,xfs_filblks_t len)184 xfs_metafile_resv_free_space(
185 struct xfs_inode *ip,
186 struct xfs_trans *tp,
187 xfs_filblks_t len)
188 {
189 int64_t to_resv;
190
191 ASSERT(xfs_is_metadir_inode(ip));
192 trace_xfs_metafile_resv_free_space(ip, len);
193
194 ip->i_nblocks -= len;
195 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
196
197 /*
198 * Add the freed blocks back into the inode's delalloc reservation
199 * until it reaches the maximum size. Update the ondisk fdblocks only.
200 */
201 to_resv = ip->i_meta_resv_asked - (ip->i_nblocks + ip->i_delayed_blks);
202 if (to_resv > 0) {
203 to_resv = min_t(int64_t, to_resv, len);
204 ip->i_delayed_blks += to_resv;
205 xfs_mod_delalloc(ip, 0, to_resv);
206 xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, to_resv);
207 len -= to_resv;
208 }
209
210 /*
211 * Everything else goes back to the filesystem, so update the in-core
212 * and on-disk counters.
213 */
214 if (len)
215 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, len);
216 }
217
218 /* Release a metadata file's space reservation. */
219 void
xfs_metafile_resv_free(struct xfs_inode * ip)220 xfs_metafile_resv_free(
221 struct xfs_inode *ip)
222 {
223 /* Non-btree metadata inodes don't need space reservations. */
224 if (!ip || !ip->i_meta_resv_asked)
225 return;
226
227 ASSERT(xfs_is_metadir_inode(ip));
228 trace_xfs_metafile_resv_free(ip, 0);
229
230 if (ip->i_delayed_blks) {
231 xfs_mod_delalloc(ip, 0, -ip->i_delayed_blks);
232 xfs_add_fdblocks(ip->i_mount, ip->i_delayed_blks);
233 ip->i_delayed_blks = 0;
234 }
235 ip->i_meta_resv_asked = 0;
236 }
237
238 /* Set up a metadata file's space reservation. */
239 int
xfs_metafile_resv_init(struct xfs_inode * ip,xfs_filblks_t ask)240 xfs_metafile_resv_init(
241 struct xfs_inode *ip,
242 xfs_filblks_t ask)
243 {
244 xfs_filblks_t hidden_space;
245 xfs_filblks_t used;
246 int error;
247
248 if (!ip || ip->i_meta_resv_asked > 0)
249 return 0;
250
251 ASSERT(xfs_is_metadir_inode(ip));
252
253 /*
254 * Space taken by all other metadata btrees are accounted on-disk as
255 * used space. We therefore only hide the space that is reserved but
256 * not used by the trees.
257 */
258 used = ip->i_nblocks;
259 if (used > ask)
260 ask = used;
261 hidden_space = ask - used;
262
263 error = xfs_dec_fdblocks(ip->i_mount, hidden_space, true);
264 if (error) {
265 trace_xfs_metafile_resv_init_error(ip, error, _RET_IP_);
266 return error;
267 }
268
269 xfs_mod_delalloc(ip, 0, hidden_space);
270 ip->i_delayed_blks = hidden_space;
271 ip->i_meta_resv_asked = ask;
272
273 trace_xfs_metafile_resv_init(ip, ask);
274 return 0;
275 }
276