1 // Copyright 2019 The ChromiumOS Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 use std::borrow::Cow;
6 use std::cell::RefCell;
7 use std::cmp;
8 use std::collections::btree_map;
9 use std::collections::BTreeMap;
10 use std::ffi::CStr;
11 use std::ffi::CString;
12 #[cfg(feature = "fs_runtime_ugid_map")]
13 use std::ffi::OsStr;
14 use std::fs::File;
15 use std::io;
16 use std::mem;
17 use std::mem::size_of;
18 use std::mem::MaybeUninit;
19 use std::os::raw::c_int;
20 use std::os::raw::c_long;
21 #[cfg(feature = "fs_runtime_ugid_map")]
22 use std::os::unix::ffi::OsStrExt;
23 #[cfg(feature = "fs_runtime_ugid_map")]
24 use std::path::Path;
25 use std::ptr;
26 use std::ptr::addr_of;
27 use std::ptr::addr_of_mut;
28 use std::sync::atomic::AtomicBool;
29 use std::sync::atomic::AtomicU64;
30 use std::sync::atomic::Ordering;
31 use std::sync::Arc;
32 use std::sync::MutexGuard;
33 #[cfg(feature = "fs_permission_translation")]
34 use std::sync::RwLock;
35 use std::time::Duration;
36
37 #[cfg(feature = "arc_quota")]
38 use base::debug;
39 use base::error;
40 use base::ioctl_ior_nr;
41 use base::ioctl_iow_nr;
42 use base::ioctl_iowr_nr;
43 use base::ioctl_with_mut_ptr;
44 use base::ioctl_with_ptr;
45 use base::syscall;
46 use base::unix::FileFlags;
47 use base::warn;
48 use base::AsRawDescriptor;
49 use base::FromRawDescriptor;
50 use base::IoctlNr;
51 use base::Protection;
52 use base::RawDescriptor;
53 use fuse::filesystem::Context;
54 use fuse::filesystem::DirectoryIterator;
55 use fuse::filesystem::Entry;
56 use fuse::filesystem::FileSystem;
57 use fuse::filesystem::FsOptions;
58 use fuse::filesystem::GetxattrReply;
59 use fuse::filesystem::IoctlFlags;
60 use fuse::filesystem::IoctlReply;
61 use fuse::filesystem::ListxattrReply;
62 use fuse::filesystem::OpenOptions;
63 use fuse::filesystem::RemoveMappingOne;
64 use fuse::filesystem::SetattrValid;
65 use fuse::filesystem::ZeroCopyReader;
66 use fuse::filesystem::ZeroCopyWriter;
67 use fuse::filesystem::ROOT_ID;
68 use fuse::sys::WRITE_KILL_PRIV;
69 use fuse::Mapper;
70 #[cfg(feature = "arc_quota")]
71 use protobuf::Message;
72 use sync::Mutex;
73 #[cfg(feature = "arc_quota")]
74 use system_api::client::OrgChromiumSpaced;
75 #[cfg(feature = "arc_quota")]
76 use system_api::spaced::SetProjectIdReply;
77 #[cfg(feature = "arc_quota")]
78 use system_api::spaced::SetProjectInheritanceFlagReply;
79 use zerocopy::AsBytes;
80 use zerocopy::FromBytes;
81 use zerocopy::FromZeroes;
82
83 #[cfg(feature = "arc_quota")]
84 use crate::virtio::fs::arc_ioctl::FsPathXattrDataBuffer;
85 #[cfg(feature = "arc_quota")]
86 use crate::virtio::fs::arc_ioctl::FsPermissionDataBuffer;
87 #[cfg(feature = "arc_quota")]
88 use crate::virtio::fs::arc_ioctl::XattrData;
89 use crate::virtio::fs::caps::Capability;
90 use crate::virtio::fs::caps::Caps;
91 use crate::virtio::fs::caps::Set as CapSet;
92 use crate::virtio::fs::caps::Value as CapValue;
93 use crate::virtio::fs::config::CachePolicy;
94 use crate::virtio::fs::config::Config;
95 #[cfg(feature = "fs_permission_translation")]
96 use crate::virtio::fs::config::PermissionData;
97 use crate::virtio::fs::expiring_map::ExpiringMap;
98 use crate::virtio::fs::multikey::MultikeyBTreeMap;
99 use crate::virtio::fs::read_dir::ReadDir;
100
101 const EMPTY_CSTR: &CStr = c"";
102 const PROC_CSTR: &CStr = c"/proc";
103 const UNLABELED_CSTR: &CStr = c"unlabeled";
104
105 const USER_VIRTIOFS_XATTR: &[u8] = b"user.virtiofs.";
106 const SECURITY_XATTR: &[u8] = b"security.";
107 const SELINUX_XATTR: &[u8] = b"security.selinux";
108
109 const FSCRYPT_KEY_DESCRIPTOR_SIZE: usize = 8;
110 const FSCRYPT_KEY_IDENTIFIER_SIZE: usize = 16;
111
112 #[cfg(feature = "arc_quota")]
113 const FS_PROJINHERIT_FL: c_int = 0x20000000;
114
115 // 25 seconds is the default timeout for dbus-send.
116 #[cfg(feature = "arc_quota")]
117 const DEFAULT_DBUS_TIMEOUT: Duration = Duration::from_secs(25);
118
119 /// Internal utility wrapper for `cros_tracing::trace_event!()` macro with VirtioFS calls.
120 macro_rules! fs_trace {
121 ($tag:expr, $name:expr, $($arg:expr),+) => {
122 cros_tracing::trace_event!(VirtioFs, $name, $tag, $($arg),*)
123 };
124 }
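// Illustrative use of `fs_trace!` (a sketch with hypothetical arguments, mirroring how the
// request handlers invoke it):
//
//     fs_trace!(self.tag, "getattr", inode, handle);
//
// which expands to `cros_tracing::trace_event!(VirtioFs, "getattr", self.tag, inode, handle)`.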
125
126 #[repr(C)]
127 #[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
128 struct fscrypt_policy_v1 {
129 _version: u8,
130 _contents_encryption_mode: u8,
131 _filenames_encryption_mode: u8,
132 _flags: u8,
133 _master_key_descriptor: [u8; FSCRYPT_KEY_DESCRIPTOR_SIZE],
134 }
135
136 #[repr(C)]
137 #[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
138 struct fscrypt_policy_v2 {
139 _version: u8,
140 _contents_encryption_mode: u8,
141 _filenames_encryption_mode: u8,
142 _flags: u8,
143 __reserved: [u8; 4],
144 master_key_identifier: [u8; FSCRYPT_KEY_IDENTIFIER_SIZE],
145 }
146
147 #[repr(C)]
148 #[derive(Copy, Clone, FromZeroes, FromBytes)]
149 union fscrypt_policy {
150 _version: u8,
151 _v1: fscrypt_policy_v1,
152 _v2: fscrypt_policy_v2,
153 }
154
155 #[repr(C)]
156 #[derive(Copy, Clone, FromZeroes, FromBytes)]
157 struct fscrypt_get_policy_ex_arg {
158 policy_size: u64, /* input/output */
159 policy: fscrypt_policy, /* output */
160 }
161
162 impl From<&fscrypt_get_policy_ex_arg> for &[u8] {
163 fn from(value: &fscrypt_get_policy_ex_arg) -> Self {
164 assert!(value.policy_size <= size_of::<fscrypt_policy>() as u64);
165 let data_raw: *const fscrypt_get_policy_ex_arg = value;
166 // SAFETY: the length of the output slice is asserted to be within the struct it points to
167 unsafe {
168 std::slice::from_raw_parts(
169 data_raw.cast(),
170 value.policy_size as usize + size_of::<u64>(),
171 )
172 }
173 }
174 }
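// Worked example (illustrative): for a v2 policy, `policy_size` is
// `size_of::<fscrypt_policy_v2>()` = 24 bytes, so the returned slice spans the 8-byte
// `policy_size` header plus 24 policy bytes = 32 bytes. The assert above guarantees the slice
// never extends past the end of `fscrypt_get_policy_ex_arg` itself.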
175
176 ioctl_iowr_nr!(FS_IOC_GET_ENCRYPTION_POLICY_EX, 'f' as u32, 22, [u8; 9]);
177
178 #[repr(C)]
179 #[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
180 struct fsxattr {
181 fsx_xflags: u32, /* xflags field value (get/set) */
182 fsx_extsize: u32, /* extsize field value (get/set) */
183 fsx_nextents: u32, /* nextents field value (get) */
184 fsx_projid: u32, /* project identifier (get/set) */
185 fsx_cowextsize: u32, /* CoW extsize field value (get/set) */
186 fsx_pad: [u8; 8],
187 }
188
189 ioctl_ior_nr!(FS_IOC_FSGETXATTR, 'X' as u32, 31, fsxattr);
190 ioctl_iow_nr!(FS_IOC_FSSETXATTR, 'X' as u32, 32, fsxattr);
191
192 ioctl_ior_nr!(FS_IOC_GETFLAGS, 'f' as u32, 1, c_long);
193 ioctl_iow_nr!(FS_IOC_SETFLAGS, 'f' as u32, 2, c_long);
194
195 ioctl_ior_nr!(FS_IOC32_GETFLAGS, 'f' as u32, 1, u32);
196 ioctl_iow_nr!(FS_IOC32_SETFLAGS, 'f' as u32, 2, u32);
197
198 ioctl_ior_nr!(FS_IOC64_GETFLAGS, 'f' as u32, 1, u64);
199 ioctl_iow_nr!(FS_IOC64_SETFLAGS, 'f' as u32, 2, u64);
200
201 #[cfg(feature = "arc_quota")]
202 ioctl_iow_nr!(FS_IOC_SETPERMISSION, 'f' as u32, 1, FsPermissionDataBuffer);
203 #[cfg(feature = "arc_quota")]
204 ioctl_iow_nr!(FS_IOC_SETPATHXATTR, 'f' as u32, 1, FsPathXattrDataBuffer);
205
206 #[repr(C)]
207 #[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
208 struct fsverity_enable_arg {
209 _version: u32,
210 _hash_algorithm: u32,
211 _block_size: u32,
212 salt_size: u32,
213 salt_ptr: u64,
214 sig_size: u32,
215 __reserved1: u32,
216 sig_ptr: u64,
217 __reserved2: [u64; 11],
218 }
219
220 #[repr(C)]
221 #[derive(Clone, Copy, AsBytes, FromZeroes, FromBytes)]
222 struct fsverity_digest {
223 _digest_algorithm: u16,
224 digest_size: u16,
225 // __u8 digest[];
226 }
227
228 ioctl_iow_nr!(FS_IOC_ENABLE_VERITY, 'f' as u32, 133, fsverity_enable_arg);
229 ioctl_iowr_nr!(FS_IOC_MEASURE_VERITY, 'f' as u32, 134, fsverity_digest);
230
231 pub type Inode = u64;
232 type Handle = u64;
233
234 #[derive(Clone, Copy, Debug, PartialOrd, Ord, PartialEq, Eq)]
235 struct InodeAltKey {
236 ino: libc::ino64_t,
237 dev: libc::dev_t,
238 }
239
240 #[derive(PartialEq, Eq, Debug)]
241 enum FileType {
242 Regular,
243 Directory,
244 Other,
245 }
246
247 impl From<libc::mode_t> for FileType {
248 fn from(mode: libc::mode_t) -> Self {
249 match mode & libc::S_IFMT {
250 libc::S_IFREG => FileType::Regular,
251 libc::S_IFDIR => FileType::Directory,
252 _ => FileType::Other,
253 }
254 }
255 }
256
257 #[derive(Debug)]
258 struct InodeData {
259 inode: Inode,
260 // (File, open_flags)
261 file: Mutex<(File, libc::c_int)>,
262 refcount: AtomicU64,
263 filetype: FileType,
264 path: String,
265 }
266
267 impl AsRawDescriptor for InodeData {
268 fn as_raw_descriptor(&self) -> RawDescriptor {
269 self.file.lock().0.as_raw_descriptor()
270 }
271 }
272
273 #[derive(Debug)]
274 struct HandleData {
275 inode: Inode,
276 file: Mutex<File>,
277 }
278
279 impl AsRawDescriptor for HandleData {
280 fn as_raw_descriptor(&self) -> RawDescriptor {
281 self.file.lock().as_raw_descriptor()
282 }
283 }
284
285 macro_rules! scoped_cred {
286 ($name:ident, $ty:ty, $syscall_nr:expr) => {
287 #[derive(Debug)]
288 struct $name {
289 old: $ty,
290 }
291
292 impl $name {
293 // Changes the effective uid/gid of the current thread to `val`. Changes the thread's
294 // credentials back to `old` when the returned struct is dropped.
295 fn new(val: $ty, old: $ty) -> io::Result<Option<$name>> {
296 if val == old {
297 // Nothing to do since we already have the correct value.
298 return Ok(None);
299 }
300
301 // We want credential changes to be per-thread because otherwise
302 // we might interfere with operations being carried out on other
303 // threads with different uids/gids. However, posix requires that
304 // all threads in a process share the same credentials. To do this
305 // libc uses signals to ensure that when one thread changes its
306 // credentials the other threads do the same thing.
307 //
308 // So instead we invoke the syscall directly in order to get around
309 // this limitation. Another option is to use the setfsuid and
310 // setfsgid system calls. However, since those calls have no way to
311 // return an error, it's preferable to do this instead.
312
313 // SAFETY: this call is safe because it doesn't modify any memory and we
314 // check the return value.
315 let res = unsafe { libc::syscall($syscall_nr, -1, val, -1) };
316 if res == 0 {
317 Ok(Some($name { old }))
318 } else {
319 Err(io::Error::last_os_error())
320 }
321 }
322 }
323
324 impl Drop for $name {
325 fn drop(&mut self) {
326 // SAFETY: trivially safe
327 let res = unsafe { libc::syscall($syscall_nr, -1, self.old, -1) };
328 if res < 0 {
329 error!(
330 "failed to change credentials back to {}: {}",
331 self.old,
332 io::Error::last_os_error(),
333 );
334 }
335 }
336 }
337 };
338 }
339 #[cfg(not(target_arch = "arm"))]
340 scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid);
341 #[cfg(target_arch = "arm")]
342 scoped_cred!(ScopedUid, libc::uid_t, libc::SYS_setresuid32);
343
344 #[cfg(not(target_arch = "arm"))]
345 scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid);
346 #[cfg(target_arch = "arm")]
347 scoped_cred!(ScopedGid, libc::gid_t, libc::SYS_setresgid32);
348
349 #[cfg(not(target_arch = "arm"))]
350 const SYS_GETEUID: libc::c_long = libc::SYS_geteuid;
351 #[cfg(target_arch = "arm")]
352 const SYS_GETEUID: libc::c_long = libc::SYS_geteuid32;
353
354 #[cfg(not(target_arch = "arm"))]
355 const SYS_GETEGID: libc::c_long = libc::SYS_getegid;
356 #[cfg(target_arch = "arm")]
357 const SYS_GETEGID: libc::c_long = libc::SYS_getegid32;
358
359 thread_local! {
360 // SAFETY: both calls take no parameters and only return an integer value. The kernel also
361 // guarantees that they can never fail.
362 static THREAD_EUID: libc::uid_t = unsafe { libc::syscall(SYS_GETEUID) as libc::uid_t };
363 // SAFETY: both calls take no parameters and only return an integer value. The kernel also
364 // guarantees that they can never fail.
365 static THREAD_EGID: libc::gid_t = unsafe { libc::syscall(SYS_GETEGID) as libc::gid_t };
366 }
367
368 fn set_creds(
369 uid: libc::uid_t,
370 gid: libc::gid_t,
371 ) -> io::Result<(Option<ScopedUid>, Option<ScopedGid>)> {
372 let olduid = THREAD_EUID.with(|uid| *uid);
373 let oldgid = THREAD_EGID.with(|gid| *gid);
374
375 // We have to change the gid before we change the uid because if we change the uid first then we
376 // lose the capability to change the gid. However changing back can happen in any order.
377 ScopedGid::new(gid, oldgid).and_then(|gid| Ok((ScopedUid::new(uid, olduid)?, gid)))
378 }
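// Illustrative call pattern (a sketch; `ctx` stands for the FUSE request `Context` carrying
// the guest's uid/gid):
//
//     let (_uid_guard, _gid_guard) = set_creds(ctx.uid, ctx.gid)?;
//     // ... create files or directories on behalf of the guest user ...
//     // Both guards drop at the end of the scope, restoring the thread's original euid/egid.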
379
380 thread_local!(static THREAD_FSCREATE: RefCell<Option<File>> = const { RefCell::new(None) });
381
382 // Opens and returns a write-only handle to /proc/thread-self/attr/fscreate. Panics if it fails to
383 // open the file.
384 fn open_fscreate(proc: &File) -> File {
385 let fscreate = c"thread-self/attr/fscreate";
386
387 // SAFETY: this doesn't modify any memory and we check the return value.
388 let raw_descriptor = unsafe {
389 libc::openat(
390 proc.as_raw_descriptor(),
391 fscreate.as_ptr(),
392 libc::O_CLOEXEC | libc::O_WRONLY,
393 )
394 };
395
396 // We don't expect this to fail and we're not in a position to return an error here so just
397 // panic.
398 if raw_descriptor < 0 {
399 panic!(
400 "Failed to open /proc/thread-self/attr/fscreate: {}",
401 io::Error::last_os_error()
402 );
403 }
404
405 // SAFETY: safe because we just opened this descriptor.
406 unsafe { File::from_raw_descriptor(raw_descriptor) }
407 }
408
409 struct ScopedSecurityContext;
410
411 impl ScopedSecurityContext {
412 fn new(proc: &File, ctx: &CStr) -> io::Result<ScopedSecurityContext> {
413 THREAD_FSCREATE.with(|thread_fscreate| {
414 let mut fscreate = thread_fscreate.borrow_mut();
415 let file = fscreate.get_or_insert_with(|| open_fscreate(proc));
416 // SAFETY: this doesn't modify any memory and we check the return value.
417 let ret = unsafe {
418 libc::write(
419 file.as_raw_descriptor(),
420 ctx.as_ptr() as *const libc::c_void,
421 ctx.to_bytes_with_nul().len(),
422 )
423 };
424 if ret < 0 {
425 Err(io::Error::last_os_error())
426 } else {
427 Ok(ScopedSecurityContext)
428 }
429 })
430 }
431 }
432
433 impl Drop for ScopedSecurityContext {
434 fn drop(&mut self) {
435 THREAD_FSCREATE.with(|thread_fscreate| {
436 // expect is safe here because the thread local would have been initialized by the call
437 // to `new` above.
438 let fscreate = thread_fscreate.borrow();
439 let file = fscreate
440 .as_ref()
441 .expect("Uninitialized thread-local when dropping ScopedSecurityContext");
442
443 // SAFETY: this doesn't modify any memory and we check the return value.
444 let ret = unsafe { libc::write(file.as_raw_descriptor(), ptr::null(), 0) };
445
446 if ret < 0 {
447 warn!(
448 "Failed to restore security context: {}",
449 io::Error::last_os_error()
450 );
451 }
452 })
453 }
454 }
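// Illustrative flow (a sketch; `label` is a hypothetical SELinux context string): while the
// guard is alive, files created by this thread receive the written label, and dropping the
// guard writes an empty string to fscreate to restore the default labeling behavior.
//
//     let _ctx = ScopedSecurityContext::new(&self.proc, label)?;
//     // ... mknod/mkdir/create on behalf of the guest ...
//     // `_ctx` dropped here.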
455
456 struct ScopedUmask {
457 old: libc::mode_t,
458 mask: libc::mode_t,
459 }
460
461 impl ScopedUmask {
462 fn new(mask: libc::mode_t) -> ScopedUmask {
463 ScopedUmask {
464 // SAFETY: this doesn't modify any memory and always succeeds.
465 old: unsafe { libc::umask(mask) },
466 mask,
467 }
468 }
469 }
470
471 impl Drop for ScopedUmask {
472 fn drop(&mut self) {
473 // SAFETY: this doesn't modify any memory and always succeeds.
474 let previous = unsafe { libc::umask(self.old) };
475 debug_assert_eq!(
476 previous, self.mask,
477 "umask changed while holding ScopedUmask"
478 );
479 }
480 }
481
482 struct ScopedFsetid(Caps);
483 impl Drop for ScopedFsetid {
484 fn drop(&mut self) {
485 if let Err(e) = raise_cap_fsetid(&mut self.0) {
486 error!(
487 "Failed to restore CAP_FSETID: {}. Some operations may be broken.",
488 e
489 )
490 }
491 }
492 }
493
494 fn raise_cap_fsetid(c: &mut Caps) -> io::Result<()> {
495 c.update(&[Capability::Fsetid], CapSet::Effective, CapValue::Set)?;
496 c.apply()
497 }
498
499 // Drops CAP_FSETID from the effective set for the current thread and returns an RAII guard that
500 // adds the capability back when it is dropped.
501 fn drop_cap_fsetid() -> io::Result<ScopedFsetid> {
502 let mut caps = Caps::for_current_thread()?;
503 caps.update(&[Capability::Fsetid], CapSet::Effective, CapValue::Clear)?;
504 caps.apply()?;
505 Ok(ScopedFsetid(caps))
506 }
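// Illustrative usage (a sketch): write paths that must honor the FUSE `WRITE_KILL_PRIV` flag
// drop CAP_FSETID for the duration of the write, so the kernel clears setuid/setgid bits just
// as it would for a local write by an unprivileged process.
//
//     let _killpriv_guard = drop_cap_fsetid()?;
//     // ... perform the write ...
//     // The guard drops here and `raise_cap_fsetid` restores the capability.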
507
508 fn ebadf() -> io::Error {
509 io::Error::from_raw_os_error(libc::EBADF)
510 }
511
512 fn eexist() -> io::Error {
513 io::Error::from_raw_os_error(libc::EEXIST)
514 }
515
516 fn stat<F: AsRawDescriptor + ?Sized>(f: &F) -> io::Result<libc::stat64> {
517 let mut st: MaybeUninit<libc::stat64> = MaybeUninit::<libc::stat64>::zeroed();
518
519 // SAFETY: the kernel will only write data in `st` and we check the return value.
520 syscall!(unsafe {
521 libc::fstatat64(
522 f.as_raw_descriptor(),
523 EMPTY_CSTR.as_ptr(),
524 st.as_mut_ptr(),
525 libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
526 )
527 })?;
528
529 // SAFETY: the kernel guarantees that the struct is now fully initialized.
530 Ok(unsafe { st.assume_init() })
531 }
532
533 fn statat<D: AsRawDescriptor>(dir: &D, name: &CStr) -> io::Result<libc::stat64> {
534 let mut st = MaybeUninit::<libc::stat64>::zeroed();
535
536 // SAFETY: the kernel will only write data in `st` and we check the return value.
537 syscall!(unsafe {
538 libc::fstatat64(
539 dir.as_raw_descriptor(),
540 name.as_ptr(),
541 st.as_mut_ptr(),
542 libc::AT_SYMLINK_NOFOLLOW,
543 )
544 })?;
545
546 // SAFETY: the kernel guarantees that the struct is now fully initialized.
547 Ok(unsafe { st.assume_init() })
548 }
549
550 #[cfg(feature = "arc_quota")]
551 fn is_android_project_id(project_id: u32) -> bool {
552 // The following constants define the valid ranges of project IDs used by
553 // Android and are taken from android_filesystem_config.h in the Android
554 // codebase.
555 //
556 // Project IDs reserved for Android files on external storage. A total of 100 IDs
557 // starting from PROJECT_ID_EXT_DEFAULT (1000) are reserved.
558 const PROJECT_ID_FOR_ANDROID_FILES: std::ops::RangeInclusive<u32> = 1000..=1099;
559 // Project IDs reserved for Android apps.
560 // The lower-limit of the range is PROJECT_ID_EXT_DATA_START.
561 // The upper-limit of the range differs before and after T. Here we use that
562 // of T (PROJECT_ID_APP_CACHE_END) as it is larger.
563 const PROJECT_ID_FOR_ANDROID_APPS: std::ops::RangeInclusive<u32> = 20000..=69999;
564
565 PROJECT_ID_FOR_ANDROID_FILES.contains(&project_id)
566 || PROJECT_ID_FOR_ANDROID_APPS.contains(&project_id)
567 }
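// Worked examples (illustrative): `is_android_project_id(1005)` and
// `is_android_project_id(20123)` return true (external-storage and app ranges respectively),
// while `is_android_project_id(500)` and `is_android_project_id(70000)` return false.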
568
569 /// Per-directory cache for `PassthroughFs::ascii_casefold_lookup()`.
570 ///
571 /// The key of the underlying `BTreeMap` is a lower-cased file name in the directory.
572 /// The value is the case-sensitive file name stored in the host file system.
573 /// We assume that if PassthroughFs has exclusive access to the filesystem, this cache exhaustively
574 /// covers all file names that exist within the directory.
575 /// So every `PassthroughFs` handler that adds or removes files in the directory is expected to
576 /// update this cache.
577 struct CasefoldCache(BTreeMap<Vec<u8>, CString>);
578
579 impl CasefoldCache {
580 fn new(dir: &InodeData) -> io::Result<Self> {
581 let mut mp = BTreeMap::new();
582
583 let mut buf = [0u8; 1024];
584 let mut offset = 0;
585 loop {
586 let mut read_dir = ReadDir::new(dir, offset, &mut buf[..])?;
587 if read_dir.remaining() == 0 {
588 break;
589 }
590
591 while let Some(entry) = read_dir.next() {
592 offset = entry.offset as libc::off64_t;
593 let entry_name = entry.name;
594 mp.insert(
595 entry_name.to_bytes().to_ascii_lowercase(),
596 entry_name.to_owned(),
597 );
598 }
599 }
600 Ok(Self(mp))
601 }
602
603 fn insert(&mut self, name: &CStr) {
604 let lower_case = name.to_bytes().to_ascii_lowercase();
605 self.0.insert(lower_case, name.into());
606 }
607
608 fn lookup(&self, name: &[u8]) -> Option<CString> {
609 let lower = name.to_ascii_lowercase();
610 self.0.get(&lower).cloned()
611 }
612
613 fn remove(&mut self, name: &CStr) {
614 let lower_case = name.to_bytes().to_ascii_lowercase();
615 self.0.remove(&lower_case);
616 }
617 }
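// Illustrative behavior (a sketch, not part of the upstream code): after inserting
// "ReadMe.TXT", any ASCII-case variant of the name resolves to the spelling stored on the
// host.
//
//     cache.insert(c"ReadMe.TXT");
//     assert_eq!(cache.lookup(b"readme.txt").as_deref(), Some(c"ReadMe.TXT"));
//     cache.remove(c"ReadMe.TXT");
//     assert_eq!(cache.lookup(b"README.txt"), None);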
618
619 /// Time-expiring mapping from a directory's inode to the `CasefoldCache` for that directory.
620 /// Each entry expires after `timeout`.
621 /// When ascii_casefold is disabled, this struct does nothing.
622 struct ExpiringCasefoldLookupCaches {
623 inner: ExpiringMap<Inode, CasefoldCache>,
624 }
625
626 impl ExpiringCasefoldLookupCaches {
627 fn new(timeout: Duration) -> Self {
628 Self {
629 inner: ExpiringMap::new(timeout),
630 }
631 }
632
633 fn insert(&mut self, parent: Inode, name: &CStr) {
634 if let Some(dir_cache) = self.inner.get_mut(&parent) {
635 dir_cache.insert(name);
636 }
637 }
638
639 fn remove(&mut self, parent: Inode, name: &CStr) {
640 if let Some(dir_cache) = self.inner.get_mut(&parent) {
641 dir_cache.remove(name);
642 }
643 }
644
645 fn forget(&mut self, parent: Inode) {
646 self.inner.remove(&parent);
647 }
648
649 /// Get `CasefoldCache` for the given directory.
650 /// If the cache doesn't exist, generate it by fetching directory information with
651 /// `getdents64()`.
652 fn get(&mut self, parent: &InodeData) -> io::Result<&CasefoldCache> {
653 self.inner
654 .get_or_insert_with(&parent.inode, || CasefoldCache::new(parent))
655 }
656
657 #[cfg(test)]
658 fn exists_in_cache(&mut self, parent: Inode, name: &CStr) -> bool {
659 if let Some(dir_cache) = self.inner.get(&parent) {
660 dir_cache.lookup(name.to_bytes()).is_some()
661 } else {
662 false
663 }
664 }
665 }
666
667 #[cfg(feature = "fs_permission_translation")]
668 impl PermissionData {
669 pub(crate) fn need_set_permission(&self, path: &str) -> bool {
670 path.starts_with(&self.perm_path)
671 }
672 }
673
674 /// A file system that simply "passes through" all requests it receives to the underlying file
675 /// system. To keep the implementation simple it serves the contents of its root directory. Users
676 /// that wish to serve only a specific directory should set up the environment so that that
677 /// directory ends up as the root of the file system process. One way to accomplish this is via a
678 /// combination of mount namespaces and the pivot_root system call.
679 pub struct PassthroughFs {
680 // Mutex that must be acquired before executing a process-wide operation such as fchdir.
681 process_lock: Mutex<()>,
682 // virtio-fs tag that the guest uses when mounting. This is only used for debugging
683 // when tracing is enabled.
684 tag: String,
685
686 // File descriptors for various points in the file system tree.
687 inodes: Mutex<MultikeyBTreeMap<Inode, InodeAltKey, Arc<InodeData>>>,
688 next_inode: AtomicU64,
689
690 // File descriptors for open files and directories. Unlike the fds in `inodes`, these _can_ be
691 // used for reading and writing data.
692 handles: Mutex<BTreeMap<Handle, Arc<HandleData>>>,
693 next_handle: AtomicU64,
694
695 // File descriptor pointing to the `/proc` directory. This is used to convert an fd from
696 // `inodes` into one that can go into `handles`. This is accomplished by reading the
697 // `self/fd/{}` symlink. We keep an open fd here in case the file system tree that we are meant
698 // to be serving doesn't have access to `/proc`.
699 proc: File,
700
701 // Whether writeback caching is enabled for this directory. This will only be true when
702 // `cfg.writeback` is true and `init` was called with `FsOptions::WRITEBACK_CACHE`.
703 writeback: AtomicBool,
704
705 // Whether zero message opens are supported by the kernel driver.
706 zero_message_open: AtomicBool,
707
708 // Whether zero message opendir is supported by the kernel driver.
709 zero_message_opendir: AtomicBool,
710
711 // Used to communicate with other processes using D-Bus.
712 #[cfg(feature = "arc_quota")]
713 dbus_connection: Option<Mutex<dbus::blocking::Connection>>,
714 #[cfg(feature = "arc_quota")]
715 dbus_fd: Option<std::os::unix::io::RawFd>,
716
717 // Time-expiring cache for `ascii_casefold_lookup()`.
718 // The key is an inode of a directory, and the value is a cache for the directory.
719 // Each value will be expired `cfg.timeout` after it's created.
720 //
721 // TODO(b/267748212): Instead of per-device Mutex, we might want to have per-directory Mutex
722 // if we use PassthroughFs in multi-threaded environments.
723 expiring_casefold_lookup_caches: Option<Mutex<ExpiringCasefoldLookupCaches>>,
724
725 // paths and corresponding permission settings set by the `crosvm_client_fs_permission_set` API
726 #[cfg(feature = "fs_permission_translation")]
727 permission_paths: RwLock<Vec<PermissionData>>,
728
729 // paths and corresponding xattr settings set by the `crosvm_client_fs_xattr_set` API
730 #[cfg(feature = "arc_quota")]
731 xattr_paths: RwLock<Vec<XattrData>>,
732
733 cfg: Config,
734
735 // Sets the root directory when pivot_root isn't enabled for the jailed process.
736 //
737 // virtio-fs typically uses mount namespaces and pivot_root for file system isolation,
738 // making the jailed process's root directory "/".
739 //
740 // However, Android's security model prevents crosvm from having the necessary SYS_ADMIN
741 // capability for mount namespaces and pivot_root. This lack of isolation means that
742 // root_dir defaults to the path provided via "--shared-dir".
743 root_dir: String,
744 }
745
746 impl std::fmt::Debug for PassthroughFs {
747 fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
748 f.debug_struct("PassthroughFs")
749 .field("tag", &self.tag)
750 .field("next_inode", &self.next_inode)
751 .field("next_handle", &self.next_handle)
752 .field("proc", &self.proc)
753 .field("writeback", &self.writeback)
754 .field("zero_message_open", &self.zero_message_open)
755 .field("zero_message_opendir", &self.zero_message_opendir)
756 .field("cfg", &self.cfg)
757 .finish()
758 }
759 }
760
761 impl PassthroughFs {
762 pub fn new(tag: &str, cfg: Config) -> io::Result<PassthroughFs> {
763 // SAFETY: this doesn't modify any memory and we check the return value.
764 let raw_descriptor = syscall!(unsafe {
765 libc::openat64(
766 libc::AT_FDCWD,
767 PROC_CSTR.as_ptr(),
768 libc::O_PATH | libc::O_NOFOLLOW | libc::O_CLOEXEC,
769 )
770 })?;
771
772 // Privileged UIDs can use D-Bus to perform some operations.
773 #[cfg(feature = "arc_quota")]
774 let (dbus_connection, dbus_fd) = if cfg.privileged_quota_uids.is_empty() {
775 (None, None)
776 } else {
777 let mut channel = dbus::channel::Channel::get_private(dbus::channel::BusType::System)
778 .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
779 channel.set_watch_enabled(true);
780 let dbus_fd = channel.watch().fd;
781 channel.set_watch_enabled(false);
782 (
783 Some(Mutex::new(dbus::blocking::Connection::from(channel))),
784 Some(dbus_fd),
785 )
786 };
787
788 // SAFETY: safe because we just opened this descriptor.
789 let proc = unsafe { File::from_raw_descriptor(raw_descriptor) };
790
791 let expiring_casefold_lookup_caches = if cfg.ascii_casefold {
792 Some(Mutex::new(ExpiringCasefoldLookupCaches::new(cfg.timeout)))
793 } else {
794 None
795 };
796
797 #[allow(unused_mut)]
798 let mut passthroughfs = PassthroughFs {
799 process_lock: Mutex::new(()),
800 tag: tag.to_string(),
801 inodes: Mutex::new(MultikeyBTreeMap::new()),
802 next_inode: AtomicU64::new(ROOT_ID + 1),
803
804 handles: Mutex::new(BTreeMap::new()),
805 next_handle: AtomicU64::new(1),
806
807 proc,
808
809 writeback: AtomicBool::new(false),
810 zero_message_open: AtomicBool::new(false),
811 zero_message_opendir: AtomicBool::new(false),
812
813 #[cfg(feature = "arc_quota")]
814 dbus_connection,
815 #[cfg(feature = "arc_quota")]
816 dbus_fd,
817 expiring_casefold_lookup_caches,
818 #[cfg(feature = "fs_permission_translation")]
819 permission_paths: RwLock::new(Vec::new()),
820 #[cfg(feature = "arc_quota")]
821 xattr_paths: RwLock::new(Vec::new()),
822 cfg,
823 root_dir: "/".to_string(),
824 };
825
826 #[cfg(feature = "fs_runtime_ugid_map")]
827 passthroughfs.set_permission_path();
828
829 cros_tracing::trace_simple_print!(
830 VirtioFs,
831 "New PassthroughFS initialized: {:?}",
832 passthroughfs
833 );
834 Ok(passthroughfs)
835 }
836
837 #[cfg(feature = "fs_runtime_ugid_map")]
838 fn set_permission_path(&mut self) {
839 if !self.cfg.ugid_map.is_empty() {
840 let mut write_lock = self
841 .permission_paths
842 .write()
843 .expect("Failed to acquire write lock on permission_paths");
844 *write_lock = self.cfg.ugid_map.clone();
845 }
846 }
847
848 #[cfg(feature = "fs_runtime_ugid_map")]
849 pub fn set_root_dir(&mut self, shared_dir: String) -> io::Result<()> {
850 let canonicalized_root = match std::fs::canonicalize(shared_dir) {
851 Ok(path) => path,
852 Err(e) => {
853 return Err(io::Error::new(
854 io::ErrorKind::InvalidInput,
855 format!("Failed to canonicalize root_dir: {}", e),
856 ));
857 }
858 };
859 self.root_dir = canonicalized_root.to_string_lossy().to_string();
860 Ok(())
861 }
862
863 pub fn cfg(&self) -> &Config {
864 &self.cfg
865 }
866
867 pub fn keep_rds(&self) -> Vec<RawDescriptor> {
868 #[cfg_attr(not(feature = "arc_quota"), allow(unused_mut))]
869 let mut keep_rds = vec![self.proc.as_raw_descriptor()];
870 #[cfg(feature = "arc_quota")]
871 if let Some(fd) = self.dbus_fd {
872 keep_rds.push(fd);
873 }
874 keep_rds
875 }
876
877 fn rewrite_xattr_name<'xattr>(&self, name: &'xattr CStr) -> Cow<'xattr, CStr> {
878 if !self.cfg.rewrite_security_xattrs {
879 return Cow::Borrowed(name);
880 }
881
882 // Does not include nul-terminator.
883 let buf = name.to_bytes();
884 if !buf.starts_with(SECURITY_XATTR) || buf == SELINUX_XATTR {
885 return Cow::Borrowed(name);
886 }
887
888 let mut newname = USER_VIRTIOFS_XATTR.to_vec();
889 newname.extend_from_slice(buf);
890
891 // The unwrap is safe here because the prefix doesn't contain any interior nul-bytes and the
892 // to_bytes() call above will not return a byte slice with any interior nul-bytes either.
893 Cow::Owned(CString::new(newname).expect("Failed to re-write xattr name"))
894 }
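// Worked example (illustrative): with `rewrite_security_xattrs` enabled, "security.sehash"
// is rewritten to "user.virtiofs.security.sehash", while "security.selinux" and any
// non-"security." name such as "user.foo" pass through unchanged.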
895
896 fn find_inode(&self, inode: Inode) -> io::Result<Arc<InodeData>> {
897 self.inodes.lock().get(&inode).cloned().ok_or_else(ebadf)
898 }
899
900 fn find_handle(&self, handle: Handle, inode: Inode) -> io::Result<Arc<HandleData>> {
901 self.handles
902 .lock()
903 .get(&handle)
904 .filter(|hd| hd.inode == inode)
905 .cloned()
906 .ok_or_else(ebadf)
907 }
908
909 fn open_fd(&self, fd: RawDescriptor, flags: i32) -> io::Result<File> {
910 let pathname = CString::new(format!("self/fd/{}", fd))
911 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
912
913 // SAFETY: this doesn't modify any memory and we check the return value. We don't really
914 // check `flags` because if the kernel can't handle poorly specified flags then we have
915 // much bigger problems. Also, clear the `O_NOFOLLOW` flag if it is set since we need
916 // to follow the `/proc/self/fd` symlink to get the file.
917 let raw_descriptor = syscall!(unsafe {
918 libc::openat64(
919 self.proc.as_raw_descriptor(),
920 pathname.as_ptr(),
921 (flags | libc::O_CLOEXEC) & !(libc::O_NOFOLLOW | libc::O_DIRECT),
922 )
923 })?;
924
925 // SAFETY: safe because we just opened this descriptor.
926 Ok(unsafe { File::from_raw_descriptor(raw_descriptor) })
927 }
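// Illustrative example (a sketch with a hypothetical descriptor number): given an `O_PATH`
// descriptor 42 held in `inodes`, `open_fd(42, libc::O_RDWR)` re-opens the same file
// read-write by opening "self/fd/42" relative to the `/proc` descriptor, following the magic
// symlink to the underlying file.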
928
929 /// Modifies the provided open flags based on the writeback caching configuration.
930 /// Returns the updated open flags.
931 fn update_open_flags(&self, mut flags: i32) -> i32 {
932 // When writeback caching is enabled, the kernel may send read requests even if the
933 // userspace program opened the file write-only. So we need to ensure that we have opened
934 // the file for reading as well as writing.
935 let writeback = self.writeback.load(Ordering::Relaxed);
936 if writeback && flags & libc::O_ACCMODE == libc::O_WRONLY {
937 flags &= !libc::O_ACCMODE;
938 flags |= libc::O_RDWR;
939 }
940
941 // When writeback caching is enabled the kernel is responsible for handling `O_APPEND`.
942 // However, this breaks atomicity as the file may have changed on disk, invalidating the
943 // cached copy of the data in the kernel and the offset that the kernel thinks is the end of
944 // the file. Just allow this for now as it is the user's responsibility to enable writeback
945 // caching only for directories that are not shared. It also means that we need to clear the
946 // `O_APPEND` flag.
947 if writeback && flags & libc::O_APPEND != 0 {
948 flags &= !libc::O_APPEND;
949 }
950
951 flags
952 }
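// Worked example (illustrative): with writeback caching enabled,
// `update_open_flags(libc::O_WRONLY | libc::O_APPEND)` returns `libc::O_RDWR`: the access
// mode is widened so cached reads can be served, and `O_APPEND` is cleared because the
// kernel handles append positioning itself in this mode.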
953
954 fn open_inode(&self, inode: &InodeData, mut flags: i32) -> io::Result<File> {
955 // handle writeback caching cases
956 flags = self.update_open_flags(flags);
957
958 self.open_fd(inode.as_raw_descriptor(), flags)
959 }
960
961 // Increases the inode refcount and returns the inode.
962 fn increase_inode_refcount(&self, inode_data: &InodeData) -> Inode {
963 // Matches with the release store in `forget`.
964 inode_data.refcount.fetch_add(1, Ordering::Acquire);
965 inode_data.inode
966 }
967
968 // Creates a new entry for `f` or increases the refcount of the existing entry for `f`.
969 // The `inodes` mutex must not already be held by the same thread, otherwise this
970 // will deadlock.
971 fn add_entry(
972 &self,
973 f: File,
974 #[cfg_attr(not(feature = "fs_permission_translation"), allow(unused_mut))]
975 mut st: libc::stat64,
976 open_flags: libc::c_int,
977 path: String,
978 ) -> Entry {
979 #[cfg(feature = "arc_quota")]
980 self.set_permission(&mut st, &path);
981 #[cfg(feature = "fs_runtime_ugid_map")]
982 self.set_ugid_permission(&mut st, &path);
983 let mut inodes = self.inodes.lock();
984
985 let altkey = InodeAltKey {
986 ino: st.st_ino,
987 dev: st.st_dev,
988 };
989
990 let inode = if let Some(data) = inodes.get_alt(&altkey) {
991 self.increase_inode_refcount(data)
992 } else {
993 let inode = self.next_inode.fetch_add(1, Ordering::Relaxed);
994 inodes.insert(
995 inode,
996 altkey,
997 Arc::new(InodeData {
998 inode,
999 file: Mutex::new((f, open_flags)),
1000 refcount: AtomicU64::new(1),
1001 filetype: st.st_mode.into(),
1002 path,
1003 }),
1004 );
1005
1006 inode
1007 };
1008
1009 Entry {
1010 inode,
1011 generation: 0,
1012 attr: st,
1013 // We use the same timeout for the attribute and the entry.
1014 attr_timeout: self.cfg.timeout,
1015 entry_timeout: self.cfg.timeout,
1016 }
1017 }
1018
1019 /// Acquires the lock on `expiring_casefold_lookup_caches` if `ascii_casefold` is enabled.
1020 fn lock_casefold_lookup_caches(&self) -> Option<MutexGuard<'_, ExpiringCasefoldLookupCaches>> {
1021 self.expiring_casefold_lookup_caches
1022 .as_ref()
1023 .map(|c| c.lock())
1024 }
1025
1026 // Returns the actual case-sensitive file name that matches the given `name`.
1027 // Returns `Ok(None)` if no file matches the given `name`.
1028 // This function will panic if casefold is not enabled.
1029 fn get_case_unfolded_name(
1030 &self,
1031 parent: &InodeData,
1032 name: &[u8],
1033 ) -> io::Result<Option<CString>> {
1034 let mut caches = self
1035 .lock_casefold_lookup_caches()
1036 .expect("casefold must be enabled");
1037 let dir_cache = caches.get(parent)?;
1038 Ok(dir_cache.lookup(name))
1039 }
1040
1041 // Performs an ASCII case-insensitive lookup.
1042 fn ascii_casefold_lookup(&self, parent: &InodeData, name: &[u8]) -> io::Result<Entry> {
1043 match self.get_case_unfolded_name(parent, name)? {
1044 None => Err(io::Error::from_raw_os_error(libc::ENOENT)),
1045 Some(actual_name) => self.do_lookup(parent, &actual_name),
1046 }
1047 }
1048
1049 #[cfg(test)]
1050 fn exists_in_casefold_cache(&self, parent: Inode, name: &CStr) -> bool {
1051 let mut cache = self
1052 .lock_casefold_lookup_caches()
1053 .expect("casefold must be enabled");
1054 cache.exists_in_cache(parent, name)
1055 }
1056
1057 fn do_lookup(&self, parent: &InodeData, name: &CStr) -> io::Result<Entry> {
1058 #[cfg_attr(not(feature = "fs_permission_translation"), allow(unused_mut))]
1059 let mut st = statat(parent, name)?;
1060
1061 let altkey = InodeAltKey {
1062 ino: st.st_ino,
1063 dev: st.st_dev,
1064 };
1065
1066 let path = format!(
1067 "{}/{}",
1068 parent.path.clone(),
1069 name.to_str().unwrap_or("<non UTF-8 str>")
1070 );
1071
1072 // Check if we already have an entry before opening a new file.
1073 if let Some(data) = self.inodes.lock().get_alt(&altkey) {
1074 // Return the same inode with the reference counter increased.
1075 #[cfg(feature = "arc_quota")]
1076 self.set_permission(&mut st, &path);
1077 #[cfg(feature = "fs_runtime_ugid_map")]
1078 self.set_ugid_permission(&mut st, &path);
1079 return Ok(Entry {
1080 inode: self.increase_inode_refcount(data),
1081 generation: 0,
1082 attr: st,
1083 // We use the same timeout for the attribute and the entry.
1084 attr_timeout: self.cfg.timeout,
1085 entry_timeout: self.cfg.timeout,
1086 });
1087 }
1088
1089 // Open a regular file with O_RDONLY to store in `InodeData` so explicit open requests can
1090 // be skipped later if the ZERO_MESSAGE_{OPEN,OPENDIR} features are enabled.
1091 // If the crosvm process doesn't have a read permission, fall back to O_PATH below.
1092 let mut flags = libc::O_RDONLY | libc::O_NOFOLLOW | libc::O_CLOEXEC;
1093 match FileType::from(st.st_mode) {
1094 FileType::Regular => {}
1095 FileType::Directory => flags |= libc::O_DIRECTORY,
1096 FileType::Other => flags |= libc::O_PATH,
1097 };
1098
1099 // SAFETY: this doesn't modify any memory and we check the return value.
1100 let fd = match unsafe {
1101 syscall!(libc::openat64(
1102 parent.as_raw_descriptor(),
1103 name.as_ptr(),
1104 flags
1105 ))
1106 } {
1107 Ok(fd) => fd,
1108 Err(e) if e.errno() == libc::EACCES => {
1109 // If O_RDONLY is unavailable, fall back to O_PATH to get an FD to store in
1110 // `InodeData`.
1111 // Note that some operations which should be allowed without read permissions
1112 // require syscalls that don't support O_PATH fds. For those syscalls, we will
1113 // need to fall back to their path-based equivalents with /self/fd/${FD}.
1114 // e.g. `fgetxattr()` for an O_PATH FD fails while `getxattr()` for /self/fd/${FD}
1115 // works.
1116 flags |= libc::O_PATH;
1117 // SAFETY: this doesn't modify any memory and we check the return value.
1118 unsafe {
1119 syscall!(libc::openat64(
1120 parent.as_raw_descriptor(),
1121 name.as_ptr(),
1122 flags
1123 ))
1124 }?
1125 }
1126 Err(e) => {
1127 return Err(e.into());
1128 }
1129 };
1130
1131 // SAFETY: safe because we own the fd.
1132 let f = unsafe { File::from_raw_descriptor(fd) };
1133 // We made sure the lock acquired for `self.inodes` is released automatically when
1134 // the if block above is exited, so a call to `self.add_entry()` should not cause a deadlock
1135 // here. This would not be the case if this were executed in an else block instead.
1136 Ok(self.add_entry(f, st, flags, path))
1137 }
1138
1139 fn get_cache_open_options(&self, flags: u32) -> OpenOptions {
1140 let mut opts = OpenOptions::empty();
1141 match self.cfg.cache_policy {
1142 // We only set the direct I/O option on files.
1143 CachePolicy::Never => opts.set(
1144 OpenOptions::DIRECT_IO,
1145 flags & (libc::O_DIRECTORY as u32) == 0,
1146 ),
1147 CachePolicy::Always => {
1148 opts |= if flags & (libc::O_DIRECTORY as u32) == 0 {
1149 OpenOptions::KEEP_CACHE
1150 } else {
1151 OpenOptions::CACHE_DIR
1152 }
1153 }
1154 _ => {}
1155 };
1156 opts
1157 }
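// Worked examples (illustrative): with `CachePolicy::Never`, a regular file gets `DIRECT_IO`
// while a directory (O_DIRECTORY set in `flags`) gets no option; with `CachePolicy::Always`,
// a regular file gets `KEEP_CACHE` and a directory gets `CACHE_DIR`; any other policy leaves
// the options empty so the kernel defaults apply.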
1158
1159 // Performs a lookup using the original name first; if that fails and ascii_casefold is
1160 // enabled, it tries to unfold the name and performs the lookup again.
1161 fn do_lookup_with_casefold_fallback(
1162 &self,
1163 parent: &InodeData,
1164 name: &CStr,
1165 ) -> io::Result<Entry> {
1166 let mut res = self.do_lookup(parent, name);
1167 // If `ascii_casefold` is enabled, fallback to `ascii_casefold_lookup()`.
1168 if res.is_err() && self.cfg.ascii_casefold {
1169 res = self.ascii_casefold_lookup(parent, name.to_bytes());
1170 }
1171 res
1172 }
1173
1174 fn do_open(&self, inode: Inode, flags: u32) -> io::Result<(Option<Handle>, OpenOptions)> {
1175 let inode_data = self.find_inode(inode)?;
1176
1177 let file = Mutex::new(self.open_inode(&inode_data, flags as i32)?);
1178
1179 let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1180 let data = HandleData { inode, file };
1181
1182 self.handles.lock().insert(handle, Arc::new(data));
1183
1184 let opts = self.get_cache_open_options(flags);
1185
1186 Ok((Some(handle), opts))
1187 }
1188
1189 fn do_open_at(
1190 &self,
1191 parent_data: Arc<InodeData>,
1192 name: &CStr,
1193 inode: Inode,
1194 flags: u32,
1195 ) -> io::Result<(Option<Handle>, OpenOptions)> {
1196 let open_flags = self.update_open_flags(flags as i32);
1197
1198 let fd_open = syscall!(
1199 // SAFETY: return value is checked.
1200 unsafe {
1201 libc::openat64(
1202 parent_data.as_raw_descriptor(),
1203 name.as_ptr(),
1204 (open_flags | libc::O_CLOEXEC) & !(libc::O_NOFOLLOW | libc::O_DIRECT),
1205 )
1206 }
1207 )?;
1208
1209 // SAFETY: fd_open is valid
1210 let file_open = unsafe { File::from_raw_descriptor(fd_open) };
1211 let handle = self.next_handle.fetch_add(1, Ordering::Relaxed);
1212 let data = HandleData {
1213 inode,
1214 file: Mutex::new(file_open),
1215 };
1216
1217 self.handles.lock().insert(handle, Arc::new(data));
1218
1219 let opts = self.get_cache_open_options(open_flags as u32);
1220 Ok((Some(handle), opts))
1221 }
1222
1223 fn do_release(&self, inode: Inode, handle: Handle) -> io::Result<()> {
1224 let mut handles = self.handles.lock();
1225
1226 if let btree_map::Entry::Occupied(e) = handles.entry(handle) {
1227 if e.get().inode == inode {
1228 // We don't need to close the file here because that will happen automatically when
1229 // the last `Arc` is dropped.
1230 e.remove();
1231 return Ok(());
1232 }
1233 }
1234
1235 Err(ebadf())
1236 }
1237
1238 fn do_getattr(&self, inode: &InodeData) -> io::Result<(libc::stat64, Duration)> {
1239 #[allow(unused_mut)]
1240 let mut st = stat(inode)?;
1241
1242 #[cfg(feature = "arc_quota")]
1243 self.set_permission(&mut st, &inode.path);
1244 #[cfg(feature = "fs_runtime_ugid_map")]
1245 self.set_ugid_permission(&mut st, &inode.path);
1246 Ok((st, self.cfg.timeout))
1247 }
1248
1249 fn do_unlink(&self, parent: &InodeData, name: &CStr, flags: libc::c_int) -> io::Result<()> {
1250 // SAFETY: this doesn't modify any memory and we check the return value.
1251 syscall!(unsafe { libc::unlinkat(parent.as_raw_descriptor(), name.as_ptr(), flags) })?;
1252 Ok(())
1253 }
1254
1255 fn do_fsync<F: AsRawDescriptor>(&self, file: &F, datasync: bool) -> io::Result<()> {
1256 // SAFETY: this doesn't modify any memory and we check the return value.
1257 syscall!(unsafe {
1258 if datasync {
1259 libc::fdatasync(file.as_raw_descriptor())
1260 } else {
1261 libc::fsync(file.as_raw_descriptor())
1262 }
1263 })?;
1264
1265 Ok(())
1266 }
1267
1268 // Changes the CWD to `self.proc`, runs `f`, and then changes the CWD back to the root
1269 // directory. This effectively emulates an *at syscall starting at /proc, which is useful when
1270 // there is no *at syscall available. Panics if any of the fchdir calls fail or if there is no
1271 // root inode.
1272 //
1273 // NOTE: this method acquires an `self`-wide lock. If any locks are acquired in `f`, care must
1274 // be taken to avoid the risk of deadlocks.
1275 fn with_proc_chdir<F, T>(&self, f: F) -> T
1276 where
1277 F: FnOnce() -> T,
1278 {
1279 let root = self.find_inode(ROOT_ID).expect("failed to find root inode");
1280
1281 // Acquire a lock for `fchdir`.
1282 let _proc_lock = self.process_lock.lock();
1283 // SAFETY: this doesn't modify any memory and we check the return value. Since the
1284 // fchdir should never fail we just use debug_asserts.
1285 let proc_cwd = unsafe { libc::fchdir(self.proc.as_raw_descriptor()) };
1286 debug_assert_eq!(
1287 proc_cwd,
1288 0,
1289 "failed to fchdir to /proc: {}",
1290 io::Error::last_os_error()
1291 );
1292
1293 let res = f();
1294
1295 // SAFETY: this doesn't modify any memory and we check the return value. Since the
1296 // fchdir should never fail we just use debug_asserts.
1297 let root_cwd = unsafe { libc::fchdir(root.as_raw_descriptor()) };
1298 debug_assert_eq!(
1299 root_cwd,
1300 0,
1301 "failed to fchdir back to root directory: {}",
1302 io::Error::last_os_error()
1303 );
1304
1305 res
1306 }
1307
1308 fn do_getxattr(&self, inode: &InodeData, name: &CStr, value: &mut [u8]) -> io::Result<usize> {
1309 let file = inode.file.lock();
1310 let o_path_file = (file.1 & libc::O_PATH) != 0;
1311 let res = if o_path_file {
1312 // For FDs opened with `O_PATH`, we cannot call `fgetxattr` normally. Instead we
1313 // emulate an _at syscall by changing the CWD to /proc, running the path based syscall,
1314 // and then setting the CWD back to the root directory.
1315 let path = CString::new(format!("self/fd/{}", file.0.as_raw_descriptor()))
1316 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1317
1318 // SAFETY: this will only modify `value` and we check the return value.
1319 self.with_proc_chdir(|| unsafe {
1320 libc::getxattr(
1321 path.as_ptr(),
1322 name.as_ptr(),
1323 value.as_mut_ptr() as *mut libc::c_void,
1324 value.len() as libc::size_t,
1325 )
1326 })
1327 } else {
1328 // For regular files and directories, we can just use fgetxattr.
1329 // SAFETY: this will only write to `value` and we check the return value.
1330 unsafe {
1331 libc::fgetxattr(
1332 file.0.as_raw_descriptor(),
1333 name.as_ptr(),
1334 value.as_mut_ptr() as *mut libc::c_void,
1335 value.len() as libc::size_t,
1336 )
1337 }
1338 };
1339
1340 if res < 0 {
1341 Err(io::Error::last_os_error())
1342 } else {
1343 Ok(res as usize)
1344 }
1345 }
1346
1347 fn get_encryption_policy_ex<R: io::Read>(
1348 &self,
1349 inode: Inode,
1350 handle: Handle,
1351 mut r: R,
1352 ) -> io::Result<IoctlReply> {
1353 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1354 self.find_inode(inode)?
1355 } else {
1356 self.find_handle(handle, inode)?
1357 };
1358
1359 // SAFETY: this struct only has integer fields and any value is valid.
1360 let mut arg = unsafe { MaybeUninit::<fscrypt_get_policy_ex_arg>::zeroed().assume_init() };
1361 r.read_exact(arg.policy_size.as_bytes_mut())?;
1362
1363 let policy_size = cmp::min(arg.policy_size, size_of::<fscrypt_policy>() as u64);
1364 arg.policy_size = policy_size;
1365
1366 let res =
1367 // SAFETY: the kernel will only write to `arg` and we check the return value.
1368 unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_GET_ENCRYPTION_POLICY_EX, &mut arg) };
1369 if res < 0 {
1370 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1371 } else {
1372 let len = size_of::<u64>() + arg.policy_size as usize;
1373 Ok(IoctlReply::Done(Ok(<&[u8]>::from(&arg)[..len].to_vec())))
1374 }
1375 }
1376
1377 fn get_fsxattr(&self, inode: Inode, handle: Handle) -> io::Result<IoctlReply> {
1378 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1379 self.find_inode(inode)?
1380 } else {
1381 self.find_handle(handle, inode)?
1382 };
1383
1384 let mut buf = MaybeUninit::<fsxattr>::zeroed();
1385
1386 // SAFETY: the kernel will only write to `buf` and we check the return value.
1387 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_FSGETXATTR, buf.as_mut_ptr()) };
1388 if res < 0 {
1389 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1390 } else {
1391 // SAFETY: the kernel guarantees that the policy is now initialized.
1392 let xattr = unsafe { buf.assume_init() };
1393 Ok(IoctlReply::Done(Ok(xattr.as_bytes().to_vec())))
1394 }
1395 }
1396
1397 fn set_fsxattr<R: io::Read>(
1398 &self,
1399 #[cfg_attr(not(feature = "arc_quota"), allow(unused_variables))] ctx: Context,
1400 inode: Inode,
1401 handle: Handle,
1402 mut r: R,
1403 ) -> io::Result<IoctlReply> {
1404 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1405 self.find_inode(inode)?
1406 } else {
1407 self.find_handle(handle, inode)?
1408 };
1409
1410 let mut in_attr = fsxattr::new_zeroed();
1411 r.read_exact(in_attr.as_bytes_mut())?;
1412
1413 #[cfg(feature = "arc_quota")]
1414 let st = stat(&*data)?;
1415
1416 // Changing the quota project ID requires CAP_FOWNER or being the file owner.
1417 // Here we use privileged_quota_uids because we cannot perform a CAP_FOWNER check.
1418 #[cfg(feature = "arc_quota")]
1419 if ctx.uid == st.st_uid || self.cfg.privileged_quota_uids.contains(&ctx.uid) {
1420 // Get the current fsxattr.
1421 let mut buf = MaybeUninit::<fsxattr>::zeroed();
1422 // SAFETY: the kernel will only write to `buf` and we check the return value.
1423 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_FSGETXATTR, buf.as_mut_ptr()) };
1424 if res < 0 {
1425 return Ok(IoctlReply::Done(Err(io::Error::last_os_error())));
1426 }
1427 // SAFETY: the kernel guarantees that the policy is now initialized.
1428 let current_attr = unsafe { buf.assume_init() };
1429
1430 // Project ID cannot be changed inside a user namespace.
1431 // Use Spaced to avoid this restriction.
1432 if current_attr.fsx_projid != in_attr.fsx_projid {
1433 let connection = self.dbus_connection.as_ref().unwrap().lock();
1434 let proxy = connection.with_proxy(
1435 "org.chromium.Spaced",
1436 "/org/chromium/Spaced",
1437 DEFAULT_DBUS_TIMEOUT,
1438 );
1439 let project_id = in_attr.fsx_projid;
1440 if !is_android_project_id(project_id) {
1441 return Err(io::Error::from_raw_os_error(libc::EINVAL));
1442 }
1443 let file_clone = base::SafeDescriptor::try_from(&*data)?;
1444 match proxy.set_project_id(file_clone.into(), project_id) {
1445 Ok(r) => {
1446 let r = SetProjectIdReply::parse_from_bytes(&r)
1447 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1448 if !r.success {
1449 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1450 r.error,
1451 ))));
1452 }
1453 }
1454 Err(e) => {
1455 return Err(io::Error::new(io::ErrorKind::Other, e));
1456 }
1457 };
1458 }
1459 }
1460
1461 // SAFETY: this doesn't modify any memory and we check the return value.
1462 let res = unsafe { ioctl_with_ptr(&*data, FS_IOC_FSSETXATTR, &in_attr) };
1463 if res < 0 {
1464 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1465 } else {
1466 Ok(IoctlReply::Done(Ok(Vec::new())))
1467 }
1468 }
1469
1470 fn get_flags(&self, inode: Inode, handle: Handle) -> io::Result<IoctlReply> {
1471 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1472 self.find_inode(inode)?
1473 } else {
1474 self.find_handle(handle, inode)?
1475 };
1476
1477 // The ioctl encoding is a long but the parameter is actually an int.
1478 let mut flags: c_int = 0;
1479
1480 // SAFETY: the kernel will only write to `flags` and we check the return value.
1481 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_GETFLAGS, &mut flags) };
1482 if res < 0 {
1483 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1484 } else {
1485 Ok(IoctlReply::Done(Ok(flags.to_ne_bytes().to_vec())))
1486 }
1487 }
1488
1489 fn set_flags<R: io::Read>(
1490 &self,
1491 #[cfg_attr(not(feature = "arc_quota"), allow(unused_variables))] ctx: Context,
1492 inode: Inode,
1493 handle: Handle,
1494 mut r: R,
1495 ) -> io::Result<IoctlReply> {
1496 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1497 self.find_inode(inode)?
1498 } else {
1499 self.find_handle(handle, inode)?
1500 };
1501
1502 // The ioctl encoding is a long but the parameter is actually an int.
1503 let mut in_flags: c_int = 0;
1504 r.read_exact(in_flags.as_bytes_mut())?;
1505
1506 #[cfg(feature = "arc_quota")]
1507 let st = stat(&*data)?;
1508
1509 // Only a privileged uid can perform FS_IOC_SETFLAGS through cryptohome.
1510 #[cfg(feature = "arc_quota")]
1511 if ctx.uid == st.st_uid || self.cfg.privileged_quota_uids.contains(&ctx.uid) {
1512 // Get the current flag.
1513 let mut buf = MaybeUninit::<c_int>::zeroed();
1514 // SAFETY: the kernel will only write to `buf` and we check the return value.
1515 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_GETFLAGS, buf.as_mut_ptr()) };
1516 if res < 0 {
1517 return Ok(IoctlReply::Done(Err(io::Error::last_os_error())));
1518 }
1519 // SAFETY: the kernel guarantees that the policy is now initialized.
1520 let current_flags = unsafe { buf.assume_init() };
1521
1522 // Project inheritance flag cannot be changed inside a user namespace.
1523 // Use Spaced to avoid this restriction.
1524 if (in_flags & FS_PROJINHERIT_FL) != (current_flags & FS_PROJINHERIT_FL) {
1525 let connection = self.dbus_connection.as_ref().unwrap().lock();
1526 let proxy = connection.with_proxy(
1527 "org.chromium.Spaced",
1528 "/org/chromium/Spaced",
1529 DEFAULT_DBUS_TIMEOUT,
1530 );
1531 // If the input flags contain FS_PROJINHERIT_FL, then it is a set. Otherwise it is a
1532 // reset.
1533 let enable = (in_flags & FS_PROJINHERIT_FL) == FS_PROJINHERIT_FL;
1534 let file_clone = base::SafeDescriptor::try_from(&*data)?;
1535 match proxy.set_project_inheritance_flag(file_clone.into(), enable) {
1536 Ok(r) => {
1537 let r = SetProjectInheritanceFlagReply::parse_from_bytes(&r)
1538 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
1539 if !r.success {
1540 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1541 r.error,
1542 ))));
1543 }
1544 }
1545 Err(e) => {
1546 return Err(io::Error::new(io::ErrorKind::Other, e));
1547 }
1548 };
1549 }
1550 }
1551
1552 // SAFETY: this doesn't modify any memory and we check the return value.
1553 let res = unsafe { ioctl_with_ptr(&*data, FS_IOC_SETFLAGS, &in_flags) };
1554 if res < 0 {
1555 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1556 } else {
1557 Ok(IoctlReply::Done(Ok(Vec::new())))
1558 }
1559 }
1560
1561 fn enable_verity<R: io::Read>(
1562 &self,
1563 inode: Inode,
1564 handle: Handle,
1565 mut r: R,
1566 ) -> io::Result<IoctlReply> {
1567 let inode_data = self.find_inode(inode)?;
1568
1569 // These match the return codes from `fsverity_ioctl_enable` in the kernel.
1570 match inode_data.filetype {
1571 FileType::Regular => {}
1572 FileType::Directory => return Err(io::Error::from_raw_os_error(libc::EISDIR)),
1573 FileType::Other => return Err(io::Error::from_raw_os_error(libc::EINVAL)),
1574 }
1575
1576 {
1577 // We cannot enable verity while holding a writable fd so get a new one, if necessary.
1578 let mut file = inode_data.file.lock();
1579 let mut flags = file.1;
1580 match flags & libc::O_ACCMODE {
1581 libc::O_WRONLY | libc::O_RDWR => {
1582 flags &= !libc::O_ACCMODE;
1583 flags |= libc::O_RDONLY;
1584
1585 // We need to get a read-only handle for this file.
1586 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDONLY)?;
1587 *file = (newfile, flags);
1588 }
1589 libc::O_RDONLY => {}
1590 _ => panic!("Unexpected flags: {:#x}", flags),
1591 }
1592 }
1593
1594 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1595 inode_data
1596 } else {
1597 let data = self.find_handle(handle, inode)?;
1598
1599 {
1600 // We can't enable verity while holding a writable fd. We don't know whether the
1601 // file was opened for writing so check it here. We don't expect
1602 // this to be a frequent operation so the extra latency should be
1603 // fine.
1604 let mut file = data.file.lock();
1605 let flags = FileFlags::from_file(&*file).map_err(io::Error::from)?;
1606 match flags {
1607 FileFlags::ReadWrite | FileFlags::Write => {
1608 // We need to get a read-only handle for this file.
1609 *file = self.open_fd(file.as_raw_descriptor(), libc::O_RDONLY)?;
1610 }
1611 FileFlags::Read => {}
1612 }
1613 }
1614
1615 data
1616 };
1617
1618 let mut arg = fsverity_enable_arg::new_zeroed();
1619 r.read_exact(arg.as_bytes_mut())?;
1620
1621 let mut salt;
1622 if arg.salt_size > 0 {
1623 if arg.salt_size > self.max_buffer_size() {
1624 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1625 libc::ENOMEM,
1626 ))));
1627 }
1628 salt = vec![0; arg.salt_size as usize];
1629 r.read_exact(&mut salt)?;
1630 arg.salt_ptr = salt.as_ptr() as usize as u64;
1631 } else {
1632 arg.salt_ptr = 0;
1633 }
1634
1635 let mut sig;
1636 if arg.sig_size > 0 {
1637 if arg.sig_size > self.max_buffer_size() {
1638 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1639 libc::ENOMEM,
1640 ))));
1641 }
1642 sig = vec![0; arg.sig_size as usize];
1643 r.read_exact(&mut sig)?;
1644 arg.sig_ptr = sig.as_ptr() as usize as u64;
1645 } else {
1646 arg.sig_ptr = 0;
1647 }
1648
1649 // SAFETY: this doesn't modify any memory and we check the return value.
1650 let res = unsafe { ioctl_with_ptr(&*data, FS_IOC_ENABLE_VERITY, &arg) };
1651 if res < 0 {
1652 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1653 } else {
1654 Ok(IoctlReply::Done(Ok(Vec::new())))
1655 }
1656 }
1657
1658 fn measure_verity<R: io::Read>(
1659 &self,
1660 inode: Inode,
1661 handle: Handle,
1662 mut r: R,
1663 out_size: u32,
1664 ) -> io::Result<IoctlReply> {
1665 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
1666 self.find_inode(inode)?
1667 } else {
1668 self.find_handle(handle, inode)?
1669 };
1670
1671 let mut digest = fsverity_digest::new_zeroed();
1672 r.read_exact(digest.as_bytes_mut())?;
1673
1674 // Taken from fs/verity/fsverity_private.h.
1675 const FS_VERITY_MAX_DIGEST_SIZE: u16 = 64;
1676
1677 // This digest size is what the fsverity command line utility uses.
1678 const DIGEST_SIZE: u16 = FS_VERITY_MAX_DIGEST_SIZE * 2 + 1;
1679 const BUFLEN: usize = size_of::<fsverity_digest>() + DIGEST_SIZE as usize;
1680 const ROUNDED_LEN: usize =
1681 (BUFLEN + size_of::<fsverity_digest>() - 1) / size_of::<fsverity_digest>();
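// Worked example, assuming the kernel's two-u16 `fsverity_digest` header (i.e.
// size_of::<fsverity_digest>() == 4): DIGEST_SIZE = 64 * 2 + 1 = 129, BUFLEN =
// 4 + 129 = 133, and ROUNDED_LEN = (133 + 3) / 4 = 34 header-sized slots, or
// 136 properly aligned bytes of storage.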
1682
1683 // Make sure we get a properly aligned allocation.
1684 let mut buf = [MaybeUninit::<fsverity_digest>::uninit(); ROUNDED_LEN];
1685
1686 // SAFETY: we are only writing data and not reading uninitialized memory.
1687 unsafe {
1688 // TODO: Replace with `MaybeUninit::slice_as_mut_ptr` once it is stabilized.
1689 addr_of_mut!((*(buf.as_mut_ptr() as *mut fsverity_digest)).digest_size)
1690 .write(DIGEST_SIZE)
1691 };
1692
1693 // SAFETY: this will only modify `buf` and we check the return value.
1694 let res = unsafe { ioctl_with_mut_ptr(&*data, FS_IOC_MEASURE_VERITY, buf.as_mut_ptr()) };
1695 if res < 0 {
1696 Ok(IoctlReply::Done(Err(io::Error::last_os_error())))
1697 } else {
1698 let digest_size =
1699 // SAFETY: this value was initialized by us already and then overwritten by the kernel.
1700 // TODO: Replace with `MaybeUninit::slice_as_ptr` once it is stabilized.
1701 unsafe { addr_of!((*(buf.as_ptr() as *const fsverity_digest)).digest_size).read() };
1702 let outlen = size_of::<fsverity_digest>() as u32 + u32::from(digest_size);
1703
1704 // The kernel guarantees this but it doesn't hurt to be paranoid.
1705 debug_assert!(outlen <= (ROUNDED_LEN * size_of::<fsverity_digest>()) as u32);
1706 if digest.digest_size < digest_size || out_size < outlen {
1707 return Ok(IoctlReply::Done(Err(io::Error::from_raw_os_error(
1708 libc::EOVERFLOW,
1709 ))));
1710 }
1711
1712 let buf: [MaybeUninit<u8>; ROUNDED_LEN * size_of::<fsverity_digest>()] =
1713 // SAFETY: any bit pattern is valid for `MaybeUninit<u8>` and `fsverity_digest`
1714 // doesn't contain any references.
1715 unsafe { mem::transmute(buf) };
1716
1717 let buf =
1718 // SAFETY: Casting to `*const [u8]` is safe because the kernel guarantees that the
1719 // first `outlen` bytes of `buf` are initialized and `MaybeUninit<u8>` is guaranteed
1720 // to have the same layout as `u8`.
1721 // TODO: Replace with `MaybeUninit::slice_assume_init_ref` once it is stabilized.
1722 unsafe { &*(&buf[..outlen as usize] as *const [MaybeUninit<u8>] as *const [u8]) };
1723 Ok(IoctlReply::Done(Ok(buf.to_vec())))
1724 }
1725 }
1726 }
1727
1728 #[cfg(feature = "fs_runtime_ugid_map")]
1729 impl PassthroughFs {
1730 fn find_and_set_ugid_permission(
1731 &self,
1732 st: &mut libc::stat64,
1733 path: &str,
1734 is_root_path: bool,
1735 ) -> bool {
1736 for perm_data in self
1737 .permission_paths
1738 .read()
1739 .expect("acquire permission_paths read lock")
1740 .iter()
1741 {
1742 if (is_root_path && perm_data.perm_path == "/")
1743 || (!is_root_path
1744 && perm_data.perm_path != "/"
1745 && perm_data.need_set_permission(path))
1746 {
1747 self.set_permission_from_data(st, perm_data);
1748 return true;
1749 }
1750 }
1751 false
1752 }
1753
1754 fn set_permission_from_data(&self, st: &mut libc::stat64, perm_data: &PermissionData) {
1755 st.st_uid = perm_data.guest_uid;
1756 st.st_gid = perm_data.guest_gid;
1757 st.st_mode = (st.st_mode & libc::S_IFMT) | (0o777 & !perm_data.umask);
1758 }
1759
1760 /// Set permission according to path
1761 fn set_ugid_permission(&self, st: &mut libc::stat64, path: &str) {
1762 let is_root_path = path.is_empty();
1763
1764 if self.find_and_set_ugid_permission(st, path, is_root_path) {
1765 return;
1766 }
1767
1768 if let Some(perm_data) = self
1769 .permission_paths
1770 .read()
1771 .expect("acquire permission_paths read lock")
1772 .iter()
1773 .find(|pd| pd.perm_path == "/")
1774 {
1775 self.set_permission_from_data(st, perm_data);
1776 }
1777 }
1778
1779 /// Get the host uid/gid configured for this path, falling back to the caller's uid/gid
1780 fn change_ugid_creds(&self, ctx: &Context, parent_data: &InodeData, name: &CStr) -> (u32, u32) {
1781 let path = format!(
1782 "{}/{}",
1783 parent_data.path.clone(),
1784 name.to_str().unwrap_or("<non UTF-8 str>")
1785 );
1786
1787 let is_root_path = path.is_empty();
1788
1789 if let Some(creds) = self.find_ugid_creds_for_path(&path, is_root_path) {
1790 return creds;
1791 }
1792
1793 if let Some(perm_data) = self
1794 .permission_paths
1795 .read()
1796 .expect("acquire permission_paths read lock")
1797 .iter()
1798 .find(|pd| pd.perm_path == "/")
1799 {
1800 return (perm_data.host_uid, perm_data.host_gid);
1801 }
1802
1803 (ctx.uid, ctx.gid)
1804 }
1805
1806 fn find_ugid_creds_for_path(&self, path: &str, is_root_path: bool) -> Option<(u32, u32)> {
1807 for perm_data in self
1808 .permission_paths
1809 .read()
1810 .expect("acquire permission_paths read lock")
1811 .iter()
1812 {
1813 if (is_root_path && perm_data.perm_path == "/")
1814 || (!is_root_path
1815 && perm_data.perm_path != "/"
1816 && perm_data.need_set_permission(path))
1817 {
1818 return Some((perm_data.host_uid, perm_data.host_gid));
1819 }
1820 }
1821 None
1822 }
1823 }
1824
1825 #[cfg(feature = "arc_quota")]
1826 impl PassthroughFs {
1827 /// Convert u8 slice to string
1828 fn string_from_u8_slice(&self, buf: &[u8]) -> io::Result<String> {
1829 match CStr::from_bytes_until_nul(buf).map(|s| s.to_string_lossy().to_string()) {
1830 Ok(s) => Ok(s),
1831 Err(e) => {
1832 error!("fail to convert u8 slice to string: {}", e);
1833 Err(io::Error::from_raw_os_error(libc::EINVAL))
1834 }
1835 }
1836 }
1837
1838 /// Set permission according to path
1839 fn set_permission(&self, st: &mut libc::stat64, path: &str) {
1840 for perm_data in self
1841 .permission_paths
1842 .read()
1843 .expect("acquire permission_paths read lock")
1844 .iter()
1845 {
1846 if perm_data.need_set_permission(path) {
1847 st.st_uid = perm_data.guest_uid;
1848 st.st_gid = perm_data.guest_gid;
1849 st.st_mode = (st.st_mode & libc::S_IFMT) | (0o777 & !perm_data.umask);
1850 }
1851 }
1852 }
1853
1854 /// Get the host uid/gid configured for this path, falling back to the caller's uid/gid
1855 fn change_creds(&self, ctx: &Context, parent_data: &InodeData, name: &CStr) -> (u32, u32) {
1856 let path = format!(
1857 "{}/{}",
1858 parent_data.path.clone(),
1859 name.to_str().unwrap_or("<non UTF-8 str>")
1860 );
1861
1862 for perm_data in self
1863 .permission_paths
1864 .read()
1865 .expect("acquire permission_paths read lock")
1866 .iter()
1867 {
1868 if perm_data.need_set_permission(&path) {
1869 return (perm_data.host_uid, perm_data.host_gid);
1870 }
1871 }
1872
1873 (ctx.uid, ctx.gid)
1874 }
1875
1876 fn read_permission_data<R: io::Read>(&self, mut r: R) -> io::Result<PermissionData> {
1877 let mut fs_permission_data = FsPermissionDataBuffer::new_zeroed();
1878 r.read_exact(fs_permission_data.as_bytes_mut())?;
1879
1880 let perm_path = self.string_from_u8_slice(&fs_permission_data.perm_path)?;
1881 if !perm_path.starts_with('/') {
1882 error!("FS_IOC_SETPERMISSION: perm path must start with '/'");
1883 return Err(io::Error::from_raw_os_error(libc::EINVAL));
1884 }
1885 Ok(PermissionData {
1886 guest_uid: fs_permission_data.guest_uid,
1887 guest_gid: fs_permission_data.guest_gid,
1888 host_uid: fs_permission_data.host_uid,
1889 host_gid: fs_permission_data.host_gid,
1890 umask: fs_permission_data.umask,
1891 perm_path,
1892 })
1893 }
1894
1895 /// Sets uid/gid/umask for all files and directories under a specific path.
1896 ///
1897 /// This ioctl does not correspond to any upstream FUSE feature. It is used for arcvm.
1898 /// It associates the specified path with the provided uid, gid, and umask values within the
1899 /// filesystem metadata.
1900 ///
1901 /// During subsequent lookup operations, the stored uid/gid/umask values are retrieved and
1902 /// applied to all files and directories found under the registered path. Before sending
1903 /// file stat information to the client, the uid and gid are substituted by `guest_uid` and
1904 /// `guest_gid` if the file falls under the registered path. The file mode is masked by the
1905 /// umask.
1906 ///
1907 /// When the guest creates a file within the specified path, the file's uid/gid on the host
1908 /// will be overwritten with the `host_uid` and `host_gid` values.
1909 ///
1910 /// This functionality enables dynamic configuration of ownership and permissions for a
1911 /// specific directory hierarchy within the filesystem.
1912 ///
1913 /// # Notes
1914 /// - This method affects all existing and future files under the registered path.
1915 /// - The original file ownership and permissions are overridden by the provided values.
1916 /// - The registered path should not be renamed.
1917 /// - Refer to go/remove-mount-passthrough-fuse for more design details.
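///
/// # Example
///
/// A minimal sketch with hypothetical values, mirroring the translation done by
/// `set_permission` above: after registering `/foo` with `guest_uid = 1000`,
/// `guest_gid = 1000` and `umask = 0o022`, the stat reported to the guest for any
/// file under `/foo` becomes:
///
/// ```ignore
/// st.st_uid = 1000;                                            // guest_uid
/// st.st_gid = 1000;                                            // guest_gid
/// st.st_mode = (st.st_mode & libc::S_IFMT) | (0o777 & !0o022); // type bits kept, perms 0o755
/// ```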
1918 fn set_permission_by_path<R: io::Read>(&self, r: R) -> IoctlReply {
1919 if self
1920 .permission_paths
1921 .read()
1922 .expect("acquire permission_paths read lock")
1923 .len()
1924 >= self.cfg.max_dynamic_perm
1925 {
1926 error!(
1927 "FS_IOC_SETPERMISSION exceeds limits of max_dynamic_perm: {}",
1928 self.cfg.max_dynamic_perm
1929 );
1930 return IoctlReply::Done(Err(io::Error::from_raw_os_error(libc::EPERM)));
1931 }
1932
1933 let perm_data = match self.read_permission_data(r) {
1934 Ok(data) => data,
1935 Err(e) => {
1936 error!("fail to read permission data: {}", e);
1937 return IoctlReply::Done(Err(e));
1938 }
1939 };
1940
1941 self.permission_paths
1942 .write()
1943 .expect("acquire permission_paths write lock")
1944 .push(perm_data);
1945
1946 IoctlReply::Done(Ok(Vec::new()))
1947 }
1948
1949 // Get xattr value according to path and name
1950 fn get_xattr_by_path(&self, path: &str, name: &str) -> Option<String> {
1951 self.xattr_paths
1952 .read()
1953 .expect("acquire permission_paths read lock")
1954 .iter()
1955 .find(|data| data.need_set_guest_xattr(path, name))
1956 .map(|data| data.xattr_value.clone())
1957 }
1958
1959 fn skip_host_set_xattr(&self, path: &str, name: &str) -> bool {
1960 self.get_xattr_by_path(path, name).is_some()
1961 }
1962
1963 fn read_xattr_data<R: io::Read>(&self, mut r: R) -> io::Result<XattrData> {
1964 let mut fs_path_xattr_data = FsPathXattrDataBuffer::new_zeroed();
1965 r.read_exact(fs_path_xattr_data.as_bytes_mut())?;
1966
1967 let xattr_path = self.string_from_u8_slice(&fs_path_xattr_data.path)?;
1968 if !xattr_path.starts_with('/') {
1969 error!("FS_IOC_SETPATHXATTR: perm path must start with '/'");
1970 return Err(io::Error::from_raw_os_error(libc::EINVAL));
1971 }
1972 let xattr_name = self.string_from_u8_slice(&fs_path_xattr_data.xattr_name)?;
1973 let xattr_value = self.string_from_u8_slice(&fs_path_xattr_data.xattr_value)?;
1974
1975 Ok(XattrData {
1976 xattr_path,
1977 xattr_name,
1978 xattr_value,
1979 })
1980 }
1981
1982 /// Sets xattr value for all files and directories under a specific path.
1983 ///
1984 /// This ioctl does not correspond to any upstream FUSE feature. It is used for arcvm.
1985 /// It associates the specified path and xattr name with a value.
1986 ///
1987 /// When the getxattr is called for the specified path and name, the predefined
1988 /// value is returned.
1989 ///
1990 /// # Notes
1991 /// - This method affects all existing and future files under the registered path.
1992 /// - The SECURITY_CONTEXT feature will be disabled if this ioctl is enabled.
1993 /// - The registered path should not be renamed.
1994 /// - Refer to go/remove-mount-passthrough-fuse for more design details.
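///
/// # Example
///
/// A minimal sketch with hypothetical values: registering path `/foo`, name
/// `security.sehash` and value `abc` means that a guest `getxattr` for
/// `security.sehash` on any file under `/foo` is answered with `abc` from this
/// table (see `do_getxattr_with_filter` below) instead of being forwarded to the
/// host filesystem.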
1995 fn set_xattr_by_path<R: io::Read>(&self, r: R) -> IoctlReply {
1996 if self
1997 .xattr_paths
1998 .read()
1999 .expect("acquire xattr_paths read lock")
2000 .len()
2001 >= self.cfg.max_dynamic_xattr
2002 {
2003 error!(
2004 "FS_IOC_SETPATHXATTR exceeds limits of max_dynamic_xattr: {}",
2005 self.cfg.max_dynamic_xattr
2006 );
2007 return IoctlReply::Done(Err(io::Error::from_raw_os_error(libc::EPERM)));
2008 }
2009
2010 let xattr_data = match self.read_xattr_data(r) {
2011 Ok(data) => data,
2012 Err(e) => {
2013 error!("fail to read xattr data: {}", e);
2014 return IoctlReply::Done(Err(e));
2015 }
2016 };
2017
2018 self.xattr_paths
2019 .write()
2020 .expect("acquire xattr_paths write lock")
2021 .push(xattr_data);
2022
2023 IoctlReply::Done(Ok(Vec::new()))
2024 }
2025
2026 fn do_getxattr_with_filter(
2027 &self,
2028 data: Arc<InodeData>,
2029 name: Cow<CStr>,
2030 buf: &mut [u8],
2031 ) -> io::Result<usize> {
2032 let res: usize = match self.get_xattr_by_path(&data.path, &name.to_string_lossy()) {
2033 Some(predefined_xattr) => {
2034 let x = predefined_xattr.into_bytes();
2035 if x.len() > buf.len() {
2036 return Err(io::Error::from_raw_os_error(libc::ERANGE));
2037 }
2038 buf[..x.len()].copy_from_slice(&x);
2039 x.len()
2040 }
2041 None => self.do_getxattr(&data, &name, &mut buf[..])?,
2042 };
2043 Ok(res)
2044 }
2045 }
2046
2047 /// Decrements the refcount of the inode.
2048 /// Returns `true` if the refcount became 0.
2049 fn forget_one(
2050 inodes: &mut MultikeyBTreeMap<Inode, InodeAltKey, Arc<InodeData>>,
2051 inode: Inode,
2052 count: u64,
2053 ) -> bool {
2054 if let Some(data) = inodes.get(&inode) {
2055 // Acquiring the write lock on the inode map prevents new lookups from incrementing the
2056 // refcount but there is the possibility that a previous lookup already acquired a
2057 // reference to the inode data and is in the process of updating the refcount so we need
2058 // to loop here until we can decrement successfully.
2059 loop {
2060 let refcount = data.refcount.load(Ordering::Relaxed);
2061
2062 // Saturating sub because it doesn't make sense for a refcount to go below zero and
2063 // we don't want misbehaving clients to cause integer overflow.
2064 let new_count = refcount.saturating_sub(count);
2065
2066 // Synchronizes with the acquire load in `do_lookup`.
2067 if data
2068 .refcount
2069 .compare_exchange_weak(refcount, new_count, Ordering::Release, Ordering::Relaxed)
2070 .is_ok()
2071 {
2072 if new_count == 0 {
2073 // We just removed the last refcount for this inode. There's no need for an
2074 // acquire fence here because we hold a write lock on the inode map and any
2075 // thread that is waiting to do a forget on the same inode will have to wait
2076 // until we release the lock. So there is no other release store for us to
2077 // synchronize with before deleting the entry.
2078 inodes.remove(&inode);
2079 return true;
2080 }
2081 break;
2082 }
2083 }
2084 }
2085 false
2086 }
2087
2088 // Strips any `user.virtiofs.` prefix from `buf`. If buf contains one or more nul-bytes, each
2089 // nul-byte-separated slice is treated as a C string and the prefix is stripped from each one.
2090 fn strip_xattr_prefix(buf: &mut Vec<u8>) {
2091 fn next_cstr(b: &[u8], start: usize) -> Option<&[u8]> {
2092 if start >= b.len() {
2093 return None;
2094 }
2095
2096 let end = b[start..]
2097 .iter()
2098 .position(|&c| c == b'\0')
2099 .map(|p| start + p + 1)
2100 .unwrap_or(b.len());
2101
2102 Some(&b[start..end])
2103 }
2104
2105 let mut pos = 0;
2106 while let Some(name) = next_cstr(buf, pos) {
2107 if !name.starts_with(USER_VIRTIOFS_XATTR) {
2108 pos += name.len();
2109 continue;
2110 }
2111
2112 let newlen = name.len() - USER_VIRTIOFS_XATTR.len();
2113 buf.drain(pos..pos + USER_VIRTIOFS_XATTR.len());
2114 pos += newlen;
2115 }
2116 }
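
// A small, self-contained sketch of the behavior documented above
// `strip_xattr_prefix`; the module and test names are illustrative and not part
// of the original test suite.
#[cfg(test)]
mod strip_xattr_prefix_example {
    use super::*;

    #[test]
    fn strips_prefix_from_each_nul_separated_name() {
        let mut buf = b"user.virtiofs.security.sehash\0user.foo\0".to_vec();
        strip_xattr_prefix(&mut buf);
        // Only the name carrying the `user.virtiofs.` prefix is rewritten; the
        // other nul-terminated name is left untouched.
        assert_eq!(buf, b"security.sehash\0user.foo\0".to_vec());
    }
}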
2117
2118 impl FileSystem for PassthroughFs {
2119 type Inode = Inode;
2120 type Handle = Handle;
2121 type DirIter = ReadDir<Box<[u8]>>;
2122
2123 fn init(&self, capable: FsOptions) -> io::Result<FsOptions> {
2124 let root = CString::new(self.root_dir.clone())
2125 .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?;
2126
2127 let flags = libc::O_DIRECTORY | libc::O_NOFOLLOW | libc::O_CLOEXEC;
2128 // SAFETY: this doesn't modify any memory and we check the return value.
2129 let raw_descriptor = unsafe { libc::openat64(libc::AT_FDCWD, root.as_ptr(), flags) };
2130 if raw_descriptor < 0 {
2131 return Err(io::Error::last_os_error());
2132 }
2133
2134 // SAFETY: safe because we just opened this descriptor above.
2135 let f = unsafe { File::from_raw_descriptor(raw_descriptor) };
2136
2137 let st = stat(&f)?;
2138
2139 // SAFETY: this doesn't modify any memory and there is no need to check the return
2140 // value because this system call always succeeds. We need to clear the umask here because
2141 // we want the client to be able to set all the bits in the mode.
2142 unsafe { libc::umask(0o000) };
2143
2144 let mut inodes = self.inodes.lock();
2145
2146 // Not sure why the root inode gets a refcount of 2 but that's what libfuse does.
2147 inodes.insert(
2148 ROOT_ID,
2149 InodeAltKey {
2150 ino: st.st_ino,
2151 dev: st.st_dev,
2152 },
2153 Arc::new(InodeData {
2154 inode: ROOT_ID,
2155 file: Mutex::new((f, flags)),
2156 refcount: AtomicU64::new(2),
2157 filetype: st.st_mode.into(),
2158 path: "".to_string(),
2159 }),
2160 );
2161
2162 let mut opts = FsOptions::DO_READDIRPLUS
2163 | FsOptions::READDIRPLUS_AUTO
2164 | FsOptions::EXPORT_SUPPORT
2165 | FsOptions::DONT_MASK
2166 | FsOptions::CACHE_SYMLINKS;
2167
2168 // Devices using the dynamic xattr feature will have different security contexts in the
2169 // host and the guest, so the SECURITY_CONTEXT feature should not be enabled in the
2170 // device.
2171 if self.cfg.max_dynamic_xattr == 0 && self.cfg.security_ctx {
2172 opts |= FsOptions::SECURITY_CONTEXT;
2173 }
2174
2175 if self.cfg.posix_acl {
2176 opts |= FsOptions::POSIX_ACL;
2177 }
2178 if self.cfg.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) {
2179 opts |= FsOptions::WRITEBACK_CACHE;
2180 self.writeback.store(true, Ordering::Relaxed);
2181 }
2182 if self.cfg.cache_policy == CachePolicy::Always {
2183 if capable.contains(FsOptions::ZERO_MESSAGE_OPEN) {
2184 opts |= FsOptions::ZERO_MESSAGE_OPEN;
2185 self.zero_message_open.store(true, Ordering::Relaxed);
2186 }
2187 if capable.contains(FsOptions::ZERO_MESSAGE_OPENDIR) {
2188 opts |= FsOptions::ZERO_MESSAGE_OPENDIR;
2189 self.zero_message_opendir.store(true, Ordering::Relaxed);
2190 }
2191 }
2192 Ok(opts)
2193 }
2194
2195 fn destroy(&self) {
2196 cros_tracing::trace_simple_print!(VirtioFs, "{:?}: destroy", self);
2197 self.handles.lock().clear();
2198 self.inodes.lock().clear();
2199 }
2200
2201 fn statfs(&self, _ctx: Context, inode: Inode) -> io::Result<libc::statvfs64> {
2202 let _trace = fs_trace!(self.tag, "statfs", inode);
2203 let data = self.find_inode(inode)?;
2204
2205 let mut out = MaybeUninit::<libc::statvfs64>::zeroed();
2206
2207 // SAFETY: this will only modify `out` and we check the return value.
2208 syscall!(unsafe { libc::fstatvfs64(data.as_raw_descriptor(), out.as_mut_ptr()) })?;
2209
2210 // SAFETY: the kernel guarantees that `out` has been initialized.
2211 Ok(unsafe { out.assume_init() })
2212 }
2213
2214 fn lookup(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<Entry> {
2215 let data = self.find_inode(parent)?;
2216 #[allow(unused_variables)]
2217 let path = format!(
2218 "{}/{}",
2219 data.path,
2220 name.to_str().unwrap_or("<non UTF-8 path>")
2221 );
2222 let _trace = fs_trace!(self.tag, "lookup", parent, path);
2223
2224 let mut res = self.do_lookup_with_casefold_fallback(&data, name);
2225
2226 // FUSE takes an inode=0 as a request to do negative dentry caching.
2227 // So, if `negative_timeout` is set, return success with the timeout value and inode=0 as a
2228 // response.
2229 if let Err(e) = &res {
2230 if e.kind() == std::io::ErrorKind::NotFound && !self.cfg.negative_timeout.is_zero() {
2231 res = Ok(Entry::new_negative(self.cfg.negative_timeout));
2232 }
2233 }
2234
2235 res
2236 }
2237
2238 fn forget(&self, _ctx: Context, inode: Inode, count: u64) {
2239 let _trace = fs_trace!(self.tag, "forget", inode, count);
2240 let mut inodes = self.inodes.lock();
2241 let caches = self.lock_casefold_lookup_caches();
2242 if forget_one(&mut inodes, inode, count) {
2243 if let Some(mut c) = caches {
2244 c.forget(inode);
2245 }
2246 }
2247 }
2248
2249 fn batch_forget(&self, _ctx: Context, requests: Vec<(Inode, u64)>) {
2250 let mut inodes = self.inodes.lock();
2251 let mut caches = self.lock_casefold_lookup_caches();
2252 for (inode, count) in requests {
2253 if forget_one(&mut inodes, inode, count) {
2254 if let Some(c) = caches.as_mut() {
2255 c.forget(inode);
2256 }
2257 }
2258 }
2259 }
2260
2261 fn opendir(
2262 &self,
2263 _ctx: Context,
2264 inode: Inode,
2265 flags: u32,
2266 ) -> io::Result<(Option<Handle>, OpenOptions)> {
2267 let _trace = fs_trace!(self.tag, "opendir", inode, flags);
2268 if self.zero_message_opendir.load(Ordering::Relaxed) {
2269 Err(io::Error::from_raw_os_error(libc::ENOSYS))
2270 } else {
2271 self.do_open(inode, flags | (libc::O_DIRECTORY as u32))
2272 }
2273 }
2274
2275 fn releasedir(
2276 &self,
2277 _ctx: Context,
2278 inode: Inode,
2279 _flags: u32,
2280 handle: Handle,
2281 ) -> io::Result<()> {
2282 let _trace = fs_trace!(self.tag, "releasedir", inode, handle);
2283 if self.zero_message_opendir.load(Ordering::Relaxed) {
2284 Ok(())
2285 } else {
2286 self.do_release(inode, handle)
2287 }
2288 }
2289
2290 fn mkdir(
2291 &self,
2292 ctx: Context,
2293 parent: Inode,
2294 name: &CStr,
2295 mode: u32,
2296 umask: u32,
2297 security_ctx: Option<&CStr>,
2298 ) -> io::Result<Entry> {
2299 let _trace = fs_trace!(self.tag, "mkdir", parent, name, mode, umask, security_ctx);
2300 let data = self.find_inode(parent)?;
2301
2302 let _ctx = security_ctx
2303 .filter(|ctx| *ctx != UNLABELED_CSTR)
2304 .map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
2305 .transpose()?;
2306
2307 #[allow(unused_variables)]
2308 #[cfg(feature = "arc_quota")]
2309 let (uid, gid) = self.change_creds(&ctx, &data, name);
2310 #[cfg(feature = "fs_runtime_ugid_map")]
2311 let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
2312 #[cfg(not(feature = "fs_permission_translation"))]
2313 let (uid, gid) = (ctx.uid, ctx.gid);
2314
2315 let (_uid, _gid) = set_creds(uid, gid)?;
2316 {
2317 let casefold_cache = self.lock_casefold_lookup_caches();
2318 let _scoped_umask = ScopedUmask::new(umask);
2319
2320 // SAFETY: this doesn't modify any memory and we check the return value.
2321 syscall!(unsafe { libc::mkdirat(data.as_raw_descriptor(), name.as_ptr(), mode) })?;
2322 if let Some(mut c) = casefold_cache {
2323 c.insert(data.inode, name);
2324 }
2325 }
2326 self.do_lookup(&data, name)
2327 }
2328
2329 fn rmdir(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> {
2330 let _trace = fs_trace!(self.tag, "rmdir", parent, name);
2331 let data = self.find_inode(parent)?;
2332 let casefold_cache = self.lock_casefold_lookup_caches();
2333 // TODO(b/278691962): If ascii_casefold is enabled, we need to call
2334 // `get_case_unfolded_name()` to get the actual name to be unlinked.
2335 self.do_unlink(&data, name, libc::AT_REMOVEDIR)?;
2336 if let Some(mut c) = casefold_cache {
2337 c.remove(data.inode, name);
2338 }
2339 Ok(())
2340 }
2341
2342 fn readdir(
2343 &self,
2344 _ctx: Context,
2345 inode: Inode,
2346 handle: Handle,
2347 size: u32,
2348 offset: u64,
2349 ) -> io::Result<Self::DirIter> {
2350 let _trace = fs_trace!(self.tag, "readdir", inode, handle, size, offset);
2351 let buf = vec![0; size as usize].into_boxed_slice();
2352
2353 if self.zero_message_opendir.load(Ordering::Relaxed) {
2354 let data = self.find_inode(inode)?;
2355 ReadDir::new(&*data, offset as libc::off64_t, buf)
2356 } else {
2357 let data = self.find_handle(handle, inode)?;
2358
2359 let dir = data.file.lock();
2360
2361 ReadDir::new(&*dir, offset as libc::off64_t, buf)
2362 }
2363 }
2364
2365 fn open(
2366 &self,
2367 _ctx: Context,
2368 inode: Inode,
2369 flags: u32,
2370 ) -> io::Result<(Option<Handle>, OpenOptions)> {
2371 if self.zero_message_open.load(Ordering::Relaxed) {
2372 let _trace = fs_trace!(self.tag, "open (zero-message)", inode, flags);
2373 Err(io::Error::from_raw_os_error(libc::ENOSYS))
2374 } else {
2375 let _trace = fs_trace!(self.tag, "open", inode, flags);
2376 self.do_open(inode, flags)
2377 }
2378 }
2379
2380 fn release(
2381 &self,
2382 _ctx: Context,
2383 inode: Inode,
2384 _flags: u32,
2385 handle: Handle,
2386 _flush: bool,
2387 _flock_release: bool,
2388 _lock_owner: Option<u64>,
2389 ) -> io::Result<()> {
2390 if self.zero_message_open.load(Ordering::Relaxed) {
2391 let _trace = fs_trace!(self.tag, "release (zero-message)", inode, handle);
2392 Ok(())
2393 } else {
2394 let _trace = fs_trace!(self.tag, "release", inode, handle);
2395 self.do_release(inode, handle)
2396 }
2397 }
2398
2399 fn chromeos_tmpfile(
2400 &self,
2401 ctx: Context,
2402 parent: Self::Inode,
2403 mode: u32,
2404 umask: u32,
2405 security_ctx: Option<&CStr>,
2406 ) -> io::Result<Entry> {
2407 let _trace = fs_trace!(
2408 self.tag,
2409 "chromeos_tempfile",
2410 parent,
2411 mode,
2412 umask,
2413 security_ctx
2414 );
2415 let data = self.find_inode(parent)?;
2416
2417 let _ctx = security_ctx
2418 .filter(|ctx| *ctx != UNLABELED_CSTR)
2419 .map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
2420 .transpose()?;
2421
2422 let tmpflags = libc::O_RDWR | libc::O_TMPFILE | libc::O_CLOEXEC | libc::O_NOFOLLOW;
2423
2424 let current_dir = c".";
2425
2426 #[allow(unused_variables)]
2427 #[cfg(feature = "arc_quota")]
2428 let (uid, gid) = self.change_creds(&ctx, &data, current_dir);
2429 #[cfg(feature = "fs_runtime_ugid_map")]
2430 let (uid, gid) = self.change_ugid_creds(&ctx, &data, current_dir);
2431 #[cfg(not(feature = "fs_permission_translation"))]
2432 let (uid, gid) = (ctx.uid, ctx.gid);
2433
2434 let (_uid, _gid) = set_creds(uid, gid)?;
2435
2436 let fd = {
2437 let _scoped_umask = ScopedUmask::new(umask);
2438
2439 // SAFETY: this doesn't modify any memory and we check the return value.
2440 syscall!(unsafe {
2441 libc::openat64(
2442 data.as_raw_descriptor(),
2443 current_dir.as_ptr(),
2444 tmpflags,
2445 mode,
2446 )
2447 })?
2448 };
2449 // No need to add to the casefold cache because we created an anonymous file.
2450
2451 // SAFETY: safe because we just opened this fd.
2452 let tmpfile = unsafe { File::from_raw_descriptor(fd) };
2453 let st = stat(&tmpfile)?;
2454 let path = format!(
2455 "{}/{}",
2456 data.path.clone(),
2457 current_dir.to_str().unwrap_or("<non UTF-8 str>")
2458 );
2459 Ok(self.add_entry(tmpfile, st, tmpflags, path))
2460 }
2461
2462 fn create(
2463 &self,
2464 ctx: Context,
2465 parent: Inode,
2466 name: &CStr,
2467 mode: u32,
2468 flags: u32,
2469 umask: u32,
2470 security_ctx: Option<&CStr>,
2471 ) -> io::Result<(Entry, Option<Handle>, OpenOptions)> {
2472 let _trace = fs_trace!(
2473 self.tag,
2474 "create",
2475 parent,
2476 name,
2477 mode,
2478 flags,
2479 umask,
2480 security_ctx
2481 );
2482 let data = self.find_inode(parent)?;
2483
2484 let _ctx = security_ctx
2485 .filter(|ctx| *ctx != UNLABELED_CSTR)
2486 .map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
2487 .transpose()?;
2488
2489 #[allow(unused_variables)]
2490 #[cfg(feature = "arc_quota")]
2491 let (uid, gid) = self.change_creds(&ctx, &data, name);
2492 #[cfg(feature = "fs_runtime_ugid_map")]
2493 let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
2494 #[cfg(not(feature = "fs_permission_translation"))]
2495 let (uid, gid) = (ctx.uid, ctx.gid);
2496
2497 let (_uid, _gid) = set_creds(uid, gid)?;
2498
2499 let flags = self.update_open_flags(flags as i32);
2500 let create_flags =
2501 (flags | libc::O_CREAT | libc::O_CLOEXEC | libc::O_NOFOLLOW) & !libc::O_DIRECT;
2502
2503 let fd = {
2504 let _scoped_umask = ScopedUmask::new(umask);
2505 let casefold_cache = self.lock_casefold_lookup_caches();
2506
2507 // SAFETY: this doesn't modify any memory and we check the return value. We don't really
2508 // check `flags` because if the kernel can't handle poorly specified flags then we have
2509 // much bigger problems.
2510 // TODO(b/278691962): If ascii_casefold is enabled, we need to call
2511 // `get_case_unfolded_name()` to get the actual name to be created.
2512 let fd = syscall!(unsafe {
2513 libc::openat64(data.as_raw_descriptor(), name.as_ptr(), create_flags, mode)
2514 })?;
2515 if let Some(mut c) = casefold_cache {
2516 c.insert(parent, name);
2517 }
2518 fd
2519 };
2520
2521 // SAFETY: safe because we just opened this fd.
2522 let file = unsafe { File::from_raw_descriptor(fd) };
2523
2524 let st = stat(&file)?;
2525 let path = format!(
2526 "{}/{}",
2527 data.path.clone(),
2528 name.to_str().unwrap_or("<non UTF-8 str>")
2529 );
2530 let entry = self.add_entry(file, st, create_flags, path);
2531
2532 let (handle, opts) = if self.zero_message_open.load(Ordering::Relaxed) {
2533 (None, OpenOptions::KEEP_CACHE)
2534 } else {
2535 self.do_open_at(
2536 data,
2537 name,
2538 entry.inode,
2539 flags as u32 & !((libc::O_CREAT | libc::O_EXCL | libc::O_NOCTTY) as u32),
2540 )
2541 .inspect_err(|_e| {
2542 // Don't leak the entry.
2543 self.forget(ctx, entry.inode, 1);
2544 })?
2545 };
2546 Ok((entry, handle, opts))
2547 }
2548
2549 fn unlink(&self, _ctx: Context, parent: Inode, name: &CStr) -> io::Result<()> {
2550 let _trace = fs_trace!(self.tag, "unlink", parent, name);
2551 let data = self.find_inode(parent)?;
2552 let casefold_cache = self.lock_casefold_lookup_caches();
2553 // TODO(b/278691962): If ascii_casefold is enabled, we need to call
2554 // `get_case_unfolded_name()` to get the actual name to be unlinked.
2555 self.do_unlink(&data, name, 0)?;
2556 if let Some(mut c) = casefold_cache {
2557 c.remove(data.inode, name);
2558 }
2559 Ok(())
2560 }
2561
2562 fn read<W: io::Write + ZeroCopyWriter>(
2563 &self,
2564 _ctx: Context,
2565 inode: Inode,
2566 handle: Handle,
2567 mut w: W,
2568 size: u32,
2569 offset: u64,
2570 _lock_owner: Option<u64>,
2571 _flags: u32,
2572 ) -> io::Result<usize> {
2573 if self.zero_message_open.load(Ordering::Relaxed) {
2574 let _trace = fs_trace!(self.tag, "read (zero-message)", inode, handle, size, offset);
2575 let data = self.find_inode(inode)?;
2576
2577 let mut file = data.file.lock();
2578 let mut flags = file.1;
2579 match flags & libc::O_ACCMODE {
2580 libc::O_WRONLY => {
2581 flags &= !libc::O_WRONLY;
2582 flags |= libc::O_RDWR;
2583
2584 // We need to get a readable handle for this file.
2585 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDWR)?;
2586 *file = (newfile, flags);
2587 }
2588 libc::O_RDONLY | libc::O_RDWR => {}
2589 _ => panic!("Unexpected flags: {:#x}", flags),
2590 }
2591
2592 w.write_from(&mut file.0, size as usize, offset)
2593 } else {
2594 let _trace = fs_trace!(self.tag, "read", inode, handle, size, offset);
2595 let data = self.find_handle(handle, inode)?;
2596
2597 let mut f = data.file.lock();
2598 w.write_from(&mut f, size as usize, offset)
2599 }
2600 }
2601
2602 fn write<R: io::Read + ZeroCopyReader>(
2603 &self,
2604 _ctx: Context,
2605 inode: Inode,
2606 handle: Handle,
2607 mut r: R,
2608 size: u32,
2609 offset: u64,
2610 _lock_owner: Option<u64>,
2611 _delayed_write: bool,
2612 flags: u32,
2613 ) -> io::Result<usize> {
2614 // When the WRITE_KILL_PRIV flag is set, drop CAP_FSETID so that the kernel will
2615 // automatically clear the setuid and setgid bits for us.
2616 let _fsetid = if flags & WRITE_KILL_PRIV != 0 {
2617 Some(drop_cap_fsetid()?)
2618 } else {
2619 None
2620 };
2621
2622 if self.zero_message_open.load(Ordering::Relaxed) {
2623 let _trace = fs_trace!(
2624 self.tag,
2625 "write (zero-message)",
2626 inode,
2627 handle,
2628 size,
2629 offset
2630 );
2631
2632 let data = self.find_inode(inode)?;
2633
2634 let mut file = data.file.lock();
2635 let mut flags = file.1;
2636 match flags & libc::O_ACCMODE {
2637 libc::O_RDONLY => {
2638 flags &= !libc::O_RDONLY;
2639 flags |= libc::O_RDWR;
2640
2641 // We need to get a writable handle for this file.
2642 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDWR)?;
2643 *file = (newfile, flags);
2644 }
2645 libc::O_WRONLY | libc::O_RDWR => {}
2646 _ => panic!("Unexpected flags: {:#x}", flags),
2647 }
2648
2649 r.read_to(&mut file.0, size as usize, offset)
2650 } else {
2651 let _trace = fs_trace!(self.tag, "write", inode, handle, size, offset);
2652
2653 let data = self.find_handle(handle, inode)?;
2654
2655 let mut f = data.file.lock();
2656 r.read_to(&mut f, size as usize, offset)
2657 }
2658 }
2659
2660 fn getattr(
2661 &self,
2662 _ctx: Context,
2663 inode: Inode,
2664 _handle: Option<Handle>,
2665 ) -> io::Result<(libc::stat64, Duration)> {
2666 let _trace = fs_trace!(self.tag, "getattr", inode, _handle);
2667
2668 let data = self.find_inode(inode)?;
2669 self.do_getattr(&data)
2670 }
2671
2672 fn setattr(
2673 &self,
2674 _ctx: Context,
2675 inode: Inode,
2676 attr: libc::stat64,
2677 handle: Option<Handle>,
2678 valid: SetattrValid,
2679 ) -> io::Result<(libc::stat64, Duration)> {
2680 let _trace = fs_trace!(self.tag, "setattr", inode, handle);
2681 let inode_data = self.find_inode(inode)?;
2682
2683 enum Data<'a> {
2684 Handle(MutexGuard<'a, File>),
2685 ProcPath(CString),
2686 }
2687
2688 // If we have a handle then use it otherwise get a new fd from the inode.
2689 let hd;
2690 let data = if let Some(handle) = handle.filter(|&h| h != 0) {
2691 hd = self.find_handle(handle, inode)?;
2692 Data::Handle(hd.file.lock())
2693 } else {
2694 let pathname = CString::new(format!("self/fd/{}", inode_data.as_raw_descriptor()))
2695 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
2696 Data::ProcPath(pathname)
2697 };
2698
2699 if valid.contains(SetattrValid::MODE) {
2700 // SAFETY: this doesn't modify any memory and we check the return value.
2701 syscall!(unsafe {
2702 match data {
2703 Data::Handle(ref fd) => libc::fchmod(fd.as_raw_descriptor(), attr.st_mode),
2704 Data::ProcPath(ref p) => {
2705 libc::fchmodat(self.proc.as_raw_descriptor(), p.as_ptr(), attr.st_mode, 0)
2706 }
2707 }
2708 })?;
2709 }
2710
2711 if valid.intersects(SetattrValid::UID | SetattrValid::GID) {
2712 let uid = if valid.contains(SetattrValid::UID) {
2713 attr.st_uid
2714 } else {
2715 // Cannot use -1 here because these are unsigned values.
2716 u32::MAX
2717 };
2718 let gid = if valid.contains(SetattrValid::GID) {
2719 attr.st_gid
2720 } else {
2721 // Cannot use -1 here because these are unsigned values.
2722 u32::MAX
2723 };
2724
2725 // SAFETY: this doesn't modify any memory and we check the return value.
2726 syscall!(unsafe {
2727 libc::fchownat(
2728 inode_data.as_raw_descriptor(),
2729 EMPTY_CSTR.as_ptr(),
2730 uid,
2731 gid,
2732 libc::AT_EMPTY_PATH | libc::AT_SYMLINK_NOFOLLOW,
2733 )
2734 })?;
2735 }
2736
2737 if valid.contains(SetattrValid::SIZE) {
2738 syscall!(match data {
2739 Data::Handle(ref fd) => {
2740 // SAFETY: this doesn't modify any memory and we check the return value.
2741 unsafe { libc::ftruncate64(fd.as_raw_descriptor(), attr.st_size) }
2742 }
2743 _ => {
2744 // There is no `ftruncateat` so we need to get a new fd and truncate it.
2745 let f = self.open_inode(&inode_data, libc::O_NONBLOCK | libc::O_RDWR)?;
2746 // SAFETY: this doesn't modify any memory and we check the return value.
2747 unsafe { libc::ftruncate64(f.as_raw_descriptor(), attr.st_size) }
2748 }
2749 })?;
2750 }
2751
2752 if valid.intersects(SetattrValid::ATIME | SetattrValid::MTIME) {
2753 let mut tvs = [
2754 libc::timespec {
2755 tv_sec: 0,
2756 tv_nsec: libc::UTIME_OMIT,
2757 },
2758 libc::timespec {
2759 tv_sec: 0,
2760 tv_nsec: libc::UTIME_OMIT,
2761 },
2762 ];
2763
2764 if valid.contains(SetattrValid::ATIME_NOW) {
2765 tvs[0].tv_nsec = libc::UTIME_NOW;
2766 } else if valid.contains(SetattrValid::ATIME) {
2767 tvs[0].tv_sec = attr.st_atime;
2768 tvs[0].tv_nsec = attr.st_atime_nsec;
2769 }
2770
2771 if valid.contains(SetattrValid::MTIME_NOW) {
2772 tvs[1].tv_nsec = libc::UTIME_NOW;
2773 } else if valid.contains(SetattrValid::MTIME) {
2774 tvs[1].tv_sec = attr.st_mtime;
2775 tvs[1].tv_nsec = attr.st_mtime_nsec;
2776 }
2777
2778 // SAFETY: this doesn't modify any memory and we check the return value.
2779 syscall!(unsafe {
2780 match data {
2781 Data::Handle(ref fd) => libc::futimens(fd.as_raw_descriptor(), tvs.as_ptr()),
2782 Data::ProcPath(ref p) => {
2783 libc::utimensat(self.proc.as_raw_descriptor(), p.as_ptr(), tvs.as_ptr(), 0)
2784 }
2785 }
2786 })?;
2787 }
2788
2789 self.do_getattr(&inode_data)
2790 }
2791
2792 fn rename(
2793 &self,
2794 _ctx: Context,
2795 olddir: Inode,
2796 oldname: &CStr,
2797 newdir: Inode,
2798 newname: &CStr,
2799 flags: u32,
2800 ) -> io::Result<()> {
2801 let _trace = fs_trace!(self.tag, "rename", olddir, oldname, newdir, newname, flags);
2802
2803 let old_inode = self.find_inode(olddir)?;
2804 let new_inode = self.find_inode(newdir)?;
2805 {
2806 let casefold_cache = self.lock_casefold_lookup_caches();
2807
2808 // SAFETY: this doesn't modify any memory and we check the return value.
2809 // TODO: Switch to libc::renameat2 once https://github.com/rust-lang/libc/pull/1508 lands
2810 // and we have glibc 2.28.
2811 syscall!(unsafe {
2812 libc::syscall(
2813 libc::SYS_renameat2,
2814 old_inode.as_raw_descriptor(),
2815 oldname.as_ptr(),
2816 new_inode.as_raw_descriptor(),
2817 newname.as_ptr(),
2818 flags,
2819 )
2820 })?;
2821 if let Some(mut c) = casefold_cache {
2822 c.remove(olddir, oldname);
2823 c.insert(newdir, newname);
2824 }
2825 }
2826
2827 Ok(())
2828 }
2829
2830 fn mknod(
2831 &self,
2832 ctx: Context,
2833 parent: Inode,
2834 name: &CStr,
2835 mode: u32,
2836 rdev: u32,
2837 umask: u32,
2838 security_ctx: Option<&CStr>,
2839 ) -> io::Result<Entry> {
2840 let _trace = fs_trace!(
2841 self.tag,
2842 "mknod",
2843 parent,
2844 name,
2845 mode,
2846 rdev,
2847 umask,
2848 security_ctx
2849 );
2850 let data = self.find_inode(parent)?;
2851
2852 let _ctx = security_ctx
2853 .filter(|ctx| *ctx != UNLABELED_CSTR)
2854 .map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
2855 .transpose()?;
2856
2857 #[allow(unused_variables)]
2858 #[cfg(feature = "arc_quota")]
2859 let (uid, gid) = self.change_creds(&ctx, &data, name);
2860 #[cfg(feature = "fs_runtime_ugid_map")]
2861 let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
2862 #[cfg(not(feature = "fs_permission_translation"))]
2863 let (uid, gid) = (ctx.uid, ctx.gid);
2864
2865 let (_uid, _gid) = set_creds(uid, gid)?;
2866 {
2867 let _scoped_umask = ScopedUmask::new(umask);
2868 let casefold_cache = self.lock_casefold_lookup_caches();
2869
2870 // SAFETY: this doesn't modify any memory and we check the return value.
2871 syscall!(unsafe {
2872 libc::mknodat(
2873 data.as_raw_descriptor(),
2874 name.as_ptr(),
2875 mode as libc::mode_t,
2876 rdev as libc::dev_t,
2877 )
2878 })?;
2879 if let Some(mut c) = casefold_cache {
2880 c.insert(parent, name);
2881 }
2882 }
2883
2884 self.do_lookup(&data, name)
2885 }
2886
2887 fn link(
2888 &self,
2889 _ctx: Context,
2890 inode: Inode,
2891 newparent: Inode,
2892 newname: &CStr,
2893 ) -> io::Result<Entry> {
2894 let _trace = fs_trace!(self.tag, "link", inode, newparent, newname);
2895 let data = self.find_inode(inode)?;
2896 let new_inode = self.find_inode(newparent)?;
2897
2898 let path = CString::new(format!("self/fd/{}", data.as_raw_descriptor()))
2899 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
2900
2901 {
2902 let casefold_cache = self.lock_casefold_lookup_caches();
2903 // SAFETY: this doesn't modify any memory and we check the return value.
2904 syscall!(unsafe {
2905 libc::linkat(
2906 self.proc.as_raw_descriptor(),
2907 path.as_ptr(),
2908 new_inode.as_raw_descriptor(),
2909 newname.as_ptr(),
2910 libc::AT_SYMLINK_FOLLOW,
2911 )
2912 })?;
2913 if let Some(mut c) = casefold_cache {
2914 c.insert(newparent, newname);
2915 }
2916 }
2917
2918 self.do_lookup(&new_inode, newname)
2919 }
2920
2921 fn symlink(
2922 &self,
2923 ctx: Context,
2924 linkname: &CStr,
2925 parent: Inode,
2926 name: &CStr,
2927 security_ctx: Option<&CStr>,
2928 ) -> io::Result<Entry> {
2929 let _trace = fs_trace!(self.tag, "symlink", parent, linkname, name, security_ctx);
2930 let data = self.find_inode(parent)?;
2931
2932 let _ctx = security_ctx
2933 .filter(|ctx| *ctx != UNLABELED_CSTR)
2934 .map(|ctx| ScopedSecurityContext::new(&self.proc, ctx))
2935 .transpose()?;
2936
2937 #[allow(unused_variables)]
2938 #[cfg(feature = "arc_quota")]
2939 let (uid, gid) = self.change_creds(&ctx, &data, name);
2940 #[cfg(feature = "fs_runtime_ugid_map")]
2941 let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
2942 #[cfg(not(feature = "fs_permission_translation"))]
2943 let (uid, gid) = (ctx.uid, ctx.gid);
2944
2945 let (_uid, _gid) = set_creds(uid, gid)?;
2946 {
2947 let casefold_cache = self.lock_casefold_lookup_caches();
2948 // SAFETY: this doesn't modify any memory and we check the return value.
2949 syscall!(unsafe {
2950 libc::symlinkat(linkname.as_ptr(), data.as_raw_descriptor(), name.as_ptr())
2951 })?;
2952 if let Some(mut c) = casefold_cache {
2953 c.insert(parent, name);
2954 }
2955 }
2956
2957 self.do_lookup(&data, name)
2958 }
2959
2960 fn readlink(&self, _ctx: Context, inode: Inode) -> io::Result<Vec<u8>> {
2961 let _trace = fs_trace!(self.tag, "readlink", inode);
2962 let data = self.find_inode(inode)?;
2963
2964 let mut buf = vec![0; libc::PATH_MAX as usize];
2965
2966 // SAFETY: this will only modify the contents of `buf` and we check the return value.
2967 let res = syscall!(unsafe {
2968 libc::readlinkat(
2969 data.as_raw_descriptor(),
2970 EMPTY_CSTR.as_ptr(),
2971 buf.as_mut_ptr() as *mut libc::c_char,
2972 buf.len(),
2973 )
2974 })?;
2975
2976 buf.resize(res as usize, 0);
2977
2978 #[cfg(feature = "fs_runtime_ugid_map")]
2979 {
2980 let link_target = Path::new(OsStr::from_bytes(&buf[..res as usize]));
2981 if !link_target.starts_with(&self.root_dir) {
2982 return Err(io::Error::new(
2983 io::ErrorKind::InvalidInput,
2984 "Symbolic link points outside of root_dir",
2985 ));
2986 }
2987 }
2988 Ok(buf)
2989 }
2990
2991 fn flush(
2992 &self,
2993 _ctx: Context,
2994 inode: Inode,
2995 handle: Handle,
2996 _lock_owner: u64,
2997 ) -> io::Result<()> {
2998 let _trace = fs_trace!(self.tag, "flush", inode, handle);
2999 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
3000 self.find_inode(inode)?
3001 } else {
3002 self.find_handle(handle, inode)?
3003 };
3004
3005 // SAFETY:
3006 // Since this method is called whenever an fd is closed in the client, we can emulate that
3007 // behavior by doing the same thing (dup-ing the fd and then immediately closing it). Safe
3008 // because this doesn't modify any memory and we check the return values.
3009 unsafe {
3010 let newfd = syscall!(libc::fcntl(
3011 data.as_raw_descriptor(),
3012 libc::F_DUPFD_CLOEXEC,
3013 0
3014 ))?;
3015
3016 syscall!(libc::close(newfd))?;
3017 }
3018 Ok(())
3019 }
3020
3021 fn fsync(&self, _ctx: Context, inode: Inode, datasync: bool, handle: Handle) -> io::Result<()> {
3022 if self.zero_message_open.load(Ordering::Relaxed) {
3023 let _trace = fs_trace!(self.tag, "fsync (zero-message)", inode, datasync, handle);
3024 let data = self.find_inode(inode)?;
3025 self.do_fsync(&*data, datasync)
3026 } else {
3027 let _trace = fs_trace!(self.tag, "fsync", inode, datasync, handle);
3028 let data = self.find_handle(handle, inode)?;
3029
3030 let file = data.file.lock();
3031 self.do_fsync(&*file, datasync)
3032 }
3033 }
3034
3035 fn fsyncdir(
3036 &self,
3037 _ctx: Context,
3038 inode: Inode,
3039 datasync: bool,
3040 handle: Handle,
3041 ) -> io::Result<()> {
3042 if self.zero_message_opendir.load(Ordering::Relaxed) {
3043 let _trace = fs_trace!(self.tag, "fsyncdir (zero-message)", inode, datasync, handle);
3044 let data = self.find_inode(inode)?;
3045 self.do_fsync(&*data, datasync)
3046 } else {
3047 let _trace = fs_trace!(self.tag, "fsyncdir", inode, datasync, handle);
3048 let data = self.find_handle(handle, inode)?;
3049
3050 let file = data.file.lock();
3051 self.do_fsync(&*file, datasync)
3052 }
3053 }
3054
3055 fn access(&self, ctx: Context, inode: Inode, mask: u32) -> io::Result<()> {
3056 let _trace = fs_trace!(self.tag, "access", inode, mask);
3057 let data = self.find_inode(inode)?;
3058
3059 let st = stat(&*data)?;
3060 let mode = mask as i32 & (libc::R_OK | libc::W_OK | libc::X_OK);
3061
3062 if mode == libc::F_OK {
3063 // The file exists since we were able to call `stat(2)` on it.
3064 return Ok(());
3065 }
3066
3067 if (mode & libc::R_OK) != 0 {
3068 if ctx.uid != 0
3069 && (st.st_uid != ctx.uid || st.st_mode & 0o400 == 0)
3070 && (st.st_gid != ctx.gid || st.st_mode & 0o040 == 0)
3071 && st.st_mode & 0o004 == 0
3072 {
3073 return Err(io::Error::from_raw_os_error(libc::EACCES));
3074 }
3075 }
3076
3077 if (mode & libc::W_OK) != 0 {
3078 if ctx.uid != 0
3079 && (st.st_uid != ctx.uid || st.st_mode & 0o200 == 0)
3080 && (st.st_gid != ctx.gid || st.st_mode & 0o020 == 0)
3081 && st.st_mode & 0o002 == 0
3082 {
3083 return Err(io::Error::from_raw_os_error(libc::EACCES));
3084 }
3085 }
3086
3087 // root can only execute something if it is executable by one of the owner, the group, or
3088 // everyone.
3089 if (mode & libc::X_OK) != 0 {
3090 if (ctx.uid != 0 || st.st_mode & 0o111 == 0)
3091 && (st.st_uid != ctx.uid || st.st_mode & 0o100 == 0)
3092 && (st.st_gid != ctx.gid || st.st_mode & 0o010 == 0)
3093 && st.st_mode & 0o001 == 0
3094 {
3095 return Err(io::Error::from_raw_os_error(libc::EACCES));
3096 }
3097 }
3098
3099 Ok(())
3100 }
3101
3102 fn setxattr(
3103 &self,
3104 _ctx: Context,
3105 inode: Inode,
3106 name: &CStr,
3107 value: &[u8],
3108 flags: u32,
3109 ) -> io::Result<()> {
3110 let _trace = fs_trace!(self.tag, "setxattr", inode, name, flags);
3111 // We can't allow the VM to set this xattr because an unprivileged process may use it to set
3112 // a privileged xattr.
3113 if self.cfg.rewrite_security_xattrs && name.to_bytes().starts_with(USER_VIRTIOFS_XATTR) {
3114 return Err(io::Error::from_raw_os_error(libc::EPERM));
3115 }
3116
3117 let data = self.find_inode(inode)?;
3118 let name = self.rewrite_xattr_name(name);
3119
3120 #[cfg(feature = "arc_quota")]
3121 if self.skip_host_set_xattr(&data.path, &name.to_string_lossy()) {
3122 debug!(
3123 "ignore setxattr for path:{} xattr_name:{}",
3124 &data.path,
3125 &name.to_string_lossy()
3126 );
3127 return Ok(());
3128 }
3129
3130 let file = data.file.lock();
3131 let o_path_file = (file.1 & libc::O_PATH) != 0;
3132 if o_path_file {
3133 // For FDs opened with `O_PATH`, we cannot call `fsetxattr` normally. Instead we emulate
3134 // an _at syscall by changing the CWD to /proc, running the path based syscall, and then
3135 // setting the CWD back to the root directory.
3136 let path = CString::new(format!("self/fd/{}", file.0.as_raw_descriptor()))
3137 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
3138
3139 syscall!(self.with_proc_chdir(|| {
3140 // SAFETY: this doesn't modify any memory and we check the return value.
3141 unsafe {
3142 libc::setxattr(
3143 path.as_ptr(),
3144 name.as_ptr(),
3145 value.as_ptr() as *const libc::c_void,
3146 value.len() as libc::size_t,
3147 flags as c_int,
3148 )
3149 }
3150 }))?;
3151 } else {
3152 syscall!(
3153 // For regular files and directories, we can just use fsetxattr.
3154 // SAFETY: this doesn't modify any memory and we check the return value.
3155 unsafe {
3156 libc::fsetxattr(
3157 file.0.as_raw_descriptor(),
3158 name.as_ptr(),
3159 value.as_ptr() as *const libc::c_void,
3160 value.len() as libc::size_t,
3161 flags as c_int,
3162 )
3163 }
3164 )?;
3165 }
3166
3167 Ok(())
3168 }
3169
3170 fn getxattr(
3171 &self,
3172 _ctx: Context,
3173 inode: Inode,
3174 name: &CStr,
3175 size: u32,
3176 ) -> io::Result<GetxattrReply> {
3177 let _trace = fs_trace!(self.tag, "getxattr", inode, name, size);
3178 // We don't allow the VM to set this xattr so we also pretend there is no value associated
3179 // with it.
3180 if self.cfg.rewrite_security_xattrs && name.to_bytes().starts_with(USER_VIRTIOFS_XATTR) {
3181 return Err(io::Error::from_raw_os_error(libc::ENODATA));
3182 }
3183
3184 let data = self.find_inode(inode)?;
3185 let name = self.rewrite_xattr_name(name);
3186 let mut buf = vec![0u8; size as usize];
3187
3188 #[cfg(feature = "arc_quota")]
3189 let res = self.do_getxattr_with_filter(data, name, &mut buf)?;
3190
3191 #[cfg(not(feature = "arc_quota"))]
3192 let res = self.do_getxattr(&data, &name, &mut buf[..])?;
3193
3194 if size == 0 {
3195 Ok(GetxattrReply::Count(res as u32))
3196 } else {
3197 buf.truncate(res);
3198 Ok(GetxattrReply::Value(buf))
3199 }
3200 }
3201
3202 fn listxattr(&self, _ctx: Context, inode: Inode, size: u32) -> io::Result<ListxattrReply> {
3203 let _trace = fs_trace!(self.tag, "listxattr", inode, size);
3204 let data = self.find_inode(inode)?;
3205
3206 let mut buf = vec![0u8; size as usize];
3207
3208 let file = data.file.lock();
3209 let o_path_file = (file.1 & libc::O_PATH) != 0;
3210 let res = if o_path_file {
3211 // For FDs opened with `O_PATH`, we cannot call `flistxattr` normally. Instead we
3212 // emulate an _at syscall by changing the CWD to /proc, running the path based syscall,
3213 // and then setting the CWD back to the root directory.
3214 let path = CString::new(format!("self/fd/{}", file.0.as_raw_descriptor()))
3215 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
3216
3217 // SAFETY: this will only modify `buf` and we check the return value.
3218 syscall!(self.with_proc_chdir(|| unsafe {
3219 libc::listxattr(
3220 path.as_ptr(),
3221 buf.as_mut_ptr() as *mut libc::c_char,
3222 buf.len() as libc::size_t,
3223 )
3224 }))?
3225 } else {
3226 // For regular files and directories, we can just flistxattr.
3227 // SAFETY: this will only write to `buf` and we check the return value.
3228 syscall!(unsafe {
3229 libc::flistxattr(
3230 file.0.as_raw_descriptor(),
3231 buf.as_mut_ptr() as *mut libc::c_char,
3232 buf.len() as libc::size_t,
3233 )
3234 })?
3235 };
3236
3237 if size == 0 {
3238 Ok(ListxattrReply::Count(res as u32))
3239 } else {
3240 buf.truncate(res as usize);
3241
3242 if self.cfg.rewrite_security_xattrs {
3243 strip_xattr_prefix(&mut buf);
3244 }
3245 Ok(ListxattrReply::Names(buf))
3246 }
3247 }
3248
3249 fn removexattr(&self, _ctx: Context, inode: Inode, name: &CStr) -> io::Result<()> {
3250 let _trace = fs_trace!(self.tag, "removexattr", inode, name);
3251 // We don't allow the VM to set this xattr so we also pretend there is no value associated
3252 // with it.
3253 if self.cfg.rewrite_security_xattrs && name.to_bytes().starts_with(USER_VIRTIOFS_XATTR) {
3254 return Err(io::Error::from_raw_os_error(libc::ENODATA));
3255 }
3256
3257 let data = self.find_inode(inode)?;
3258 let name = self.rewrite_xattr_name(name);
3259
3260 let file = data.file.lock();
3261 let o_path_file = (file.1 & libc::O_PATH) != 0;
3262 if o_path_file {
3263 // For files opened with `O_PATH`, we cannot call `fremovexattr` normally. Instead we
3264 // emulate an _at syscall by changing the CWD to /proc, running the path based syscall,
3265 // and then setting the CWD back to the root directory.
3266 let path = CString::new(format!("self/fd/{}", file.0.as_raw_descriptor()))
3267 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;
3268
3269 syscall!(self.with_proc_chdir(||
3270 // SAFETY: this doesn't modify any memory and we check the return value.
3271 unsafe { libc::removexattr(path.as_ptr(), name.as_ptr()) }))?;
3272 } else {
3273 // For regular files and directories, we can just use fremovexattr.
3274 syscall!(
3275 // SAFETY: this doesn't modify any memory and we check the return value.
3276 unsafe { libc::fremovexattr(file.0.as_raw_descriptor(), name.as_ptr()) }
3277 )?;
3278 }
3279
3280 Ok(())
3281 }
3282
3283 fn fallocate(
3284 &self,
3285 _ctx: Context,
3286 inode: Inode,
3287 handle: Handle,
3288 mode: u32,
3289 offset: u64,
3290 length: u64,
3291 ) -> io::Result<()> {
3292 let _trace = fs_trace!(self.tag, "fallocate", inode, handle, mode, offset, length);
3293
3294 let data: Arc<dyn AsRawDescriptor> = if self.zero_message_open.load(Ordering::Relaxed) {
3295 let data = self.find_inode(inode)?;
3296
3297 {
3298 // fallocate needs a writable fd
3299 let mut file = data.file.lock();
3300 let mut flags = file.1;
3301 match flags & libc::O_ACCMODE {
3302 libc::O_RDONLY => {
3303 flags &= !libc::O_RDONLY;
3304 flags |= libc::O_RDWR;
3305
3306 // We need to get a writable handle for this file.
3307 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDWR)?;
3308 *file = (newfile, flags);
3309 }
3310 libc::O_WRONLY | libc::O_RDWR => {}
3311 _ => panic!("Unexpected flags: {:#x}", flags),
3312 }
3313 }
3314
3315 data
3316 } else {
3317 self.find_handle(handle, inode)?
3318 };
3319
3320 let fd = data.as_raw_descriptor();
3321 // SAFETY: this doesn't modify any memory and we check the return value.
3322 syscall!(unsafe {
3323 libc::fallocate64(
3324 fd,
3325 mode as libc::c_int,
3326 offset as libc::off64_t,
3327 length as libc::off64_t,
3328 )
3329 })?;
3330
3331 Ok(())
3332 }
3333
3334 #[allow(clippy::unnecessary_cast)]
3335 fn ioctl<R: io::Read>(
3336 &self,
3337 ctx: Context,
3338 inode: Inode,
3339 handle: Handle,
3340 _flags: IoctlFlags,
3341 cmd: u32,
3342 _arg: u64,
3343 in_size: u32,
3344 out_size: u32,
3345 r: R,
3346 ) -> io::Result<IoctlReply> {
3347 let _trace = fs_trace!(self.tag, "ioctl", inode, handle, cmd, in_size, out_size);
3348
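// Dispatch on the ioctl number. The guest-supplied in/out buffer sizes are validated where
// needed; unrecognized ioctls return ENOTTY.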
3349 match cmd as IoctlNr {
3350 FS_IOC_GET_ENCRYPTION_POLICY_EX => self.get_encryption_policy_ex(inode, handle, r),
3351 FS_IOC_FSGETXATTR => {
3352 if out_size < size_of::<fsxattr>() as u32 {
3353 Err(io::Error::from_raw_os_error(libc::ENOMEM))
3354 } else {
3355 self.get_fsxattr(inode, handle)
3356 }
3357 }
3358 FS_IOC_FSSETXATTR => {
3359 if in_size < size_of::<fsxattr>() as u32 {
3360 Err(io::Error::from_raw_os_error(libc::EINVAL))
3361 } else {
3362 self.set_fsxattr(ctx, inode, handle, r)
3363 }
3364 }
3365 FS_IOC32_GETFLAGS | FS_IOC64_GETFLAGS => {
3366 if out_size < size_of::<c_int>() as u32 {
3367 Err(io::Error::from_raw_os_error(libc::ENOMEM))
3368 } else {
3369 self.get_flags(inode, handle)
3370 }
3371 }
3372 FS_IOC32_SETFLAGS | FS_IOC64_SETFLAGS => {
3373 if in_size < size_of::<c_int>() as u32 {
3374 Err(io::Error::from_raw_os_error(libc::ENOMEM))
3375 } else {
3376 self.set_flags(ctx, inode, handle, r)
3377 }
3378 }
3379 FS_IOC_ENABLE_VERITY => {
3380 if in_size < size_of::<fsverity_enable_arg>() as u32 {
3381 Err(io::Error::from_raw_os_error(libc::ENOMEM))
3382 } else {
3383 self.enable_verity(inode, handle, r)
3384 }
3385 }
3386 FS_IOC_MEASURE_VERITY => {
3387 if in_size < size_of::<fsverity_digest>() as u32
3388 || out_size < size_of::<fsverity_digest>() as u32
3389 {
3390 Err(io::Error::from_raw_os_error(libc::ENOMEM))
3391 } else {
3392 self.measure_verity(inode, handle, r, out_size)
3393 }
3394 }
3395 // The following are ARCVM-specific ioctls.
3396 // Refer to go/remove-mount-passthrough-fuse for more design details.
3397 #[cfg(feature = "arc_quota")]
3398 FS_IOC_SETPERMISSION => {
3399 if in_size != size_of::<FsPermissionDataBuffer>() as u32 {
3400 Err(io::Error::from_raw_os_error(libc::EINVAL))
3401 } else {
3402 Ok(self.set_permission_by_path(r))
3403 }
3404 }
3405 #[cfg(feature = "arc_quota")]
3406 FS_IOC_SETPATHXATTR => {
3407 if in_size != size_of::<FsPathXattrDataBuffer>() as u32 {
3408 Err(io::Error::from_raw_os_error(libc::EINVAL))
3409 } else {
3410 Ok(self.set_xattr_by_path(r))
3411 }
3412 }
3413 _ => Err(io::Error::from_raw_os_error(libc::ENOTTY)),
3414 }
3415 }
3416
3417 fn copy_file_range(
3418 &self,
3419 ctx: Context,
3420 inode_src: Inode,
3421 handle_src: Handle,
3422 offset_src: u64,
3423 inode_dst: Inode,
3424 handle_dst: Handle,
3425 offset_dst: u64,
3426 length: u64,
3427 flags: u64,
3428 ) -> io::Result<usize> {
3429 let _trace = fs_trace!(
3430 self.tag,
3431 "copy_file_range",
3432 inode_src,
3433 handle_src,
3434 offset_src,
3435 inode_dst,
3436 handle_dst,
3437 offset_dst,
3438 length,
3439 flags
3440 );
3441 // We need to change credentials during a write so that the kernel will remove setuid or
3442 // setgid bits from the file if it was written to by someone other than the owner.
3443 let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?;
3444 let (src_data, dst_data): (Arc<dyn AsRawDescriptor>, Arc<dyn AsRawDescriptor>) =
3445 if self.zero_message_open.load(Ordering::Relaxed) {
3446 (self.find_inode(inode_src)?, self.find_inode(inode_dst)?)
3447 } else {
3448 (
3449 self.find_handle(handle_src, inode_src)?,
3450 self.find_handle(handle_dst, inode_dst)?,
3451 )
3452 };
3453
3454 let src = src_data.as_raw_descriptor();
3455 let dst = dst_data.as_raw_descriptor();
3456
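// Note: the offsets are passed to `SYS_copy_file_range` by pointer, so the file positions
// of `src` and `dst` themselves are not advanced by this call.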
3457 Ok(syscall!(
3458 // SAFETY: this call is safe because it doesn't modify any memory and we
3459 // check the return value.
3460 unsafe {
3461 libc::syscall(
3462 libc::SYS_copy_file_range,
3463 src,
3464 &offset_src,
3465 dst,
3466 &offset_dst,
3467 length,
3468 flags,
3469 )
3470 }
3471 )? as usize)
3472 }
3473
3474 fn set_up_mapping<M: Mapper>(
3475 &self,
3476 _ctx: Context,
3477 inode: Self::Inode,
3478 _handle: Self::Handle,
3479 file_offset: u64,
3480 mem_offset: u64,
3481 size: usize,
3482 prot: u32,
3483 mapper: M,
3484 ) -> io::Result<()> {
3485 let _trace = fs_trace!(
3486 self.tag,
3487 "set_up_mapping",
3488 inode,
3489 file_offset,
3490 mem_offset,
3491 size,
3492 prot
3493 );
3494 if !self.cfg.use_dax {
3495 return Err(io::Error::from_raw_os_error(libc::ENOSYS));
3496 }
3497
3498 let read = prot & libc::PROT_READ as u32 != 0;
3499 let write = prot & libc::PROT_WRITE as u32 != 0;
3500 let (mmap_flags, prot) = match (read, write) {
3501 (true, true) => (libc::O_RDWR, Protection::read_write()),
3502 (true, false) => (libc::O_RDONLY, Protection::read()),
3503 // Write-only is mapped to O_RDWR since mmap always requires an fd opened for reading.
3504 (false, true) => (libc::O_RDWR, Protection::write()),
3505 (false, false) => return Err(io::Error::from_raw_os_error(libc::EINVAL)),
3506 };
3507
3508 let data = self.find_inode(inode)?;
3509
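// With zero-message open, reuse the inode's cached fd (upgrading it to O_RDWR if needed);
// otherwise open a fresh fd just for this mapping.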
3510 if self.zero_message_open.load(Ordering::Relaxed) {
3511 let mut file = data.file.lock();
3512 let mut open_flags = file.1;
3513 match (mmap_flags, open_flags & libc::O_ACCMODE) {
3514 (libc::O_RDONLY, libc::O_WRONLY)
3515 | (libc::O_RDWR, libc::O_RDONLY)
3516 | (libc::O_RDWR, libc::O_WRONLY) => {
3517 // We have a read-only or write-only fd and we need to upgrade it.
3518 open_flags &= !libc::O_ACCMODE;
3519 open_flags |= libc::O_RDWR;
3520
3521 let newfile = self.open_fd(file.0.as_raw_descriptor(), libc::O_RDWR)?;
3522 *file = (newfile, open_flags);
3523 }
3524 (libc::O_RDONLY, libc::O_RDONLY)
3525 | (libc::O_RDONLY, libc::O_RDWR)
3526 | (libc::O_RDWR, libc::O_RDWR) => {}
3527 (m, o) => panic!(
3528 "Unexpected combination of access flags: ({:#x}, {:#x})",
3529 m, o
3530 ),
3531 }
3532 mapper.map(mem_offset, size, &file.0, file_offset, prot)
3533 } else {
3534 let file = self.open_inode(&data, mmap_flags | libc::O_NONBLOCK)?;
3535 mapper.map(mem_offset, size, &file, file_offset, prot)
3536 }
3537 }
3538
3539 fn remove_mapping<M: Mapper>(&self, msgs: &[RemoveMappingOne], mapper: M) -> io::Result<()> {
3540 let _trace = fs_trace!(self.tag, "remove_mapping", msgs);
3541 if !self.cfg.use_dax {
3542 return Err(io::Error::from_raw_os_error(libc::ENOSYS));
3543 }
3544
3545 for RemoveMappingOne { moffset, len } in msgs {
3546 mapper.unmap(*moffset, *len)?;
3547 }
3548 Ok(())
3549 }
3550
3551 fn atomic_open(
3552 &self,
3553 ctx: Context,
3554 parent: Self::Inode,
3555 name: &CStr,
3556 mode: u32,
3557 flags: u32,
3558 umask: u32,
3559 security_ctx: Option<&CStr>,
3560 ) -> io::Result<(Entry, Option<Self::Handle>, OpenOptions)> {
3561 let _trace = fs_trace!(
3562 self.tag,
3563 "atomic_open",
3564 parent,
3565 name,
3566 mode,
3567 flags,
3568 umask,
3569 security_ctx
3570 );
3571 // Perform the lookup, but do not create a negative dentry.
3572 let data = self.find_inode(parent)?;
3573
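// Pick the uid/gid to impersonate for this operation; the source depends on which
// permission-translation feature, if any, is enabled.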
3574 #[allow(unused_variables)]
3575 #[cfg(feature = "arc_quota")]
3576 let (uid, gid) = self.change_creds(&ctx, &data, name);
3577 #[cfg(feature = "fs_runtime_ugid_map")]
3578 let (uid, gid) = self.change_ugid_creds(&ctx, &data, name);
3579 #[cfg(not(feature = "fs_permission_translation"))]
3580 let (uid, gid) = (ctx.uid, ctx.gid);
3581
3582 let (_uid, _gid) = set_creds(uid, gid)?;
3583
3584 // This lookup serves two purposes:
3585 // 1. If the O_CREAT flag is not set, it retrieves the dentry for the file.
3586 // 2. If the O_CREAT flag is set, it checks whether the file already exists.
3587 let res = self.do_lookup_with_casefold_fallback(&data, name);
3588
3589 if let Err(e) = res {
3590 if e.kind() == std::io::ErrorKind::NotFound && (flags as i32 & libc::O_CREAT) != 0 {
3591 // If the file does not exist and O_CREAT is set, create the file and set the
3592 // FILE_CREATED bit in the open options.
3593 let (entry, handler, mut opts) =
3594 self.create(ctx, parent, name, mode, flags, umask, security_ctx)?;
3595 opts |= OpenOptions::FILE_CREATED;
3596 return Ok((entry, handler, opts));
3597 } else if e.kind() == std::io::ErrorKind::NotFound
3598 && !self.cfg.negative_timeout.is_zero()
3599 {
3600 return Ok((
3601 Entry::new_negative(self.cfg.negative_timeout),
3602 None,
3603 OpenOptions::empty(),
3604 ));
3605 }
3606 return Err(e);
3607 }
3608
3609 // `res` was checked above and is not an error, so this unwrap cannot panic.
3610 let entry = res.unwrap();
3611
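// Symlinks are returned without a handle so that the guest kernel can follow the link
// itself.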
3612 if entry.attr.st_mode & libc::S_IFMT == libc::S_IFLNK {
3613 return Ok((entry, None, OpenOptions::empty()));
3614 }
3615
3616 if (flags as i32 & (libc::O_CREAT | libc::O_EXCL)) == (libc::O_CREAT | libc::O_EXCL) {
3617 return Err(eexist());
3618 }
3619
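// With zero-message open negotiated, no handle is created and KEEP_CACHE tells the guest
// to keep its page cache for this file.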
3620 let (handler, opts) = if self.zero_message_open.load(Ordering::Relaxed) {
3621 (None, OpenOptions::KEEP_CACHE)
3622 } else {
3623 let (handler, opts) = self.do_open(entry.inode, flags)?;
3624 (handler, opts)
3625 };
3626 Ok((entry, handler, opts))
3627 }
3628 }
3629
3630 #[cfg(test)]
3631 mod tests {
3632 use std::path::Path;
3633
3634 use named_lock::NamedLock;
3635 use tempfile::TempDir;
3636
3637 use super::*;
3638 #[cfg(feature = "arc_quota")]
3639 use crate::virtio::fs::arc_ioctl::FS_IOCTL_PATH_MAX_LEN;
3640 #[cfg(feature = "arc_quota")]
3641 use crate::virtio::fs::arc_ioctl::FS_IOCTL_XATTR_NAME_MAX_LEN;
3642 #[cfg(feature = "arc_quota")]
3643 use crate::virtio::fs::arc_ioctl::FS_IOCTL_XATTR_VALUE_MAX_LEN;
3644
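// Name of the process-wide lock used to serialize tests that create a `PassthroughFs`,
// since the filesystem performs process-wide operations such as `fchdir`.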
3645 const UNITTEST_LOCK_NAME: &str = "passthroughfs_unittest_lock";
3646
3647 // Create an instance of `Context` with valid uid, gid, and pid.
3648 // The correct ids are necessary for test cases where new files are created.
3649 fn get_context() -> Context {
3650 // SAFETY: both calls take no parameters and only return an integer value. The kernel also
3651 // guarantees that they can never fail.
3652 let uid = unsafe { libc::syscall(SYS_GETEUID) as libc::uid_t };
3653 // SAFETY: both calls take no parameters and only return an integer value. The kernel also
3654 // guarantees that they can never fail.
3655 let gid = unsafe { libc::syscall(SYS_GETEGID) as libc::gid_t };
3656 let pid = std::process::id() as libc::pid_t;
3657 Context { uid, gid, pid }
3658 }
3659
3660 /// Creates the given directories and files under `temp_dir`.
3661 fn create_test_data(temp_dir: &TempDir, dirs: &[&str], files: &[&str]) {
3662 let path = temp_dir.path();
3663
3664 for d in dirs {
3665 std::fs::create_dir_all(path.join(d)).unwrap();
3666 }
3667
3668 for f in files {
3669 File::create(path.join(f)).unwrap();
3670 }
3671 }
3672
3673 /// Looks up the given `path` in `fs`.
3674 fn lookup(fs: &PassthroughFs, path: &Path) -> io::Result<Inode> {
3675 let mut inode = 1;
3676 let ctx = get_context();
3677 for name in path.iter() {
3678 let name = CString::new(name.to_str().unwrap()).unwrap();
3679 let ent = match fs.lookup(ctx, inode, &name) {
3680 Ok(ent) => ent,
3681 Err(e) => {
3682 return Err(e);
3683 }
3684 };
3685 inode = ent.inode;
3686 }
3687 Ok(inode)
3688 }
3689
3690 /// Looks up the given `path` in `fs` and returns its full `Entry`.
3691 #[cfg(feature = "arc_quota")]
3692 fn lookup_ent(fs: &PassthroughFs, path: &Path) -> io::Result<Entry> {
3693 let mut inode = 1;
3694 let ctx = get_context();
3695 let mut entry = Entry::new_negative(Duration::from_secs(10));
3696 for name in path.iter() {
3697 let name = CString::new(name.to_str().unwrap()).unwrap();
3698 entry = match fs.lookup(ctx, inode, &name) {
3699 Ok(ent) => ent,
3700 Err(e) => {
3701 return Err(e);
3702 }
3703 };
3704 inode = entry.inode;
3705 }
3706 Ok(entry)
3707 }
3708
3709 /// Creates a file at the given `path`.
3710 fn create(fs: &PassthroughFs, path: &Path) -> io::Result<Entry> {
3711 let parent = path.parent().unwrap();
3712 let filename = CString::new(path.file_name().unwrap().to_str().unwrap()).unwrap();
3713 let parent_inode = lookup(fs, parent)?;
3714 let ctx = get_context();
3715 let security_ctx = None;
3716 fs.create(
3717 ctx,
3718 parent_inode,
3719 &filename,
3720 0o666,
3721 libc::O_RDWR as u32,
3722 0,
3723 security_ctx,
3724 )
3725 .map(|(entry, _, _)| entry)
3726 }
3727
3728 /// Removes a file at the given `path`.
3729 fn unlink(fs: &PassthroughFs, path: &Path) -> io::Result<()> {
3730 let parent = path.parent().unwrap();
3731 let filename = CString::new(path.file_name().unwrap().to_str().unwrap()).unwrap();
3732 let parent_inode = lookup(fs, parent)?;
3733 let ctx = get_context();
3734 fs.unlink(ctx, parent_inode, &filename)
3735 }
3736
3737 /// Forgets the cached inode for the given `path`.
3738 fn forget(fs: &PassthroughFs, path: &Path) -> io::Result<()> {
3739 let ctx = get_context();
3740 let inode = lookup(fs, path)?;
3741 // Pass `u64::MAX` to ensure that the refcount goes to 0 and we forget the inode.
3742 fs.forget(ctx, inode, u64::MAX);
3743 Ok(())
3744 }
3745
3746 /// Looks up and opens the given `path` in `fs`.
3747 fn atomic_open(
3748 fs: &PassthroughFs,
3749 path: &Path,
3750 mode: u32,
3751 flags: u32,
3752 umask: u32,
3753 security_ctx: Option<&CStr>,
3754 ) -> io::Result<(Entry, Option<Handle>, OpenOptions)> {
3755 let mut inode = 1;
3756 let ctx = get_context();
3757
3758 let path_vec: Vec<_> = path.iter().collect();
3759 let vec_len = path_vec.len();
3760
3761 // Look up every path component up to the (vec_len-1)-th one. This simulates the
3762 // behavior of the VFS, which calls atomic_open only for the final component.
3763 for name in &path_vec[0..vec_len - 1] {
3764 let name = CString::new(name.to_str().unwrap()).unwrap();
3765 let ent = fs.lookup(ctx, inode, &name)?;
3766 inode = ent.inode;
3767 }
3768
3769 let name = CString::new(path_vec[vec_len - 1].to_str().unwrap()).unwrap();
3770
3771 fs.atomic_open(ctx, inode, &name, mode, flags, umask, security_ctx)
3772 }
3773
3774 fn symlink(
3775 fs: &PassthroughFs,
3776 linkname: &Path,
3777 name: &Path,
3778 security_ctx: Option<&CStr>,
3779 ) -> io::Result<Entry> {
3780 let inode = 1;
3781 let ctx = get_context();
3782 let name = CString::new(name.to_str().unwrap()).unwrap();
3783 let linkname = CString::new(linkname.to_str().unwrap()).unwrap();
3784 fs.symlink(ctx, &linkname, inode, &name, security_ctx)
3785 }
3786
3787 // In this ioctl, inode, handle, flags, arg, and out_size are irrelevant and set to empty values.
3788 #[cfg(feature = "arc_quota")]
3789 fn fs_ioc_setpermission<R: io::Read>(
3790 fs: &PassthroughFs,
3791 in_size: u32,
3792 r: R,
3793 ) -> io::Result<IoctlReply> {
3794 let ctx = get_context();
3795 fs.ioctl(
3796 ctx,
3797 0,
3798 0,
3799 IoctlFlags::empty(),
3800 FS_IOC_SETPERMISSION as u32,
3801 0,
3802 in_size,
3803 0,
3804 r,
3805 )
3806 }
3807
3808 // In this ioctl, inode, handle, flags, arg, and out_size are irrelevant and set to empty values.
3809 #[cfg(feature = "arc_quota")]
3810 fn fs_ioc_setpathxattr<R: io::Read>(
3811 fs: &PassthroughFs,
3812 in_size: u32,
3813 r: R,
3814 ) -> io::Result<IoctlReply> {
3815 let ctx = get_context();
3816 fs.ioctl(
3817 ctx,
3818 0,
3819 0,
3820 IoctlFlags::empty(),
3821 FS_IOC_SETPATHXATTR as u32,
3822 0,
3823 in_size,
3824 0,
3825 r,
3826 )
3827 }
3828
3829 #[test]
3830 fn rewrite_xattr_names() {
3831 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
3832 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
3833 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
3834 let _guard = lock.lock().expect("acquire named lock");
3835
3836 let cfg = Config {
3837 rewrite_security_xattrs: true,
3838 ..Default::default()
3839 };
3840
3841 let p = PassthroughFs::new("tag", cfg).expect("Failed to create PassthroughFs");
3842
3843 // Selinux shouldn't get overwritten.
3844 let selinux = c"security.selinux";
3845 assert_eq!(p.rewrite_xattr_name(selinux).to_bytes(), selinux.to_bytes());
3846
3847 // user, trusted, and system should not be changed either.
3848 let user = c"user.foobar";
3849 assert_eq!(p.rewrite_xattr_name(user).to_bytes(), user.to_bytes());
3850 let trusted = c"trusted.foobar";
3851 assert_eq!(p.rewrite_xattr_name(trusted).to_bytes(), trusted.to_bytes());
3852 let system = c"system.foobar";
3853 assert_eq!(p.rewrite_xattr_name(system).to_bytes(), system.to_bytes());
3854
3855 // sehash should be re-written.
3856 let sehash = c"security.sehash";
3857 assert_eq!(
3858 p.rewrite_xattr_name(sehash).to_bytes(),
3859 b"user.virtiofs.security.sehash"
3860 );
3861 }
3862
3863 #[test]
3864 fn strip_xattr_names() {
3865 let only_nuls = b"\0\0\0\0\0";
3866 let mut actual = only_nuls.to_vec();
3867 strip_xattr_prefix(&mut actual);
3868 assert_eq!(&actual[..], &only_nuls[..]);
3869
3870 let no_nuls = b"security.sehashuser.virtiofs";
3871 let mut actual = no_nuls.to_vec();
3872 strip_xattr_prefix(&mut actual);
3873 assert_eq!(&actual[..], &no_nuls[..]);
3874
3875 let empty = b"";
3876 let mut actual = empty.to_vec();
3877 strip_xattr_prefix(&mut actual);
3878 assert_eq!(&actual[..], &empty[..]);
3879
3880 let no_strippable_names = b"security.selinux\0user.foobar\0system.test\0";
3881 let mut actual = no_strippable_names.to_vec();
3882 strip_xattr_prefix(&mut actual);
3883 assert_eq!(&actual[..], &no_strippable_names[..]);
3884
3885 let only_strippable_names = b"user.virtiofs.security.sehash\0user.virtiofs.security.wat\0";
3886 let mut actual = only_strippable_names.to_vec();
3887 strip_xattr_prefix(&mut actual);
3888 assert_eq!(&actual[..], b"security.sehash\0security.wat\0");
3889
3890 let mixed_names = b"user.virtiofs.security.sehash\0security.selinux\0user.virtiofs.security.wat\0user.foobar\0";
3891 let mut actual = mixed_names.to_vec();
3892 strip_xattr_prefix(&mut actual);
3893 let expected = b"security.sehash\0security.selinux\0security.wat\0user.foobar\0";
3894 assert_eq!(&actual[..], &expected[..]);
3895
3896 let no_nul_with_prefix = b"user.virtiofs.security.sehash";
3897 let mut actual = no_nul_with_prefix.to_vec();
3898 strip_xattr_prefix(&mut actual);
3899 assert_eq!(&actual[..], b"security.sehash");
3900 }
3901
3902 #[test]
3903 fn lookup_files() {
3904 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
3905 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
3906 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
3907 let _guard = lock.lock().expect("acquire named lock");
3908
3909 let temp_dir = TempDir::new().unwrap();
3910 create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/b.txt"]);
3911
3912 let cfg = Default::default();
3913 let fs = PassthroughFs::new("tag", cfg).unwrap();
3914
3915 let capable = FsOptions::empty();
3916 fs.init(capable).unwrap();
3917
3918 assert!(lookup(&fs, &temp_dir.path().join("a.txt")).is_ok());
3919 assert!(lookup(&fs, &temp_dir.path().join("dir")).is_ok());
3920 assert!(lookup(&fs, &temp_dir.path().join("dir/b.txt")).is_ok());
3921
3922 assert_eq!(
3923 lookup(&fs, &temp_dir.path().join("nonexistent-file"))
3924 .expect_err("file must not exist")
3925 .kind(),
3926 io::ErrorKind::NotFound
3927 );
3928 // "A.txt" is different from "a.txt".
3929 assert_eq!(
3930 lookup(&fs, &temp_dir.path().join("A.txt"))
3931 .expect_err("file must not exist")
3932 .kind(),
3933 io::ErrorKind::NotFound
3934 );
3935 }
3936
3937 #[test]
3938 fn lookup_files_ascii_casefold() {
3939 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
3940 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
3941 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
3942 let _guard = lock.lock().expect("acquire named lock");
3943
3944 let temp_dir = TempDir::new().unwrap();
3945 create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/b.txt"]);
3946
3947 let cfg = Config {
3948 ascii_casefold: true,
3949 ..Default::default()
3950 };
3951 let fs = PassthroughFs::new("tag", cfg).unwrap();
3952
3953 let capable = FsOptions::empty();
3954 fs.init(capable).unwrap();
3955
3956 // Ensure that "A.txt" is equated with "a.txt".
3957 let a_inode = lookup(&fs, &temp_dir.path().join("a.txt")).expect("a.txt must be found");
3958 assert_eq!(
3959 lookup(&fs, &temp_dir.path().join("A.txt")).expect("A.txt must exist"),
3960 a_inode
3961 );
3962
3963 let dir_inode = lookup(&fs, &temp_dir.path().join("dir")).expect("dir must be found");
3964 assert_eq!(
3965 lookup(&fs, &temp_dir.path().join("DiR")).expect("DiR must exist"),
3966 dir_inode
3967 );
3968
3969 let b_inode =
3970 lookup(&fs, &temp_dir.path().join("dir/b.txt")).expect("dir/b.txt must be found");
3971 assert_eq!(
3972 lookup(&fs, &temp_dir.path().join("dIr/B.TxT")).expect("dIr/B.TxT must exist"),
3973 b_inode
3974 );
3975
3976 assert_eq!(
3977 lookup(&fs, &temp_dir.path().join("nonexistent-file"))
3978 .expect_err("file must not exist")
3979 .kind(),
3980 io::ErrorKind::NotFound
3981 );
3982 }
3983
3984 fn test_create_and_remove(ascii_casefold: bool) {
3985 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
3986 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
3987 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
3988 let _guard = lock.lock().expect("acquire named lock");
3989
3990 let temp_dir = TempDir::new().unwrap();
3991 let timeout = Duration::from_millis(10);
3992 let cfg = Config {
3993 timeout,
3994 cache_policy: CachePolicy::Auto,
3995 ascii_casefold,
3996 ..Default::default()
3997 };
3998 let fs = PassthroughFs::new("tag", cfg).unwrap();
3999
4000 let capable = FsOptions::empty();
4001 fs.init(capable).unwrap();
4002
4003 // Create a.txt and b.txt.
4004 let a_path = temp_dir.path().join("a.txt");
4005 let b_path = temp_dir.path().join("b.txt");
4006 let a_entry = create(&fs, &a_path).expect("create a.txt");
4007 let b_entry = create(&fs, &b_path).expect("create b.txt");
4008 assert_eq!(
4009 a_entry.inode,
4010 lookup(&fs, &a_path).expect("lookup a.txt"),
4011 "Created file 'a.txt' must be looked up"
4012 );
4013 assert_eq!(
4014 b_entry.inode,
4015 lookup(&fs, &b_path).expect("lookup b.txt"),
4016 "Created file 'b.txt' must be looked up"
4017 );
4018
4019 // Remove a.txt only
4020 unlink(&fs, &a_path).expect("Remove");
4021 assert_eq!(
4022 lookup(&fs, &a_path)
4023 .expect_err("file must not exist")
4024 .kind(),
4025 io::ErrorKind::NotFound,
4026 "a.txt must be removed"
4027 );
4028 // "A.TXT" must not be found regardless of whether casefold is enabled or not.
4029 let upper_a_path = temp_dir.path().join("A.TXT");
4030 assert_eq!(
4031 lookup(&fs, &upper_a_path)
4032 .expect_err("file must not exist")
4033 .kind(),
4034 io::ErrorKind::NotFound,
4035 "A.txt must be removed"
4036 );
4037
4038 // Check that the host file system no longer has a.txt but still has b.txt.
4039 assert!(!a_path.exists(), "a.txt must be removed");
4040 assert!(b_path.exists(), "b.txt must exist");
4041 }
4042
4043 #[test]
4044 fn create_and_remove() {
4045 test_create_and_remove(false /* casefold */);
4046 }
4047
4048 #[test]
4049 fn create_and_remove_casefold() {
4050 test_create_and_remove(true /* casefold */);
4051 }
4052
4053 fn test_create_and_forget(ascii_casefold: bool) {
4054 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
4055 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
4056 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4057 let _guard = lock.lock().expect("acquire named lock");
4058
4059 let temp_dir = TempDir::new().unwrap();
4060 let timeout = Duration::from_millis(10);
4061 let cfg = Config {
4062 timeout,
4063 cache_policy: CachePolicy::Auto,
4064 ascii_casefold,
4065 ..Default::default()
4066 };
4067 let fs = PassthroughFs::new("tag", cfg).unwrap();
4068
4069 let capable = FsOptions::empty();
4070 fs.init(capable).unwrap();
4071
4072 // Create a.txt.
4073 let a_path = temp_dir.path().join("a.txt");
4074 let a_entry = create(&fs, &a_path).expect("create a.txt");
4075 assert_eq!(
4076 a_entry.inode,
4077 lookup(&fs, &a_path).expect("lookup a.txt"),
4078 "Created file 'a.txt' must be looked up"
4079 );
4080
4081 // Forget a.txt's inode from PassthroughFs's internal cache.
4082 forget(&fs, &a_path).expect("forget a.txt");
4083
4084 if ascii_casefold {
4085 let upper_a_path = temp_dir.path().join("A.TXT");
4086 let new_a_inode = lookup(&fs, &upper_a_path).expect("lookup a.txt");
4087 assert_ne!(
4088 a_entry.inode, new_a_inode,
4089 "inode must be changed after forget()"
4090 );
4091 assert_eq!(
4092 new_a_inode,
4093 lookup(&fs, &a_path).expect("lookup a.txt"),
4094 "inode must be same for a.txt and A.TXT"
4095 );
4096 } else {
4097 assert_ne!(
4098 a_entry.inode,
4099 lookup(&fs, &a_path).expect("lookup a.txt"),
4100 "inode must be changed after forget()"
4101 );
4102 }
4103 }
4104
4105 #[test]
4106 fn create_and_forget() {
4107 test_create_and_forget(false /* ascii_casefold */);
4108 }
4109
4110 #[test]
4111 fn create_and_forget_casefold() {
4112 test_create_and_forget(true /* ascii_casefold */);
4113 }
4114
4115 #[test]
4116 fn casefold_lookup_cache() {
4117 let temp_dir = TempDir::new().unwrap();
4118 // Prepare `a.txt` before starting the test.
4119 create_test_data(&temp_dir, &[], &["a.txt"]);
4120
4121 let cfg = Config {
4122 ascii_casefold: true,
4123 ..Default::default()
4124 };
4125 let fs = PassthroughFs::new("tag", cfg).unwrap();
4126
4127 let capable = FsOptions::empty();
4128 fs.init(capable).unwrap();
4129
4130 let parent = lookup(&fs, temp_dir.path()).expect("lookup temp_dir");
4131
4132 // Since `a.txt` exists, "A.TXT" must exist.
4133 let large_a_path = temp_dir.path().join("A.TXT");
4134 // Looking up "A.TXT" must create a CasefoldCache entry.
4135 lookup(&fs, &large_a_path).expect("A.TXT must exist");
4136 assert!(fs.exists_in_casefold_cache(parent, &CString::new("A.TXT").unwrap()));
4137
4138 // Create b.txt.
4139 let b_path = temp_dir.path().join("b.txt");
4140 create(&fs, &b_path).expect("create b.txt");
4141 // Then, b.txt must exist in the cache.
4142 assert!(fs.exists_in_casefold_cache(parent, &CString::new("B.TXT").unwrap()));
4143 // When removing b.txt, it must be removed from the cache as well.
4144 unlink(&fs, &b_path).expect("remove b.txt");
4145 assert!(!fs.exists_in_casefold_cache(parent, &CString::new("B.TXT").unwrap()));
4146 }
4147
4148 #[test]
4149 fn lookup_negative_cache() {
4150 let temp_dir = TempDir::new().unwrap();
4151 // Start with an empty directory; `a.txt` is created later in the test.
4152 create_test_data(&temp_dir, &[], &[]);
4153
4154 let cfg = Config {
4155 negative_timeout: Duration::from_secs(5),
4156 ..Default::default()
4157 };
4158 let fs = PassthroughFs::new("tag", cfg).unwrap();
4159
4160 let capable = FsOptions::empty();
4161 fs.init(capable).unwrap();
4162
4163 let a_path = temp_dir.path().join("a.txt");
4164 // a.txt does not exist yet.
4165 // Since negative_timeout is enabled, success with inode=0 is expected.
4166 assert_eq!(
4167 0,
4168 lookup(&fs, &a_path).expect("lookup a.txt"),
4169 "Entry with inode=0 is expected for non-existing file 'a.txt'"
4170 );
4171 // Create a.txt
4172 let a_entry = create(&fs, &a_path).expect("create a.txt");
4173 assert_eq!(
4174 a_entry.inode,
4175 lookup(&fs, &a_path).expect("lookup a.txt"),
4176 "Created file 'a.txt' must be looked up"
4177 );
4178 // Remove a.txt
4179 unlink(&fs, &a_path).expect("Remove");
4180 assert_eq!(
4181 0,
4182 lookup(&fs, &a_path).expect("lookup a.txt"),
4183 "Entry with inode=0 is expected for the removed file 'a.txt'"
4184 );
4185 }
4186 #[test]
4187 fn test_atomic_open_existing_file() {
4188 atomic_open_existing_file(false);
4189 }
4190
4191 #[test]
4192 fn test_atomic_open_existing_file_zero_message() {
4193 atomic_open_existing_file(true);
4194 }
4195
4196 fn atomic_open_existing_file(zero_message_open: bool) {
4197 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
4198 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
4199 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4200 let _guard = lock.lock().expect("acquire named lock");
4201
4202 let temp_dir = TempDir::new().unwrap();
4203 create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/b.txt", "dir/c.txt"]);
4204
4205 let cache_policy = match zero_message_open {
4206 true => CachePolicy::Always,
4207 false => CachePolicy::Auto,
4208 };
4209
4210 let cfg = Config {
4211 cache_policy,
4212 ..Default::default()
4213 };
4214 let fs = PassthroughFs::new("tag", cfg).unwrap();
4215
4216 let capable = FsOptions::ZERO_MESSAGE_OPEN;
4217 fs.init(capable).unwrap();
4218
4219 // atomic_open with O_RDWR should return a positive dentry and, unless zero-message open is in effect, a file handle
4220 let res = atomic_open(
4221 &fs,
4222 &temp_dir.path().join("a.txt"),
4223 0o666,
4224 libc::O_RDWR as u32,
4225 0,
4226 None,
4227 );
4228 assert!(res.is_ok());
4229 let (entry, handler, open_options) = res.unwrap();
4230 assert_ne!(entry.inode, 0);
4231
4232 if zero_message_open {
4233 assert!(handler.is_none());
4234 assert_eq!(open_options, OpenOptions::KEEP_CACHE);
4235 } else {
4236 assert!(handler.is_some());
4237 assert_ne!(
4238 open_options & OpenOptions::FILE_CREATED,
4239 OpenOptions::FILE_CREATED
4240 );
4241 }
4242
4243 // atomic_open with O_RDWR | O_CREAT should return a positive dentry and, unless zero-message open is in effect, a file handle
4244 let res = atomic_open(
4245 &fs,
4246 &temp_dir.path().join("dir/b.txt"),
4247 0o666,
4248 (libc::O_RDWR | libc::O_CREAT) as u32,
4249 0,
4250 None,
4251 );
4252 assert!(res.is_ok());
4253 let (entry, handler, open_options) = res.unwrap();
4254 assert_ne!(entry.inode, 0);
4255
4256 if zero_message_open {
4257 assert!(handler.is_none());
4258 assert_eq!(open_options, OpenOptions::KEEP_CACHE);
4259 } else {
4260 assert!(handler.is_some());
4261 assert_ne!(
4262 open_options & OpenOptions::FILE_CREATED,
4263 OpenOptions::FILE_CREATED
4264 );
4265 }
4266
4267 // atomic_open with O_RDWR | O_CREAT | O_EXCL on an existing file should fail with an
4268 // AlreadyExists (EEXIST) error
4269 let res = atomic_open(
4270 &fs,
4271 &temp_dir.path().join("dir/c.txt"),
4272 0o666,
4273 (libc::O_RDWR | libc::O_CREAT | libc::O_EXCL) as u32,
4274 0,
4275 None,
4276 );
4277 assert!(res.is_err());
4278 let err_kind = res.unwrap_err().kind();
4279 assert_eq!(err_kind, io::ErrorKind::AlreadyExists);
4280 }
4281
4282 #[test]
4283 fn test_atomic_open_non_existing_file() {
4284 atomic_open_non_existing_file(false);
4285 }
4286
4287 #[test]
4288 fn test_atomic_open_non_existing_file_zero_message() {
4289 atomic_open_non_existing_file(true);
4290 }
4291
4292 fn atomic_open_non_existing_file(zero_message_open: bool) {
4293 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
4294 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
4295 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4296 let _guard = lock.lock().expect("acquire named lock");
4297
4298 let temp_dir = TempDir::new().unwrap();
4299
4300 let cache_policy = match zero_message_open {
4301 true => CachePolicy::Always,
4302 false => CachePolicy::Auto,
4303 };
4304
4305 let cfg = Config {
4306 cache_policy,
4307 ..Default::default()
4308 };
4309 let fs = PassthroughFs::new("tag", cfg).unwrap();
4310
4311 let capable = FsOptions::ZERO_MESSAGE_OPEN;
4312 fs.init(capable).unwrap();
4313
4314 // atomic_open with O_RDWR on a non-existent file should return a NotFound (ENOENT) error
4315 let res = atomic_open(
4316 &fs,
4317 &temp_dir.path().join("a.txt"),
4318 0o666,
4319 libc::O_RDWR as u32,
4320 0,
4321 None,
4322 );
4323 assert!(res.is_err());
4324 let err_kind = res.unwrap_err().kind();
4325 assert_eq!(err_kind, io::ErrorKind::NotFound);
4326
4327 // atomic_open with O_RDWR | O_CREAT should create the file and return a positive dentry (and a file handle unless zero-message open is in effect)
4328 let res = atomic_open(
4329 &fs,
4330 &temp_dir.path().join("b.txt"),
4331 0o666,
4332 (libc::O_RDWR | libc::O_CREAT) as u32,
4333 0,
4334 None,
4335 );
4336 assert!(res.is_ok());
4337 let (entry, handler, open_options) = res.unwrap();
4338 assert_ne!(entry.inode, 0);
4339
4340 if zero_message_open {
4341 assert!(handler.is_none());
4342 assert_eq!(
4343 open_options & OpenOptions::KEEP_CACHE,
4344 OpenOptions::KEEP_CACHE
4345 );
4346 } else {
4347 assert!(handler.is_some());
4348 }
4349 assert_eq!(
4350 open_options & OpenOptions::FILE_CREATED,
4351 OpenOptions::FILE_CREATED
4352 );
4353 }
4354
4355 #[test]
4356 fn atomic_open_symbol_link() {
4357 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
4358 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
4359 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4360 let _guard = lock.lock().expect("acquire named lock");
4361
4362 let temp_dir = TempDir::new().unwrap();
4363 create_test_data(&temp_dir, &["dir"], &["a.txt"]);
4364
4365 let cfg = Default::default();
4366 let fs = PassthroughFs::new("tag", cfg).unwrap();
4367
4368 let capable = FsOptions::empty();
4369 fs.init(capable).unwrap();
4370
4371 // atomic open the link destination file
4372 let res_dst = atomic_open(
4373 &fs,
4374 &temp_dir.path().join("a.txt"),
4375 0o666,
4376 libc::O_RDWR as u32,
4377 0,
4378 None,
4379 );
4380 assert!(res_dst.is_ok());
4381 let (entry_dst, handler_dst, _) = res_dst.unwrap();
4382 assert_ne!(entry_dst.inode, 0);
4383 assert!(handler_dst.is_some());
4384
4385 // create a depth-1 symbolic link
4386 let sym1_res = symlink(
4387 &fs,
4388 &temp_dir.path().join("a.txt"),
4389 &temp_dir.path().join("blink"),
4390 None,
4391 );
4392 assert!(sym1_res.is_ok());
4393 let sym1_entry = sym1_res.unwrap();
4394 assert_ne!(sym1_entry.inode, 0);
4395
4396 // atomic_open on the symbolic link should return a dentry with no handle
4397 let res = atomic_open(
4398 &fs,
4399 &temp_dir.path().join("blink"),
4400 0o666,
4401 libc::O_RDWR as u32,
4402 0,
4403 None,
4404 );
4405 assert!(res.is_ok());
4406 let (entry, handler, open_options) = res.unwrap();
4407 assert_eq!(entry.inode, sym1_entry.inode);
4408 assert!(handler.is_none());
4409 assert_eq!(open_options, OpenOptions::empty());
4410
4411 // delete link destination
4412 unlink(&fs, &temp_dir.path().join("a.txt")).expect("Remove");
4413 assert_eq!(
4414 lookup(&fs, &temp_dir.path().join("a.txt"))
4415 .expect_err("file must not exist")
4416 .kind(),
4417 io::ErrorKind::NotFound,
4418 "a.txt must be removed"
4419 );
4420
4421 // after the link destination is removed, atomic_open should still return a valid dentry
4422 let res = atomic_open(
4423 &fs,
4424 &temp_dir.path().join("blink"),
4425 0o666,
4426 libc::O_RDWR as u32,
4427 0,
4428 None,
4429 );
4430 assert!(res.is_ok());
4431 let (entry, handler, open_options) = res.unwrap();
4432 assert_eq!(entry.inode, sym1_entry.inode);
4433 assert!(handler.is_none());
4434 assert_eq!(open_options, OpenOptions::empty());
4435 }
4436
4437 #[test]
4438 #[cfg(feature = "arc_quota")]
4439 fn set_permission_ioctl_valid_data() {
4440 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
4441 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
4442 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4443 let _guard = lock.lock().expect("acquire named lock");
4444
4445 let cfg = Config {
4446 max_dynamic_perm: 1,
4447 ..Default::default()
4448 };
4449 let p = PassthroughFs::new("tag", cfg).expect("Failed to create PassthroughFs");
4450
4451 let perm_path_string = String::from("/test");
4452 let fs_permission_data_buffer = FsPermissionDataBuffer {
4453 guest_uid: 1,
4454 guest_gid: 2,
4455 host_uid: 3,
4456 host_gid: 4,
4457 umask: 5,
4458 pad: 0,
4459 perm_path: {
4460 let mut perm_path: [u8; FS_IOCTL_PATH_MAX_LEN] = [0; FS_IOCTL_PATH_MAX_LEN];
4461 perm_path[..perm_path_string.len()].copy_from_slice(perm_path_string.as_bytes());
4462 perm_path
4463 },
4464 };
4465 let r = std::io::Cursor::new(fs_permission_data_buffer.as_bytes());
4466
4467 let res = fs_ioc_setpermission(
4468 &p,
4469 mem::size_of_val(&fs_permission_data_buffer) as u32,
4470 r.clone(),
4471 )
4472 .expect("valid input should get IoctlReply");
4473 assert!(matches!(res, IoctlReply::Done(Ok(data)) if data.is_empty()));
4474
4475 let read_guard = p
4476 .permission_paths
4477 .read()
4478 .expect("read permission_paths failed");
4479 let permission_data = read_guard
4480 .first()
4481 .expect("permission path should not be empty");
4482
4483 // Check expected data item is added to permission_paths.
4484 let expected_data = PermissionData {
4485 guest_uid: 1,
4486 guest_gid: 2,
4487 host_uid: 3,
4488 host_gid: 4,
4489 umask: 5,
4490 perm_path: perm_path_string,
4491 };
4492 assert_eq!(*permission_data, expected_data);
4493
4494 // Second ioctl should not succeed since max_dynamic_perm is set to 1
4495 let res = fs_ioc_setpermission(
4496 &p,
4497 mem::size_of_val(&fs_permission_data_buffer) as u32,
4498 r.clone(),
4499 )
4500 .expect("valid input should get IoctlReply");
4501 assert!(
4502 matches!(res, IoctlReply::Done(Err(err)) if err.raw_os_error().is_some_and(|errno| {
4503 errno == libc::EPERM
4504 }))
4505 );
4506 }
4507
4508 #[test]
4509 #[cfg(feature = "arc_quota")]
4510 fn set_permission_ioctl_invalid_data() {
4511 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
4512 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
4513 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4514 let _guard = lock.lock().expect("acquire named lock");
4515
4516 let cfg = Config {
4517 max_dynamic_perm: 1,
4518 ..Default::default()
4519 };
4520 let p = PassthroughFs::new("tag", cfg).expect("Failed to create PassthroughFs");
4521
4522 // The perm_path is not valid since it does not start with /.
4523 let perm_path_string = String::from("test");
4524 let fs_permission_data_buffer = FsPermissionDataBuffer {
4525 guest_uid: 1,
4526 guest_gid: 2,
4527 host_uid: 3,
4528 host_gid: 4,
4529 umask: 5,
4530 pad: 0,
4531 perm_path: {
4532 let mut perm_path: [u8; FS_IOCTL_PATH_MAX_LEN] = [0; FS_IOCTL_PATH_MAX_LEN];
4533 perm_path[..perm_path_string.len()].copy_from_slice(perm_path_string.as_bytes());
4534 perm_path
4535 },
4536 };
4537
4538 let r = std::io::Cursor::new(fs_permission_data_buffer.as_bytes());
4539 // In this ioctl, inode, handle, flags, arg, and out_size are irrelevant and set to empty values.
4540 // This call is supposed to get an EINVAL IoctlReply, since the perm_path is invalid.
4541 let res = fs_ioc_setpermission(&p, mem::size_of_val(&fs_permission_data_buffer) as u32, r)
4542 .expect("invalid perm_path should get IoctlReply");
4543 assert!(
4544 matches!(res, IoctlReply::Done(Err(err)) if err.raw_os_error().is_some_and(|errno| {
4545 errno == libc::EINVAL
4546 }))
4547 );
4548
4549 let fake_data_buffer: [u8; 128] = [0; 128];
4550 let r = std::io::Cursor::new(fake_data_buffer.as_bytes());
4551
4552 // This call is supposed to fail with an EINVAL error, since the in_size is not the size of
4553 // struct FsPermissionDataBuffer.
4554 let res = fs_ioc_setpermission(&p, mem::size_of_val(&fake_data_buffer) as u32, r)
4555 .expect_err("invalid in_size should get Error");
4556 assert!(res
4557 .raw_os_error()
4558 .is_some_and(|errno| { errno == libc::EINVAL }));
4559 }
4560
4561 #[test]
4562 #[cfg(feature = "arc_quota")]
4563 fn permission_data_path_matching() {
4564 let ctx = get_context();
4565 let temp_dir = TempDir::new().unwrap();
4566 // Prepare `a.txt` and `dir/a.txt` before starting the test.
4567 create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/a.txt"]);
4568
4569 let cfg = Config {
4570 max_dynamic_perm: 1,
4571 ..Default::default()
4572 };
4573 let fs = PassthroughFs::new("tag", cfg).unwrap();
4574
4575 let capable = FsOptions::empty();
4576 fs.init(capable).unwrap();
4577
4578 const BY_PATH_UID: u32 = 655360;
4579 const BY_PATH_GID: u32 = 655361;
4580 const BY_PATH_UMASK: u32 = 0o007;
4581
4582 let dir_path = temp_dir.path().join("dir");
4583 let permission_data = PermissionData {
4584 guest_uid: BY_PATH_UID,
4585 guest_gid: BY_PATH_GID,
4586 host_uid: ctx.uid,
4587 host_gid: ctx.gid,
4588 umask: BY_PATH_UMASK,
4589 perm_path: dir_path.to_string_lossy().into_owned(),
4590 };
4591 fs.permission_paths
4592 .write()
4593 .expect("permission_path lock must be acquired")
4594 .push(permission_data);
4595
4596 // a_path is a path without any permission-by-path rule applied
4597 let a_path = temp_dir.path().join("a.txt");
4598 let in_dir_a_path = dir_path.join("a.txt");
4599
4600 // a.txt should not get the by-path guest_uid/guest_gid/umask
4601 let a_entry = lookup_ent(&fs, &a_path).expect("a.txt must exist");
4602 assert_ne!(a_entry.attr.st_uid, BY_PATH_UID);
4603 assert_ne!(a_entry.attr.st_gid, BY_PATH_GID);
4604
4605 // a.txt in dir should get the by-path guest_uid/guest_gid/umask
4606 let in_dir_a_entry = lookup_ent(&fs, &in_dir_a_path).expect("dir/a.txt must exist");
4607 assert_eq!(in_dir_a_entry.attr.st_uid, BY_PATH_UID);
4608 assert_eq!(in_dir_a_entry.attr.st_gid, BY_PATH_GID);
4609 assert_eq!(in_dir_a_entry.attr.st_mode & 0o777, !BY_PATH_UMASK & 0o777);
4610
4611 // Create dir/b.txt.
4612 let in_dir_b_path = dir_path.join("b.txt");
4613 create(&fs, &in_dir_b_path).expect("create b.txt");
4614
4615 // The newly created b.txt in dir should also get the by-path guest_uid/guest_gid/umask.
4616 let in_dir_b_entry = lookup_ent(&fs, &in_dir_b_path).expect("dir/b.txt must exist");
4617 assert_eq!(in_dir_b_entry.attr.st_uid, BY_PATH_UID);
4618 assert_eq!(in_dir_b_entry.attr.st_gid, BY_PATH_GID);
4619 assert_eq!(in_dir_b_entry.attr.st_mode & 0o777, !BY_PATH_UMASK & 0o777);
4620 }
4621
4622 #[test]
4623 #[cfg(feature = "arc_quota")]
4624 fn set_path_xattr_ioctl_valid_data() {
4625 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
4626 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
4627 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4628 let _guard = lock.lock().expect("acquire named lock");
4629
4630 let cfg: Config = Config {
4631 max_dynamic_xattr: 1,
4632 ..Default::default()
4633 };
4634 let p = PassthroughFs::new("tag", cfg).expect("Failed to create PassthroughFs");
4635
4636 let path_string = String::from("/test");
4637 let xattr_name_string = String::from("test_name");
4638 let xattr_value_string = String::from("test_value");
4639 let fs_path_xattr_data_buffer = FsPathXattrDataBuffer {
4640 path: {
4641 let mut path: [u8; FS_IOCTL_PATH_MAX_LEN] = [0; FS_IOCTL_PATH_MAX_LEN];
4642 path[..path_string.len()].copy_from_slice(path_string.as_bytes());
4643 path
4644 },
4645 xattr_name: {
4646 let mut xattr_name: [u8; FS_IOCTL_XATTR_NAME_MAX_LEN] =
4647 [0; FS_IOCTL_XATTR_NAME_MAX_LEN];
4648 xattr_name[..xattr_name_string.len()].copy_from_slice(xattr_name_string.as_bytes());
4649 xattr_name
4650 },
4651 xattr_value: {
4652 let mut xattr_value: [u8; FS_IOCTL_XATTR_VALUE_MAX_LEN] =
4653 [0; FS_IOCTL_XATTR_VALUE_MAX_LEN];
4654 xattr_value[..xattr_value_string.len()]
4655 .copy_from_slice(xattr_value_string.as_bytes());
4656 xattr_value
4657 },
4658 };
4659 let r = std::io::Cursor::new(fs_path_xattr_data_buffer.as_bytes());
4660
4661 let res = fs_ioc_setpathxattr(
4662 &p,
4663 mem::size_of_val(&fs_path_xattr_data_buffer) as u32,
4664 r.clone(),
4665 )
4666 .expect("valid input should get IoctlReply");
4667 assert!(matches!(res, IoctlReply::Done(Ok(data)) if data.is_empty()));
4668
4669 let read_guard = p.xattr_paths.read().expect("read xattr_paths failed");
4670 let xattr_data = read_guard.first().expect("xattr_paths should not be empty");
4671
4672 // Check expected data item is added to xattr_paths.
4673 let expected_data = XattrData {
4674 xattr_path: path_string,
4675 xattr_name: xattr_name_string,
4676 xattr_value: xattr_value_string,
4677 };
4678 assert_eq!(*xattr_data, expected_data);
4679
4680 // Second ioctl should not succeed since max_dynamic_xattr is set to 1
4681 let res = fs_ioc_setpathxattr(
4682 &p,
4683 mem::size_of_val(&fs_path_xattr_data_buffer) as u32,
4684 r.clone(),
4685 )
4686 .expect("valid input should get IoctlReply");
4687 assert!(
4688 matches!(res, IoctlReply::Done(Err(err)) if err.raw_os_error().is_some_and(|errno| {
4689 errno == libc::EPERM
4690 }))
4691 );
4692 }
4693 #[test]
4694 #[cfg(feature = "arc_quota")]
4695 fn set_path_xattr_ioctl_invalid_data() {
4696 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
4697 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
4698 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4699 let _guard = lock.lock().expect("acquire named lock");
4700
4701 let cfg: Config = Config {
4702 max_dynamic_xattr: 1,
4703 ..Default::default()
4704 };
4705 let p = PassthroughFs::new("tag", cfg).expect("Failed to create PassthroughFs");
4706
4707 let path_string = String::from("test");
4708 let xattr_name_string = String::from("test_name");
4709 let xattr_value_string = String::from("test_value");
4710 let fs_path_xattr_data_buffer = FsPathXattrDataBuffer {
4711 path: {
4712 let mut path: [u8; FS_IOCTL_PATH_MAX_LEN] = [0; FS_IOCTL_PATH_MAX_LEN];
4713 path[..path_string.len()].copy_from_slice(path_string.as_bytes());
4714 path
4715 },
4716 xattr_name: {
4717 let mut xattr_name: [u8; FS_IOCTL_XATTR_NAME_MAX_LEN] =
4718 [0; FS_IOCTL_XATTR_NAME_MAX_LEN];
4719 xattr_name[..xattr_name_string.len()].copy_from_slice(xattr_name_string.as_bytes());
4720 xattr_name
4721 },
4722 xattr_value: {
4723 let mut xattr_value: [u8; FS_IOCTL_XATTR_VALUE_MAX_LEN] =
4724 [0; FS_IOCTL_XATTR_VALUE_MAX_LEN];
4725 xattr_value[..xattr_value_string.len()]
4726 .copy_from_slice(xattr_value_string.as_bytes());
4727 xattr_value
4728 },
4729 };
4730 let r = std::io::Cursor::new(fs_path_xattr_data_buffer.as_bytes());
4731
4732 // This call is supposed to get an EINVAL IoctlReply, since the path is invalid.
4733 let res = fs_ioc_setpathxattr(
4734 &p,
4735 mem::size_of_val(&fs_path_xattr_data_buffer) as u32,
4736 r.clone(),
4737 )
4738 .expect("invalid path should still get an IoctlReply");
4739 assert!(
4740 matches!(res, IoctlReply::Done(Err(err)) if err.raw_os_error().is_some_and(|errno| {
4741 errno == libc::EINVAL
4742 }))
4743 );
4744
4745 let fake_data_buffer: [u8; 128] = [0; 128];
4746 let r = std::io::Cursor::new(fake_data_buffer.as_bytes());
4747 // This call is supposed to fail with an EINVAL error, since the in_size is not the size of
4748 // struct FsPathXattrDataBuffer.
4749 let res = fs_ioc_setpathxattr(&p, mem::size_of_val(&fake_data_buffer) as u32, r.clone())
4750 .expect_err("invalid in_size should get Error");
4751 assert!(res
4752 .raw_os_error()
4753 .is_some_and(|errno| { errno == libc::EINVAL }));
4754 }
4755
4756 #[test]
4757 #[cfg(feature = "arc_quota")]
4758 fn xattr_data_path_matching() {
4759 let ctx = get_context();
4760 let temp_dir = TempDir::new().unwrap();
4761 // Prepare `a.txt` and `dir/a.txt` before starting the test.
4762 create_test_data(&temp_dir, &["dir"], &["a.txt", "dir/a.txt"]);
4763
4764 let cfg = Config {
4765 max_dynamic_xattr: 1,
4766 ..Default::default()
4767 };
4768 let fs = PassthroughFs::new("tag", cfg).unwrap();
4769
4770 let capable = FsOptions::empty();
4771 fs.init(capable).unwrap();
4772
4773 let dir_path = temp_dir.path().join("dir");
4774 let xattr_name_string = String::from("test_name");
4775 let xattr_name_cstring = CString::new(xattr_name_string.clone()).expect("create c string");
4776 let xattr_value_string = String::from("test_value");
4777 let xattr_value_bytes = xattr_value_string.clone().into_bytes();
4778
4779 let xattr_data = XattrData {
4780 xattr_name: xattr_name_string,
4781 xattr_value: xattr_value_string,
4782 xattr_path: dir_path.to_string_lossy().into_owned(),
4783 };
4784 fs.xattr_paths
4785 .write()
4786 .expect("xattr_paths lock must be acquired")
4787 .push(xattr_data);
4788
4789 // a_path is a path without any xattr-by-path rule applied
4790 let a_path: std::path::PathBuf = temp_dir.path().join("a.txt");
4791 let in_dir_a_path = dir_path.join("a.txt");
4792
4793 let a_node = lookup(&fs, a_path.as_path()).expect("lookup a node");
4794 // a.txt should not get the by-path xattr
4795 assert!(fs
4796 .getxattr(
4797 ctx,
4798 a_node,
4799 &xattr_name_cstring,
4800 xattr_value_bytes.len() as u32
4801 )
4802 .is_err());
4803
4804 let in_dir_a_node = lookup(&fs, in_dir_a_path.as_path()).expect("lookup in dir a node");
4805 // a.txt in dir should get the by-path xattr
4806 let in_dir_a_reply = fs
4807 .getxattr(
4808 ctx,
4809 in_dir_a_node,
4810 &xattr_name_cstring,
4811 xattr_value_bytes.len() as u32,
4812 )
4813 .expect("Getxattr should succeed");
4814 assert!(matches!(in_dir_a_reply, GetxattrReply::Value(v) if v == xattr_value_bytes));
4815 // Create dir/b.txt.
4816 let in_dir_b_path = dir_path.join("b.txt");
4817 create(&fs, &in_dir_b_path).expect("create b.txt");
4818
4819 // The newly created b.txt in dir should also get the by-path xattr.
4820 let in_dir_b_node = lookup(&fs, in_dir_b_path.as_path()).expect("lookup in dir b node");
4821 let in_dir_b_reply = fs
4822 .getxattr(
4823 ctx,
4824 in_dir_b_node,
4825 &xattr_name_cstring,
4826 xattr_value_bytes.len() as u32,
4827 )
4828 .expect("Getxattr should succeed");
4829 assert!(matches!(in_dir_b_reply, GetxattrReply::Value(v) if v == xattr_value_bytes));
4830 }
4831
4832 /// Creates and opens a new file via atomic_open with the O_APPEND flag.
4833 /// We check that O_APPEND is handled properly depending on whether the writeback cache is enabled.
4834 fn atomic_open_create_o_append(writeback: bool) {
4835 // Since PassthroughFs may execute process-wide operations such as `fchdir`, acquire
4836 // `NamedLock` before starting each unit test that creates a `PassthroughFs` instance.
4837 let lock = NamedLock::create(UNITTEST_LOCK_NAME).expect("create named lock");
4838 let _guard = lock.lock().expect("acquire named lock");
4839
4840 let temp_dir = TempDir::new().unwrap();
4841
4842 let cfg = Config {
4843 cache_policy: CachePolicy::Always,
4844 writeback,
4845 ..Default::default()
4846 };
4847 let fs = PassthroughFs::new("tag", cfg).unwrap();
4848
4849 let capable = FsOptions::ZERO_MESSAGE_OPEN | FsOptions::WRITEBACK_CACHE;
4850 fs.init(capable).unwrap();
4851
4852 let (entry, _, _) = atomic_open(
4853 &fs,
4854 &temp_dir.path().join("a.txt"),
4855 0o666,
4856 (libc::O_RDWR | libc::O_CREAT | libc::O_APPEND) as u32,
4857 0,
4858 None,
4859 )
4860 .expect("atomic_open");
4861 assert_ne!(entry.inode, 0);
4862
4863 let inodes = fs.inodes.lock();
4864 let data = inodes.get(&entry.inode).unwrap();
4865 let flags = data.file.lock().1;
4866 if writeback {
4867 // When writeback is enabled, O_APPEND must be handled by the guest kernel.
4868 // So, it must be cleared.
4869 assert_eq!(flags & libc::O_APPEND, 0);
4870 } else {
4871 // Without writeback cache, O_APPEND must not be cleared.
4872 assert_eq!(flags & libc::O_APPEND, libc::O_APPEND);
4873 }
4874 }
4875
4876 #[test]
4877 fn test_atomic_open_create_o_append_no_writeback() {
4878 atomic_open_create_o_append(false);
4879 }
4880
4881 #[test]
4882 fn test_atomic_open_create_o_append_writeback() {
4883 atomic_open_create_o_append(true);
4884 }
4885 }
4886