From 44336b913126d73f9f8d6854f57aac92b5db809e Mon Sep 17 00:00:00 2001 From: Chirantan Ekbote Date: Tue, 23 Jun 2020 17:12:03 +0900 Subject: [PATCH] devices: fs: Fix posix acl handling Posix acls are a truly incredible example of API design. The presence of a default posix acl in a directory completely changes the meaning of the `mode` parameter for all system call that create inodes. However, this new behavior only applies when the inode is first created and not for any subsequent operations that use the mode, like fchmod. When a directory has a default posix acl, all inodes created in that directory get the permissions specified in the default acl. The mode parameter is treated like a umask where any permissions allowed by the default acl that are not allowed by the mode parameter are blocked. The actual umask is ignored in this case. So to handle this properly we need to set FUSE_DONT_MASK to prevent the kernel driver from preemptively applying the umask. Then we have to check if the parent directory has a default posix acl and only apply the umask to the mode if it does not. This also means that we cannot use `mkdtemp` because that always creates directories with a mode of 0o700 and since the default posix acl calculation only applies on creation and not on later operations, we need to apply the proper mode in the very beginning. BUG=b:159285544,b:152806644 TEST=vm.Virtiofs. Use a test program to create files/directories in directories that have a default acl and ones that don't, and verify that the mode is correctly set after creation Change-Id: Ieca8ac9db391feebe5719630c5f3b57b04b71533 Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/2260253 Tested-by: kokoro Reviewed-by: Daniel Verkamp Commit-Queue: Chirantan Ekbote Auto-Submit: Chirantan Ekbote --- Cargo.lock | 1 + devices/Cargo.toml | 1 + devices/src/virtio/fs/passthrough.rs | 255 ++++++++++++++++++--------- seccomp/aarch64/fs_device.policy | 1 + seccomp/arm/fs_device.policy | 1 + seccomp/x86_64/fs_device.policy | 1 + 6 files changed, 180 insertions(+), 80 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6958898a52..3b26eed08d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -194,6 +194,7 @@ dependencies = [ "net_util 0.1.0", "p9 0.1.0", "protos 0.1.0", + "rand_ish 0.1.0", "remain 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "resources 0.1.0", "sync 0.1.0", diff --git a/devices/Cargo.toml b/devices/Cargo.toml index 1d205c1699..cb2efd9a07 100644 --- a/devices/Cargo.toml +++ b/devices/Cargo.toml @@ -39,6 +39,7 @@ net_sys = { path = "../net_sys" } net_util = { path = "../net_util" } p9 = { path = "../p9" } protos = { path = "../protos", optional = true } +rand_ish = { path = "../rand_ish" } remain = "*" resources = { path = "../resources" } sync = { path = "../sync" } diff --git a/devices/src/virtio/fs/passthrough.rs b/devices/src/virtio/fs/passthrough.rs index d5628eb636..4741549ac4 100644 --- a/devices/src/virtio/fs/passthrough.rs +++ b/devices/src/virtio/fs/passthrough.rs @@ -5,7 +5,7 @@ use std::borrow::Cow; use std::collections::btree_map; use std::collections::BTreeMap; -use std::ffi::{CStr, CString}; +use std::ffi::{c_void, CStr, CString}; use std::fs::File; use std::io; use std::mem::{self, size_of, MaybeUninit}; @@ -17,6 +17,7 @@ use std::sync::Arc; use std::time::Duration; use data_model::DataInit; +use rand_ish::SimpleRng; use sync::Mutex; use sys_util::{error, ioctl_ior_nr, ioctl_iow_nr, ioctl_with_mut_ptr, ioctl_with_ptr}; @@ -887,48 +888,77 @@ fn strip_xattr_prefix(buf: &mut Vec) { } } +// Like mkdtemp but also takes a mode parameter rather than always using 0o700. This is needed +// because if the parent has a default posix acl set then the meaning of the mode parameter in the +// mkdir call completely changes: the actual mode is inherited from the default acls set in the +// parent and the mode is treated like a umask (the real umask is ignored in this case). +// Additionally, this only happens when the inode is first created and not on subsequent fchmod +// calls so we really need to use the requested mode from the very beginning and not the default +// 0o700 mode that mkdtemp uses. +fn create_temp_dir(parent: &File, mode: libc::mode_t) -> io::Result { + const MAX_ATTEMPTS: usize = 64; + let mut seed = 0u64.to_ne_bytes(); + // Safe because this will only modify `seed` and we check the return value. + let ret = unsafe { + libc::syscall( + libc::SYS_getrandom, + seed.as_mut_ptr() as *mut c_void, + seed.len(), + 0, + ) + }; + if ret < 0 { + return Err(io::Error::last_os_error()); + } + + let mut rng = SimpleRng::new(u64::from_ne_bytes(seed)); + + // Set an upper bound so that we don't end up spinning here forever. + for _ in 0..MAX_ATTEMPTS { + let mut name = String::from("."); + name.push_str(&rng.str(6)); + let name = CString::new(name).expect("SimpleRng produced string with nul-bytes"); + + // Safe because this doesn't modify any memory and we check the return value. + let ret = unsafe { libc::mkdirat(parent.as_raw_fd(), name.as_ptr(), mode) }; + if ret == 0 { + return Ok(name); + } + + let e = io::Error::last_os_error(); + if let Some(libc::EEXIST) = e.raw_os_error() { + continue; + } else { + return Err(e); + } + } + + Err(io::Error::from_raw_os_error(libc::EAGAIN)) +} + // A temporary directory that is automatically deleted when dropped unless `into_inner()` is called. // This isn't a general-purpose temporary directory and is only intended to be used to ensure that // there are no leaks when initializing a newly created directory with the correct metadata (see the // implementation of `mkdir()` below). The directory is removed via a call to `unlinkat` so callers // are not allowed to actually populate this temporary directory with any entries (or else deleting // the directory will fail). -struct TempDir { - path: CString, +struct TempDir<'a> { + parent: &'a File, + name: CString, file: File, } -impl TempDir { - // Creates a new temporary directory in `path` with a randomly generated name. `path` must be a - // directory. - fn new(path: CString) -> io::Result { - let mut template = path.into_bytes(); - - // mkdtemp requires that the last 6 bytes are XXXXXX. We're not using PathBuf here because - // the round-trip from Vec to PathBuf and back is really tedious due to Windows/Unix - // differences. - template.extend(b"/.XXXXXX"); - - // The only way this can cause an error is if the caller passed in a malformed CString. - let ptr = CString::new(template) - .map(CString::into_raw) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidInput, e))?; - - // Safe because this will only modify `ptr`. - let ret = unsafe { libc::mkdtemp(ptr) }; - - // Safe because this pointer was originally created by a call to `CString::into_raw`. - let tmpdir = unsafe { CString::from_raw(ptr) }; - - if ret.is_null() { - return Err(io::Error::last_os_error()); - } +impl<'a> TempDir<'a> { + // Creates a new temporary directory in `parent` with a randomly generated name. `parent` must + // be a directory. + fn new(parent: &File, mode: libc::mode_t) -> io::Result { + let name = create_temp_dir(parent, mode)?; // Safe because this doesn't modify any memory and we check the return value. let fd = unsafe { libc::openat( - libc::AT_FDCWD, - tmpdir.as_ptr(), + parent.as_raw_fd(), + name.as_ptr(), libc::O_DIRECTORY | libc::O_CLOEXEC, ) }; @@ -937,14 +967,15 @@ impl TempDir { } Ok(TempDir { - path: tmpdir, + parent, + name, // Safe because we just opened this fd. file: unsafe { File::from_raw_fd(fd) }, }) } - fn path(&self) -> &CStr { - &self.path + fn basename(&self) -> &CStr { + &self.name } // Consumes the `TempDir`, returning the inner `File` without deleting the temporary @@ -952,27 +983,31 @@ impl TempDir { fn into_inner(self) -> (CString, File) { // Safe because this is a valid pointer and we are going to call `mem::forget` on `self` so // we will not be aliasing memory. - let path = unsafe { ptr::read(&self.path) }; + let _parent = unsafe { ptr::read(&self.parent) }; + let name = unsafe { ptr::read(&self.name) }; let file = unsafe { ptr::read(&self.file) }; mem::forget(self); - (path, file) + (name, file) } } -impl AsRawFd for TempDir { +impl<'a> AsRawFd for TempDir<'a> { fn as_raw_fd(&self) -> RawFd { self.file.as_raw_fd() } } -impl Drop for TempDir { +impl<'a> Drop for TempDir<'a> { fn drop(&mut self) { - // There is no `AT_EMPTY_PATH` equivalent for `unlinkat` and using "." as the path returns - // EINVAL. So we have no choice but to use the real path. Safe because this doesn't modify - // any memory and we check the return value. - let ret = - unsafe { libc::unlinkat(libc::AT_FDCWD, self.path().as_ptr(), libc::AT_REMOVEDIR) }; + // Safe because this doesn't modify any memory and we check the return value. + let ret = unsafe { + libc::unlinkat( + self.parent.as_raw_fd(), + self.name.as_ptr(), + libc::AT_REMOVEDIR, + ) + }; if ret < 0 { println!("Failed to remove tempdir: {}", io::Error::last_os_error()); error!("Failed to remove tempdir: {}", io::Error::last_os_error()); @@ -980,6 +1015,25 @@ impl Drop for TempDir { } } +// Checks whether `path` has a default posix acl xattr. +fn has_default_posix_acl(path: &CStr) -> io::Result { + // Safe because this is a valid c string with no interior nul-bytes. + let acl = unsafe { CStr::from_bytes_with_nul_unchecked(b"system.posix_acl_default\0") }; + + // Safe because this doesn't modify any memory and we check the return value. + let res = unsafe { libc::lgetxattr(path.as_ptr(), acl.as_ptr(), ptr::null_mut(), 0) }; + + if res < 0 { + let err = io::Error::last_os_error(); + match err.raw_os_error() { + Some(libc::ENODATA) | Some(libc::EOPNOTSUPP) => Ok(false), + _ => Err(err), + } + } else { + Ok(true) + } +} + impl FileSystem for PassthroughFs { type Inode = Inode; type Handle = Handle; @@ -1031,6 +1085,7 @@ impl FileSystem for PassthroughFs { let mut opts = FsOptions::DO_READDIRPLUS | FsOptions::READDIRPLUS_AUTO | FsOptions::EXPORT_SUPPORT + | FsOptions::DONT_MASK | FsOptions::POSIX_ACL; if self.cfg.writeback && capable.contains(FsOptions::WRITEBACK_CACHE) { opts |= FsOptions::WRITEBACK_CACHE; @@ -1106,8 +1161,8 @@ impl FileSystem for PassthroughFs { ctx: Context, parent: Inode, name: &CStr, - mode: u32, - _umask: u32, + mut mode: u32, + umask: u32, ) -> io::Result { // This method has the same issues as `create()`: namely that the kernel may have allowed a // process to make a directory due to one of its supplementary groups but that information @@ -1124,7 +1179,15 @@ impl FileSystem for PassthroughFs { .map(Arc::clone) .ok_or_else(ebadf)?; - let tmpdir = self.get_path(parent).and_then(TempDir::new)?; + let path = self.get_path(parent)?; + + // The presence of a default posix acl xattr in the parent directory completely changes the + // meaning of the mode parameter so only apply the umask if it doesn't have one. + if !has_default_posix_acl(&path)? { + mode &= !umask; + } + + let tmpdir = TempDir::new(&data.file, mode)?; // We need to respect the setgid bit in the parent directory if it is set. let st = stat(&data.file)?; @@ -1141,20 +1204,14 @@ impl FileSystem for PassthroughFs { return Err(io::Error::last_os_error()); } - // Set the mode. Safe because this doesn't modify any memory and we check the return value. - let ret = unsafe { libc::fchmod(tmpdir.as_raw_fd(), mode) }; - if ret < 0 { - return Err(io::Error::last_os_error()); - } - // Now rename it into place. Safe because this doesn't modify any memory and we check the // return value. TODO: Switch to libc::renameat2 once // https://github.com/rust-lang/libc/pull/1508 lands and we have glibc 2.28. let ret = unsafe { libc::syscall( libc::SYS_renameat2, - libc::AT_FDCWD, - tmpdir.path().as_ptr(), + data.file.as_raw_fd(), + tmpdir.basename().as_ptr(), data.file.as_raw_fd(), name.as_ptr(), libc::RENAME_NOREPLACE, @@ -1265,9 +1322,9 @@ impl FileSystem for PassthroughFs { ctx: Context, parent: Inode, name: &CStr, - mode: u32, + mut mode: u32, flags: u32, - _umask: u32, + umask: u32, ) -> io::Result<(Entry, Option, OpenOptions)> { // The `Context` may not contain all the information we need to create the file here. For // example, a process may be part of several groups, one of which gives it permission to @@ -1297,6 +1354,13 @@ impl FileSystem for PassthroughFs { tmpflags |= libc::O_RDWR; } + // The presence of a default posix acl xattr in the parent directory completely changes the + // meaning of the mode parameter so only apply the umask if it doesn't have one. + let path = self.get_path(parent)?; + if !has_default_posix_acl(&path)? { + mode &= !umask; + } + // Safe because this is a valid c string. let current_dir = unsafe { CStr::from_bytes_with_nul_unchecked(b".\0") }; @@ -1612,11 +1676,19 @@ impl FileSystem for PassthroughFs { ctx: Context, parent: Inode, name: &CStr, - mode: u32, + mut mode: u32, rdev: u32, - _umask: u32, + umask: u32, ) -> io::Result { let (_uid, _gid) = set_creds(ctx.uid, ctx.gid)?; + + // The presence of a default posix acl xattr in the parent directory completely changes the + // meaning of the mode parameter so only apply the umask if it doesn't have one. + let path = self.get_path(parent)?; + if !has_default_posix_acl(&path)? { + mode &= !umask; + } + let data = self .inodes .lock() @@ -2139,7 +2211,7 @@ mod tests { use std::env; use std::os::unix::ffi::OsStringExt; - use std::path::{Path, PathBuf}; + use std::path::PathBuf; #[test] fn padded_cstrings() { @@ -2164,41 +2236,64 @@ mod tests { #[test] fn create_temp_dir() { - let t = TempDir::new( - CString::new(env::temp_dir().into_os_string().into_vec()) - .expect("env::temp_dir() is not a valid c-string"), - ) - .expect("Failed to create temporary directory"); + let testdir = CString::new(env::temp_dir().into_os_string().into_vec()) + .expect("env::temp_dir() is not a valid c-string"); + let fd = unsafe { + libc::openat( + libc::AT_FDCWD, + testdir.as_ptr(), + libc::O_PATH | libc::O_CLOEXEC, + ) + }; + assert!(fd >= 0, "Failed to open env::temp_dir()"); + let parent = unsafe { File::from_raw_fd(fd) }; + let t = TempDir::new(&parent, 0o755).expect("Failed to create temporary directory"); - let cstr = t.path().to_string_lossy(); - let path = Path::new(&*cstr); + let basename = t.basename().to_string_lossy(); + let path = PathBuf::from(env::temp_dir()).join(&*basename); assert!(path.exists()); assert!(path.is_dir()); } #[test] fn remove_temp_dir() { - let t = TempDir::new( - CString::new(env::temp_dir().into_os_string().into_vec()) - .expect("env::temp_dir() is not a valid c-string"), - ) - .expect("Failed to create temporary directory"); - let cstr = t.path().to_string_lossy(); - let path = PathBuf::from(&*cstr); + let testdir = CString::new(env::temp_dir().into_os_string().into_vec()) + .expect("env::temp_dir() is not a valid c-string"); + let fd = unsafe { + libc::openat( + libc::AT_FDCWD, + testdir.as_ptr(), + libc::O_PATH | libc::O_CLOEXEC, + ) + }; + assert!(fd >= 0, "Failed to open env::temp_dir()"); + let parent = unsafe { File::from_raw_fd(fd) }; + let t = TempDir::new(&parent, 0o755).expect("Failed to create temporary directory"); + + let basename = t.basename().to_string_lossy(); + let path = PathBuf::from(env::temp_dir()).join(&*basename); mem::drop(t); assert!(!path.exists()); } #[test] fn temp_dir_into_inner() { - let t = TempDir::new( - CString::new(env::temp_dir().into_os_string().into_vec()) - .expect("env::temp_dir() is not a valid c-string"), - ) - .expect("Failed to create temporary directory"); - let (cstr, _) = t.into_inner(); - let cow_str = cstr.to_string_lossy(); - let path = Path::new(&*cow_str); + let testdir = CString::new(env::temp_dir().into_os_string().into_vec()) + .expect("env::temp_dir() is not a valid c-string"); + let fd = unsafe { + libc::openat( + libc::AT_FDCWD, + testdir.as_ptr(), + libc::O_PATH | libc::O_CLOEXEC, + ) + }; + assert!(fd >= 0, "Failed to open env::temp_dir()"); + let parent = unsafe { File::from_raw_fd(fd) }; + let t = TempDir::new(&parent, 0o755).expect("Failed to create temporary directory"); + + let (basename_cstr, _) = t.into_inner(); + let basename = basename_cstr.to_string_lossy(); + let path = PathBuf::from(env::temp_dir()).join(&*basename); assert!(path.exists()); } diff --git a/seccomp/aarch64/fs_device.policy b/seccomp/aarch64/fs_device.policy index 52b1e8e27c..7f636c4f87 100644 --- a/seccomp/aarch64/fs_device.policy +++ b/seccomp/aarch64/fs_device.policy @@ -22,6 +22,7 @@ ftruncate: 1 getdents64: 1 getegid: 1 geteuid: 1 +getrandom: 1 ioctl: arg1 == FS_IOC_GET_ENCRYPTION_POLICY || arg1 == FS_IOC_SET_ENCRYPTION_POLICY || arg1 == FS_IOC_FSGETXATTR || arg1 == FS_IOC_FSSETXATTR linkat: 1 lseek: 1 diff --git a/seccomp/arm/fs_device.policy b/seccomp/arm/fs_device.policy index 1bc35fe007..599375cee2 100644 --- a/seccomp/arm/fs_device.policy +++ b/seccomp/arm/fs_device.policy @@ -22,6 +22,7 @@ ftruncate64: 1 getdents64: 1 getegid32: 1 geteuid32: 1 +getrandom: 1 ioctl: arg1 == FS_IOC_GET_ENCRYPTION_POLICY || arg1 == FS_IOC_SET_ENCRYPTION_POLICY || arg1 == FS_IOC_FSGETXATTR || arg1 == FS_IOC_FSSETXATTR linkat: 1 _llseek: 1 diff --git a/seccomp/x86_64/fs_device.policy b/seccomp/x86_64/fs_device.policy index 884470ad91..0a6c18d6f8 100644 --- a/seccomp/x86_64/fs_device.policy +++ b/seccomp/x86_64/fs_device.policy @@ -21,6 +21,7 @@ ftruncate: 1 getdents64: 1 getegid: 1 geteuid: 1 +getrandom: 1 ioctl: arg1 == FS_IOC_GET_ENCRYPTION_POLICY || arg1 == FS_IOC_SET_ENCRYPTION_POLICY || arg1 == FS_IOC_FSGETXATTR || arg1 == FS_IOC_FSSETXATTR linkat: 1 lseek: 1