From f67ee911d98cca8b996bd696a469bf162fac38f5 Mon Sep 17 00:00:00 2001 From: Keiichi Watanabe Date: Mon, 21 Oct 2024 03:47:36 +0000 Subject: [PATCH] devices: fs: Allow running virito-fs without root We usually requires the root permission or sandboxing to run virtiofs to do bind-mount and pivot_root inside of a mount namespace. This CL allows non-root user wherein "root_dir" defaults to the path provided via "--shared-dir". This override is currently enabled when "fs_runtime_ugid_map" feature is enabled. Bug=b:340940950 Test: Launch Terminal App with virtiofs enabled. Verify virtiofs mounts on guest and run basic I/O tests. Change-Id: Icc0065cb11dd919f473e5d3ee994e7f8d679ecd0 Signed-off-by: Akilesh Kailash Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/5945445 Reviewed-by: Keiichi Watanabe --- devices/src/virtio/fs/config.rs | 4 +- devices/src/virtio/fs/passthrough.rs | 118 +++++++++++++++--- devices/src/virtio/vhost/user/device/fs.rs | 28 ++++- .../virtio/vhost/user/device/fs/sys/linux.rs | 34 +++-- e2e_tests/tests/fs.rs | 109 ++++++++++++++++ jail/src/helpers.rs | 40 +++++- 6 files changed, 304 insertions(+), 29 deletions(-) diff --git a/devices/src/virtio/fs/config.rs b/devices/src/virtio/fs/config.rs index 88e810e262..0f4aa91983 100644 --- a/devices/src/virtio/fs/config.rs +++ b/devices/src/virtio/fs/config.rs @@ -98,7 +98,7 @@ pub struct PermissionData { /// umask to be set at runtime for the files in the path. pub umask: libc::mode_t, - /// This is the relative path from the root of the shared directory. + /// This is the absolute path from the root of the shared directory. pub perm_path: String, } @@ -338,7 +338,7 @@ pub struct Config { // host-uid: UID to be set for all the files in the path in the host. // host-gid: GID to be set for all the files in the path in the host. // umask: umask to be set at runtime for the files in the path. - // path: This is the relative path from the root of the shared directory. + // path: This is the absolute path from the root of the shared directory. // // This follows similar format to ARCVM IOCTL "FS_IOC_SETPERMISSION" #[cfg(feature = "fs_runtime_ugid_map")] diff --git a/devices/src/virtio/fs/passthrough.rs b/devices/src/virtio/fs/passthrough.rs index 15eecd4eb9..2f71dd83f7 100644 --- a/devices/src/virtio/fs/passthrough.rs +++ b/devices/src/virtio/fs/passthrough.rs @@ -9,6 +9,8 @@ use std::collections::btree_map; use std::collections::BTreeMap; use std::ffi::CStr; use std::ffi::CString; +#[cfg(feature = "fs_runtime_ugid_map")] +use std::ffi::OsStr; use std::fs::File; use std::io; use std::mem; @@ -16,6 +18,10 @@ use std::mem::size_of; use std::mem::MaybeUninit; use std::os::raw::c_int; use std::os::raw::c_long; +#[cfg(feature = "fs_runtime_ugid_map")] +use std::os::unix::ffi::OsStrExt; +#[cfg(feature = "fs_runtime_ugid_map")] +use std::path::Path; use std::ptr; use std::ptr::addr_of; use std::ptr::addr_of_mut; @@ -93,7 +99,6 @@ use crate::virtio::fs::multikey::MultikeyBTreeMap; use crate::virtio::fs::read_dir::ReadDir; const EMPTY_CSTR: &[u8] = b"\0"; -const ROOT_CSTR: &[u8] = b"/\0"; const PROC_CSTR: &[u8] = b"/proc\0"; const UNLABELED_CSTR: &[u8] = b"unlabeled\0"; @@ -730,6 +735,16 @@ pub struct PassthroughFs { xattr_paths: RwLock>, cfg: Config, + + // Set the root directory when pivot root isn't enabled for jailed process. + // + // virtio-fs typically uses mount namespaces and pivot_root for file system isolation, + // making the jailed process's root directory "/". + // + // However, Android's security model prevents crosvm from having the necessary SYS_ADMIN + // capability for mount namespaces and pivot_root. This lack of isolation means that + // root_dir defaults to the path provided via "--shared-dir". + root_dir: String, } impl std::fmt::Debug for PassthroughFs { @@ -813,6 +828,7 @@ impl PassthroughFs { #[cfg(feature = "arc_quota")] xattr_paths: RwLock::new(Vec::new()), cfg, + root_dir: "/".to_string(), }; #[cfg(feature = "fs_runtime_ugid_map")] @@ -837,6 +853,21 @@ impl PassthroughFs { } } + #[cfg(feature = "fs_runtime_ugid_map")] + pub fn set_root_dir(&mut self, shared_dir: String) -> io::Result<()> { + let canonicalized_root = match std::fs::canonicalize(shared_dir) { + Ok(path) => path, + Err(e) => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + format!("Failed to canonicalize root_dir: {}", e), + )); + } + }; + self.root_dir = canonicalized_root.to_string_lossy().to_string(); + Ok(()) + } + pub fn cfg(&self) -> &Config { &self.cfg } @@ -1704,9 +1735,12 @@ impl PassthroughFs { #[cfg(feature = "fs_runtime_ugid_map")] impl PassthroughFs { - /// Set permission according to path - fn set_ugid_permission(&self, st: &mut libc::stat64, path: &str) { - let is_root_path = path.is_empty(); + fn find_and_set_ugid_permission( + &self, + st: &mut libc::stat64, + path: &str, + is_root_path: bool, + ) -> bool { for perm_data in self .permission_paths .read() @@ -1718,12 +1752,36 @@ impl PassthroughFs { && perm_data.perm_path != "/" && perm_data.need_set_permission(path)) { - st.st_uid = perm_data.guest_uid; - st.st_gid = perm_data.guest_gid; - st.st_mode = (st.st_mode & libc::S_IFMT) | (0o777 & !perm_data.umask); - return; + self.set_permission_from_data(st, perm_data); + return true; } } + false + } + + fn set_permission_from_data(&self, st: &mut libc::stat64, perm_data: &PermissionData) { + st.st_uid = perm_data.guest_uid; + st.st_gid = perm_data.guest_gid; + st.st_mode = (st.st_mode & libc::S_IFMT) | (0o777 & !perm_data.umask); + } + + /// Set permission according to path + fn set_ugid_permission(&self, st: &mut libc::stat64, path: &str) { + let is_root_path = path.is_empty(); + + if self.find_and_set_ugid_permission(st, path, is_root_path) { + return; + } + + if let Some(perm_data) = self + .permission_paths + .read() + .expect("acquire permission_paths read lock") + .iter() + .find(|pd| pd.perm_path == "/") + { + self.set_permission_from_data(st, perm_data); + } } /// Set host uid/gid to configured value according to path @@ -1735,6 +1793,25 @@ impl PassthroughFs { ); let is_root_path = path.is_empty(); + + if self.find_ugid_creds_for_path(&path, is_root_path).is_some() { + return self.find_ugid_creds_for_path(&path, is_root_path).unwrap(); + } + + if let Some(perm_data) = self + .permission_paths + .read() + .expect("acquire permission_paths read lock") + .iter() + .find(|pd| pd.perm_path == "/") + { + return (perm_data.host_uid, perm_data.host_gid); + } + + (ctx.uid, ctx.gid) + } + + fn find_ugid_creds_for_path(&self, path: &str, is_root_path: bool) -> Option<(u32, u32)> { for perm_data in self .permission_paths .read() @@ -1744,13 +1821,12 @@ impl PassthroughFs { if (is_root_path && perm_data.perm_path == "/") || (!is_root_path && perm_data.perm_path != "/" - && perm_data.need_set_permission(&path)) + && perm_data.need_set_permission(path)) { - return (perm_data.host_uid, perm_data.host_gid); + return Some((perm_data.host_uid, perm_data.host_gid)); } } - - (ctx.uid, ctx.gid) + None } } @@ -2053,9 +2129,10 @@ impl FileSystem for PassthroughFs { type DirIter = ReadDir>; fn init(&self, capable: FsOptions) -> io::Result { - // SAFETY: this is a constant value that is a nul-terminated string without interior - // nul bytes. - let root = unsafe { CStr::from_bytes_with_nul_unchecked(ROOT_CSTR) }; + let mut root_str_with_null: Vec = self.root_dir.clone().into_bytes(); + root_str_with_null.push(0u8); + // SAFETY: this is a nul-terminated string without interior nul bytes. + let root = unsafe { CStr::from_bytes_with_nul_unchecked(&root_str_with_null) }; let flags = libc::O_DIRECTORY | libc::O_NOFOLLOW | libc::O_CLOEXEC; // SAFETY: this doesn't modify any memory and we check the return value. @@ -2916,6 +2993,17 @@ impl FileSystem for PassthroughFs { })?; buf.resize(res as usize, 0); + + #[cfg(feature = "fs_runtime_ugid_map")] + { + let link_target = Path::new(OsStr::from_bytes(&buf[..res as usize])); + if !link_target.starts_with(&self.root_dir) { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Symbolic link points outside of root_dir", + )); + } + } Ok(buf) } diff --git a/devices/src/virtio/vhost/user/device/fs.rs b/devices/src/virtio/vhost/user/device/fs.rs index c0c6b3d176..09d5a2f775 100644 --- a/devices/src/virtio/vhost/user/device/fs.rs +++ b/devices/src/virtio/vhost/user/device/fs.rs @@ -47,7 +47,13 @@ struct FsBackend { } impl FsBackend { - pub fn new(tag: &str, cfg: Option) -> anyhow::Result { + #[allow(unused_variables)] + pub fn new( + tag: &str, + shared_dir: &str, + skip_pivot_root: bool, + cfg: Option, + ) -> anyhow::Result { if tag.len() > FS_MAX_TAG_LEN { bail!( "fs tag is too long: {} (max supported: {})", @@ -60,7 +66,12 @@ impl FsBackend { | 1 << VHOST_USER_F_PROTOCOL_FEATURES; // Use default passthroughfs config - let fs = PassthroughFs::new(tag, cfg.unwrap_or_default())?; + #[allow(unused_mut)] + let mut fs = PassthroughFs::new(tag, cfg.unwrap_or_default())?; + #[cfg(feature = "fs_runtime_ugid_map")] + if skip_pivot_root { + fs.set_root_dir(shared_dir.to_string())?; + } let mut keep_rds: Vec = [0, 1, 2].to_vec(); keep_rds.append(&mut fs.keep_rds()); @@ -221,4 +232,17 @@ pub struct Options { /// a new mount namespace and run without seccomp filter. /// Default: false. disable_sandbox: bool, + #[argh(option, arg_name = "skip_pivot_root", default = "false")] + /// disable pivot_root when process is jailed. + /// + /// virtio-fs typically uses mount namespaces and pivot_root for file system isolation, + /// making the jailed process's root directory "/". + /// + /// Android's security model restricts crosvm's access to certain system capabilities, + /// specifically those related to managing mount namespaces and using pivot_root. + /// These capabilities are typically associated with the SYS_ADMIN capability. + /// To maintain a secure environment, Android relies on mechanisms like SELinux to + /// enforce isolation and control access to directories. + #[allow(dead_code)] + skip_pivot_root: bool, } diff --git a/devices/src/virtio/vhost/user/device/fs/sys/linux.rs b/devices/src/virtio/vhost/user/device/fs/sys/linux.rs index a220153386..c9989b732c 100644 --- a/devices/src/virtio/vhost/user/device/fs/sys/linux.rs +++ b/devices/src/virtio/vhost/user/device/fs/sys/linux.rs @@ -13,6 +13,7 @@ use base::AsRawDescriptors; use base::RawDescriptor; use cros_async::Executor; use jail::create_base_minijail; +use jail::create_base_minijail_without_pivot_root; use jail::set_embedded_bpf_program; use minijail::Minijail; @@ -41,13 +42,18 @@ fn jail_and_fork( uid_map: Option, gid_map: Option, disable_sandbox: bool, + pivot_root: bool, ) -> anyhow::Result { let limit = max_open_files() .context("failed to get max open files")? .rlim_max; // Create new minijail sandbox let jail = if disable_sandbox { - create_base_minijail(dir_path.as_path(), limit)? + if pivot_root { + create_base_minijail(dir_path.as_path(), limit) + } else { + create_base_minijail_without_pivot_root(dir_path.as_path(), limit) + }? } else { let mut j: Minijail = Minijail::new()?; j.namespace_pids(); @@ -108,15 +114,29 @@ fn jail_and_fork( /// Starts a vhost-user fs device. /// Returns an error if the given `args` is invalid or the device fails to run. -pub fn start_device(opts: Options) -> anyhow::Result<()> { +#[allow(unused_mut)] +pub fn start_device(mut opts: Options) -> anyhow::Result<()> { + #[allow(unused_mut)] + let mut is_pivot_root_required = true; #[cfg(feature = "fs_runtime_ugid_map")] - if let Some(ref cfg) = opts.cfg { - if !cfg.ugid_map.is_empty() && !opts.disable_sandbox { - bail!("uid_gid_map can only be set with disable sandbox option"); + if let Some(ref mut cfg) = opts.cfg { + if !cfg.ugid_map.is_empty() && (!opts.disable_sandbox || !opts.skip_pivot_root) { + bail!("uid_gid_map can only be set with disable sandbox and skip_pivot_root option"); + } + + if opts.skip_pivot_root { + is_pivot_root_required = false; } } let ex = Executor::new().context("Failed to create executor")?; - let fs_device = FsBackend::new(&opts.tag, opts.cfg)?; + let fs_device = FsBackend::new( + &opts.tag, + opts.shared_dir + .to_str() + .expect("Failed to convert opts.shared_dir to str()"), + opts.skip_pivot_root, + opts.cfg, + )?; let mut keep_rds = fs_device.keep_rds.clone(); keep_rds.append(&mut ex.as_raw_descriptors()); @@ -128,7 +148,6 @@ pub fn start_device(opts: Options) -> anyhow::Result<()> { base::syslog::push_descriptors(&mut keep_rds); cros_tracing::push_descriptors!(&mut keep_rds); metrics::push_descriptors(&mut keep_rds); - let pid = jail_and_fork( keep_rds, opts.shared_dir, @@ -137,6 +156,7 @@ pub fn start_device(opts: Options) -> anyhow::Result<()> { opts.uid_map, opts.gid_map, opts.disable_sandbox, + is_pivot_root_required, )?; // Parent, nothing to do but wait and then exit diff --git a/e2e_tests/tests/fs.rs b/e2e_tests/tests/fs.rs index 2fc0c9a2d6..21423dd4d0 100644 --- a/e2e_tests/tests/fs.rs +++ b/e2e_tests/tests/fs.rs @@ -213,3 +213,112 @@ fn vhost_user_fs_mount_rw() { mount_rw(vm, tag, temp_dir); } + +fn copy_file_validate_ugid_mapping( + mut vm: TestVm, + tag: &str, + dir: TempDir, + mapped_uid: u32, + mapped_gid: u32, +) { + use std::os::linux::fs::MetadataExt; + const ORIGINAL_FILE_NAME: &str = "original.txt"; + const NEW_FILE_NAME: &str = "new.txt"; + const TEST_DATA: &str = "Hello world!"; + + let orig_file = dir.path().join(ORIGINAL_FILE_NAME); + + std::fs::write(orig_file, TEST_DATA).unwrap(); + + vm.exec_in_guest(&format!( + "mount -t virtiofs {tag} /mnt && cp /mnt/{} /mnt/{} && sync", + ORIGINAL_FILE_NAME, NEW_FILE_NAME, + )) + .unwrap(); + + let output = vm + .exec_in_guest(&format!("stat /mnt/{}", ORIGINAL_FILE_NAME,)) + .unwrap(); + + assert!(output.stdout.contains(&format!("Uid: ({}/", mapped_uid))); + assert!(output.stdout.contains(&format!("Gid: ({}/", mapped_gid))); + + let new_file = dir.path().join(NEW_FILE_NAME); + let output_stat = std::fs::metadata(new_file.clone()); + + assert_eq!( + output_stat + .as_ref() + .expect("stat of new_file failed") + .st_uid(), + base::geteuid() + ); + assert_eq!( + output_stat + .as_ref() + .expect("stat of new_file failed") + .st_gid(), + base::getegid() + ); + + let contents = std::fs::read(new_file).unwrap(); + assert_eq!(TEST_DATA.as_bytes(), &contents); +} + +pub fn create_ugid_map_config( + socket: &Path, + shared_dir: &Path, + tag: &str, + mapped_uid: u32, + mapped_gid: u32, +) -> VuConfig { + let socket_path = socket.to_str().unwrap(); + let shared_dir_path = shared_dir.to_str().unwrap(); + + let uid = base::geteuid(); + let gid = base::getegid(); + let ugid_map_value = format!("{} {} {} {} 7 /", mapped_uid, mapped_gid, uid, gid,); + + let cfg_arg = format!("writeback=true,ugid_map='{}'", ugid_map_value); + + println!("socket={socket_path}, tag={tag}, shared_dir={shared_dir_path}"); + + VuConfig::new(CmdType::Device, "vhost-user-fs").extra_args(vec![ + "fs".to_string(), + format!("--socket-path={socket_path}"), + format!("--shared-dir={shared_dir_path}"), + format!("--tag={tag}"), + format!("--cfg={cfg_arg}"), + format!("--disable-sandbox"), + format!("--skip-pivot-root=true"), + ]) +} + +/// Tests file copy with disabled sandbox +/// +/// 1. Create `original.txt` on a temporal directory. +/// 3: Setup ugid_map for vhost-user-fs backend +/// 2. Start a VM with a virtiofs device for the temporal directory. +/// 3. Copy `original.txt` to `new.txt` in the guest. +/// 4. Check that `new.txt` is created in the host. +/// 5: Verify the UID/GID of the files both in the guest and the host. +#[test] +fn vhost_user_fs_without_sandbox_and_pivot_root() { + let socket = NamedTempFile::new().unwrap(); + let temp_dir = tempfile::tempdir().unwrap(); + + let config = Config::new(); + let tag = "android"; + + let mapped_uid = 123456; + let mapped_gid = 12345; + let vu_config = + create_ugid_map_config(socket.path(), temp_dir.path(), tag, mapped_uid, mapped_gid); + + let _vu_device = VhostUserBackend::new(vu_config).unwrap(); + + let config = config.with_vhost_user_fs(socket.path(), tag); + let vm = TestVm::new(config).unwrap(); + + copy_file_validate_ugid_mapping(vm, tag, temp_dir, mapped_uid, mapped_gid); +} diff --git a/jail/src/helpers.rs b/jail/src/helpers.rs index 20ccfb7889..b3f9ddf117 100644 --- a/jail/src/helpers.rs +++ b/jail/src/helpers.rs @@ -120,13 +120,11 @@ pub fn create_base_minijail(root: &Path, max_open_files: u64) -> Result Result Result { + // Validate new root directory. Path::is_dir() also checks the existence. + if !root.is_dir() { + bail!("{:?} is not a directory, cannot create jail", root); + } + if !root.is_absolute() { + bail!("{:?} is not absolute path", root); + } + + let mut jail = Minijail::new().context("failed to jail device")?; + jail.set_rlimit(libc::RLIMIT_NOFILE as i32, max_open_files, max_open_files) + .context("error setting max open files")?; + + Ok(jail) +} + /// Creates a [Minijail] instance which creates a sandbox. /// /// # Arguments