mirror of
https://chromium.googlesource.com/crosvm/crosvm
synced 2024-11-28 17:44:10 +00:00
device: vhost_user: Enable seccomp filter vhost-user-fs
Vhost-user-fs currently lacks seccomp filter support, which cause security concerns to put into real usage. This change introduces virtio-fs device's seccomp policy filter to vhost-user-fs when sandbox is enabled. When specified path of socket does not exist for vhost-user device, the vhost-user device will call socketpair to create a socket. To support the syscall, the rule allowing socketpair is added to vhost_user.policy. Also, this CL adds disable-sandbox option for vhost-user-fs-device. The option is set to false by default, the vhost-user-fs will enter new mnt/user/pid/net namespace. If the this option is true, the vhost-user-fs device only create a new mount namespace. BUG=b:355159487 TEST=run manual tests TEST=run e2e test in chromium:5746575 Change-Id: I6c18386f690af7b0d2e1550c0b3881d444280a8b Reviewed-on: https://chromium-review.googlesource.com/c/crosvm/crosvm/+/5741356 Reviewed-by: Keiichi Watanabe <keiichiw@chromium.org> Commit-Queue: Yuan Yao <yuanyaogoog@chromium.org>
This commit is contained in:
parent
437d6612e0
commit
54e5b6b204
7 changed files with 98 additions and 47 deletions
|
@ -232,4 +232,11 @@ pub struct Options {
|
||||||
/// gid of the device process in the new user namespace created by minijail.
|
/// gid of the device process in the new user namespace created by minijail.
|
||||||
/// Default: 0.
|
/// Default: 0.
|
||||||
gid: u32,
|
gid: u32,
|
||||||
|
#[argh(switch)]
|
||||||
|
/// disable-sandbox controls whether vhost-user-fs device uses minijail sandbox.
|
||||||
|
/// By default, it is false, the vhost-user-fs will enter new mnt/user/pid/net
|
||||||
|
/// namespace. If the this option is true, the vhost-user-fs device only create
|
||||||
|
/// a new mount namespace and run without seccomp filter.
|
||||||
|
/// Default: false.
|
||||||
|
disable_sandbox: bool,
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,6 +10,8 @@ use anyhow::Context;
|
||||||
use base::linux::max_open_files;
|
use base::linux::max_open_files;
|
||||||
use base::RawDescriptor;
|
use base::RawDescriptor;
|
||||||
use cros_async::Executor;
|
use cros_async::Executor;
|
||||||
|
use jail::create_base_minijail;
|
||||||
|
use jail::set_embedded_bpf_program;
|
||||||
use minijail::Minijail;
|
use minijail::Minijail;
|
||||||
|
|
||||||
use crate::virtio::vhost::user::device::fs::FsBackend;
|
use crate::virtio::vhost::user::device::fs::FsBackend;
|
||||||
|
@ -37,39 +39,47 @@ fn jail_and_fork(
|
||||||
gid: u32,
|
gid: u32,
|
||||||
uid_map: Option<String>,
|
uid_map: Option<String>,
|
||||||
gid_map: Option<String>,
|
gid_map: Option<String>,
|
||||||
|
disable_sandbox: bool,
|
||||||
) -> anyhow::Result<i32> {
|
) -> anyhow::Result<i32> {
|
||||||
// Create new minijail sandbox
|
|
||||||
let mut j = Minijail::new()?;
|
|
||||||
|
|
||||||
j.namespace_pids();
|
|
||||||
j.namespace_user();
|
|
||||||
j.namespace_user_disable_setgroups();
|
|
||||||
if uid != 0 {
|
|
||||||
j.change_uid(uid);
|
|
||||||
}
|
|
||||||
if gid != 0 {
|
|
||||||
j.change_gid(gid);
|
|
||||||
}
|
|
||||||
j.uidmap(&uid_map.unwrap_or_else(default_uidmap))?;
|
|
||||||
j.gidmap(&gid_map.unwrap_or_else(default_gidmap))?;
|
|
||||||
j.run_as_init();
|
|
||||||
|
|
||||||
j.namespace_vfs();
|
|
||||||
j.namespace_net();
|
|
||||||
j.no_new_privs();
|
|
||||||
|
|
||||||
// Only pivot_root if we are not re-using the current root directory.
|
|
||||||
if dir_path != Path::new("/") {
|
|
||||||
// It's safe to call `namespace_vfs` multiple times.
|
|
||||||
j.namespace_vfs();
|
|
||||||
j.enter_pivot_root(&dir_path)?;
|
|
||||||
}
|
|
||||||
j.set_remount_mode(libc::MS_SLAVE);
|
|
||||||
|
|
||||||
let limit = max_open_files().context("failed to get max open files")?;
|
let limit = max_open_files().context("failed to get max open files")?;
|
||||||
j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)?;
|
// Create new minijail sandbox
|
||||||
// vvu locks around 512k memory. Just give 1M.
|
let jail = if disable_sandbox {
|
||||||
j.set_rlimit(libc::RLIMIT_MEMLOCK as i32, 1 << 20, 1 << 20)?;
|
create_base_minijail(dir_path.as_path(), limit)?
|
||||||
|
} else {
|
||||||
|
let mut j: Minijail = Minijail::new()?;
|
||||||
|
j.namespace_pids();
|
||||||
|
j.namespace_user();
|
||||||
|
j.namespace_user_disable_setgroups();
|
||||||
|
if uid != 0 {
|
||||||
|
j.change_uid(uid);
|
||||||
|
}
|
||||||
|
if gid != 0 {
|
||||||
|
j.change_gid(gid);
|
||||||
|
}
|
||||||
|
j.uidmap(&uid_map.unwrap_or_else(default_uidmap))?;
|
||||||
|
j.gidmap(&gid_map.unwrap_or_else(default_gidmap))?;
|
||||||
|
j.run_as_init();
|
||||||
|
|
||||||
|
j.namespace_vfs();
|
||||||
|
j.namespace_net();
|
||||||
|
j.no_new_privs();
|
||||||
|
|
||||||
|
// Only pivot_root if we are not re-using the current root directory.
|
||||||
|
if dir_path != Path::new("/") {
|
||||||
|
// It's safe to call `namespace_vfs` multiple times.
|
||||||
|
j.namespace_vfs();
|
||||||
|
j.enter_pivot_root(&dir_path)?;
|
||||||
|
}
|
||||||
|
j.set_remount_mode(libc::MS_SLAVE);
|
||||||
|
|
||||||
|
j.set_rlimit(libc::RLIMIT_NOFILE as i32, limit, limit)?;
|
||||||
|
// vvu locks around 512k memory. Just give 1M.
|
||||||
|
j.set_rlimit(libc::RLIMIT_MEMLOCK as i32, 1 << 20, 1 << 20)?;
|
||||||
|
#[cfg(not(feature = "seccomp_trace"))]
|
||||||
|
set_embedded_bpf_program(&mut j, "fs_device_vhost_user")?;
|
||||||
|
j.use_seccomp_filter();
|
||||||
|
j
|
||||||
|
};
|
||||||
|
|
||||||
// Make sure there are no duplicates in keep_rds
|
// Make sure there are no duplicates in keep_rds
|
||||||
keep_rds.sort_unstable();
|
keep_rds.sort_unstable();
|
||||||
|
@ -77,7 +87,7 @@ fn jail_and_fork(
|
||||||
|
|
||||||
// fork on the jail here
|
// fork on the jail here
|
||||||
// SAFETY: trivially safe
|
// SAFETY: trivially safe
|
||||||
let pid = unsafe { j.fork(Some(&keep_rds))? };
|
let pid = unsafe { jail.fork(Some(&keep_rds))? };
|
||||||
|
|
||||||
if pid > 0 {
|
if pid > 0 {
|
||||||
// Current FS driver jail does not use seccomp and jail_and_fork() does not have other
|
// Current FS driver jail does not use seccomp and jail_and_fork() does not have other
|
||||||
|
@ -113,6 +123,7 @@ pub fn start_device(opts: Options) -> anyhow::Result<()> {
|
||||||
opts.gid,
|
opts.gid,
|
||||||
opts.uid_map,
|
opts.uid_map,
|
||||||
opts.gid_map,
|
opts.gid_map,
|
||||||
|
opts.disable_sandbox,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// Parent, nothing to do but wait and then exit
|
// Parent, nothing to do but wait and then exit
|
||||||
|
|
7
jail/seccomp/aarch64/fs_device_vhost_user.policy
Normal file
7
jail/seccomp/aarch64/fs_device_vhost_user.policy
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# Copyright 2024 The ChromiumOS Authors
|
||||||
|
# Use of this source code is governed by a BSD-style license that can be
|
||||||
|
# found in the LICENSE file.
|
||||||
|
|
||||||
|
@include /usr/share/policy/crosvm/vhost_user.policy
|
||||||
|
|
||||||
|
@include /usr/share/policy/crosvm/fs_device.policy
|
14
jail/seccomp/aarch64/vhost_user.policy
Normal file
14
jail/seccomp/aarch64/vhost_user.policy
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
# Copyright 2024 The ChromiumOS Authors
|
||||||
|
# Use of this source code is governed by a BSD-style license that can be
|
||||||
|
# found in the LICENSE file.
|
||||||
|
|
||||||
|
# Policy file for the vhost-user transport over a socket.
|
||||||
|
|
||||||
|
# FIONBIO: for setting non-blocking mode over the socket.
|
||||||
|
# TCGETS/TCSETS: used on FD 0, probably for serial.
|
||||||
|
# b/239779171: try moving this to the serial device once we can extend ioctls across policy files.
|
||||||
|
ioctl: arg1 == FIONBIO || arg1 == TCGETS || arg1 == TCSETS
|
||||||
|
# For accepting a client connection over the socket.
|
||||||
|
accept4: 1
|
||||||
|
# For creating a socket if the specified socket path does not exits
|
||||||
|
socketpair: arg0 == AF_UNIX
|
7
jail/seccomp/x86_64/fs_device_vhost_user.policy
Normal file
7
jail/seccomp/x86_64/fs_device_vhost_user.policy
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
# Copyright 2024 The ChromiumOS Authors
|
||||||
|
# Use of this source code is governed by a BSD-style license that can be
|
||||||
|
# found in the LICENSE file.
|
||||||
|
|
||||||
|
@include /usr/share/policy/crosvm/vhost_user.policy
|
||||||
|
|
||||||
|
@include /usr/share/policy/crosvm/fs_device.policy
|
|
@ -10,3 +10,5 @@
|
||||||
ioctl: arg1 == FIONBIO || arg1 == TCGETS || arg1 == TCSETS
|
ioctl: arg1 == FIONBIO || arg1 == TCGETS || arg1 == TCSETS
|
||||||
# For accepting a client connection over the socket.
|
# For accepting a client connection over the socket.
|
||||||
accept4: 1
|
accept4: 1
|
||||||
|
# For creating a socket if the specified socket path does not exits
|
||||||
|
socketpair: arg0 == AF_UNIX
|
||||||
|
|
|
@ -28,7 +28,6 @@ use zerocopy::AsBytes;
|
||||||
|
|
||||||
use crate::config::JailConfig;
|
use crate::config::JailConfig;
|
||||||
|
|
||||||
#[cfg(not(feature = "seccomp_trace"))]
|
|
||||||
static EMBEDDED_BPFS: Lazy<std::collections::HashMap<&str, Vec<u8>>> =
|
static EMBEDDED_BPFS: Lazy<std::collections::HashMap<&str, Vec<u8>>> =
|
||||||
Lazy::new(|| include!(concat!(env!("OUT_DIR"), "/bpf_includes.in")));
|
Lazy::new(|| include!(concat!(env!("OUT_DIR"), "/bpf_includes.in")));
|
||||||
|
|
||||||
|
@ -288,20 +287,7 @@ pub fn create_sandbox_minijail(
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let bpf_program = EMBEDDED_BPFS
|
set_embedded_bpf_program(&mut jail, config.seccomp_policy_name)?;
|
||||||
.get(&config.seccomp_policy_name)
|
|
||||||
.with_context(|| {
|
|
||||||
format!(
|
|
||||||
"failed to find embedded seccomp policy: {}",
|
|
||||||
&config.seccomp_policy_name
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
jail.parse_seccomp_bytes(bpf_program).with_context(|| {
|
|
||||||
format!(
|
|
||||||
"failed to parse embedded seccomp policy: {}",
|
|
||||||
&config.seccomp_policy_name
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
jail.use_seccomp_filter();
|
jail.use_seccomp_filter();
|
||||||
|
@ -482,3 +468,20 @@ fn add_current_user_to_jail(jail: &mut Minijail) -> Result<()> {
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the seccomp policy for a jail from embedded bpfs
|
||||||
|
pub fn set_embedded_bpf_program(jail: &mut Minijail, seccomp_policy_name: &str) -> Result<()> {
|
||||||
|
let bpf_program = EMBEDDED_BPFS.get(seccomp_policy_name).with_context(|| {
|
||||||
|
format!(
|
||||||
|
"failed to find embedded seccomp policy: {}",
|
||||||
|
seccomp_policy_name
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
jail.parse_seccomp_bytes(bpf_program).with_context(|| {
|
||||||
|
format!(
|
||||||
|
"failed to parse embedded seccomp policy: {}",
|
||||||
|
seccomp_policy_name
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue