Open-source syscall scraper

Reviewed By: VladimirMakaev

Differential Revision: D44388612

fbshipit-source-id: baf5eb0a350e0c902874726689a0aacf6d585013
This commit is contained in:
Jason White 2023-03-25 16:12:24 -07:00 committed by Facebook GitHub Bot
parent e5e7a3a70e
commit c5786cf741
3 changed files with 593 additions and 0 deletions

View file

@ -0,0 +1,272 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
mod syscall_info;
use std::collections::HashMap;
use std::fs;
use std::io;
use std::io::Write;
use std::mem;
use std::path::Path;
use std::path::PathBuf;
use std::process::Command;
use std::process::Stdio;
use std::slice;
use clap::Parser;
use goblin::elf::section_header::SectionHeader;
use goblin::elf::sym::Sym;
use goblin::elf::Elf;
use scroll::Pread;
use self::syscall_info::*;
// Command-line options for the syscall scraper, parsed in `main`.
//
// NOTE: maintainer notes in this struct use plain `//` comments on purpose.
// clap's derive turns `///` doc comments into `--help` text, so those are
// user-facing strings rather than internal documentation.
#[derive(Parser)]
struct Opts {
/// Path to the vmlinux ELF image. By default, `/boot/vmlinux-$(uname -r)`
/// is used. This must contain debug information so that we can search
/// through the symbol table for syscall metadata symbols.
// No `long`/`short` is given, so this is a positional argument. When it is
// omitted, `main` builds the default path from the running kernel release.
#[clap()]
vmlinux: Option<PathBuf>,
/// Outputs the syscall list as Rust source code.
// When set, `main` calls `generate_rust` instead of printing one line per
// syscall.
#[clap(long)]
rust: bool,
}
/// Metadata associated with a syscall. This is defined in
/// [`include/trace/syscall.h`][syscall_metadata].
///
/// Only the leading fields of the kernel structure are declared here. Values
/// of this type are decoded directly from the bytes of the vmlinux image with
/// `pread` in `syscall_list`; all `u64` fields hold kernel virtual addresses
/// that must be translated to file offsets before they can be followed.
///
/// [syscall_metadata]: https://elixir.bootlin.com/linux/v5.5.3/source/include/trace/syscall.h
#[repr(C)]
#[derive(Debug, Pread)]
struct SyscallMetadata {
/// Address of the syscall name. `syscall_list` translates it to a file
/// offset using the `.text` section header and reads a string there.
name: u64,
/// Number of the syscall. This is always set to -1. The Kernel sets this to
/// the real syscall number upon boot. Since we are reading the ELF file, we
/// need to look through `sys_call_table` to find out the real syscall
/// number.
syscall_nr: libc::c_int,
/// Number of parameters it takes.
nb_args: libc::c_int,
/// List of types as strings. This is the address of an array of `nb_args`
/// string addresses; `syscall_list` locates the array via the `.data`
/// section header and skips it entirely when the address is 0.
types: u64,
/// List of parameters as strings (args[i] matches types[i]). Stored and
/// resolved the same way as `types`.
args: u64,
// Don't care about these fields. They follow `args` in the kernel structure
// but are never read by this tool.
//struct list_head enter_fields;
//struct trace_event_call *enter_event;
//struct trace_event_call *exit_event;
}
/// Returns the first entry of the ELF symbol table whose name is exactly
/// `search`, or `None` when no symbol carries that name.
///
/// Symbols whose name cannot be resolved through the string table are skipped.
fn find_sym(elf: &Elf, search: &str) -> Option<Sym> {
    elf.syms
        .iter()
        .find(|candidate| elf.strtab.get_at(candidate.st_name) == Some(search))
}
/// Returns the first section header whose name is exactly `search`, or `None`
/// when the image has no section with that name.
///
/// Headers whose name cannot be resolved through the section-header string
/// table are skipped.
fn find_section<'a>(elf: &'a Elf, search: &'a str) -> Option<&'a SectionHeader> {
    elf.section_headers
        .iter()
        .find(|header| elf.shdr_strtab.get_at(header.sh_name) == Some(search))
}
/// Reinterprets the bytes backing the symbol `sym` as a slice of `T`.
///
/// The symbol's virtual address is translated to a file offset using the
/// header of the section it belongs to, and `st_size / size_of::<T>()`
/// elements are exposed starting at that offset in `buf`.
///
/// `T` must be a plain-data type for which every bit pattern is a valid value
/// (integers such as `libc::c_ulong` are fine); this function cannot check
/// that.
///
/// # Panics
///
/// Panics instead of invoking undefined behavior when:
/// - `T` is zero-sized,
/// - `sym.st_shndx` is not a valid section index,
/// - the symbol address does not translate to an offset inside `buf`, or
/// - the resulting data is not aligned for `T`.
fn get_array<'a, T>(elf: &'a Elf, buf: &'a [u8], sym: Sym) -> &'a [T] {
    let elem_size = mem::size_of::<T>();
    assert!(
        elem_size != 0,
        "get_array does not support zero-sized element types"
    );
    let count = sym.st_size as usize / elem_size;

    let sh = &elf.section_headers[sym.st_shndx];
    let offset = sym
        .st_value
        .checked_sub(sh.sh_addr)
        .and_then(|rel| rel.checked_add(sh.sh_offset))
        .and_then(|off| <usize as std::convert::TryFrom<u64>>::try_from(off).ok())
        .expect("symbol address does not map to a valid file offset");

    // `count * elem_size` is at most `st_size`, so only the addition can
    // overflow.
    let end = offset
        .checked_add(count * elem_size)
        .expect("symbol extends past the end of the address space");
    let bytes = buf
        .get(offset..end)
        .expect("symbol data lies outside of the ELF image");
    assert_eq!(
        bytes.as_ptr() as usize % mem::align_of::<T>(),
        0,
        "symbol data is not suitably aligned for the requested element type"
    );

    // SAFETY: `bytes` is a live sub-slice of `buf` that is exactly
    // `count * size_of::<T>()` bytes long and was just checked to be aligned
    // for `T`. The returned slice borrows `buf` for `'a`, so the memory cannot
    // be freed or mutated while it is in use. Validity of the bit patterns is
    // the caller's responsibility as documented above.
    unsafe { slice::from_raw_parts(bytes.as_ptr().cast::<T>(), count) }
}
/// Translates the virtual address of `sym` into an offset within the ELF file.
///
/// The translation uses the address and file offset recorded in the header of
/// the section that `sym.st_shndx` refers to.
fn sym_offset(elf: &Elf, sym: Sym) -> usize {
    let section = &elf.section_headers[sym.st_shndx];
    (sym.st_value - section.sh_addr + section.sh_offset) as usize
}
/// Looks up the `sys_call_table` symbol and exposes its contents as a slice of
/// addresses, one entry per syscall number.
///
/// Returns `None` when the image has no `sys_call_table` symbol.
fn get_syscall_table<'a>(elf: &'a Elf, buf: &'a [u8]) -> Option<&'a [libc::c_ulong]> {
    find_sym(elf, "sys_call_table").map(|table_sym| get_array(elf, buf, table_sym))
}
fn syscall_list(path: &Path) -> Result<Vec<SyscallInfo>, Box<dyn std::error::Error>> {
let buf = fs::read(path)?;
let elf = Elf::parse(&buf)?;
// This is a table of all the addresses of architecture-specific syscall
// symbols. We use this to create a mapping of syscall IDs to symbol names.
let table = match get_syscall_table(&elf, &buf) {
Some(table) => table,
None => return Err("Failed to find `sys_call_table`".into()),
};
// The syscall symbols live in this section.
let text_section = match find_section(&elf, ".text") {
Some(sec) => sec,
None => return Err("Failed to find .text section".into()),
};
// The syscall symbols live in this section.
let data_section = match find_section(&elf, ".data") {
Some(sec) => sec,
None => return Err("Failed to find .data section".into()),
};
// Create a mapping of syscall symbol names to syscall numbers.
let mapping: HashMap<_, _> = elf
.syms
.iter()
.filter_map(|sym| {
table
.iter()
.position(|&addr| addr == sym.st_value)
.and_then(|id| {
// Look up the name, stripping off the `__x64_` prefix.
// TODO: Don't assume x64 architecture. Derive the prefix somehow.
let name = elf.strtab.get_at(sym.st_name)?.strip_prefix("__x64_")?;
Some((name, id))
})
})
.collect();
let mut list = Vec::new();
for sym in elf.syms.iter() {
if let Some(sym_name) = elf.strtab.get_at(sym.st_name) {
if sym_name.starts_with("__syscall_meta") {
let syscall: SyscallMetadata = buf.pread(sym_offset(&elf, sym))?;
// Look up the name in the .text section.
let name_offset = syscall.name - text_section.sh_addr + text_section.sh_offset;
let name: &str = buf.pread(name_offset as usize)?;
if name == "sys_ni_syscall" {
// This is a placeholder for syscalls that are not implemented.
continue;
}
if let Some(&nr) = mapping.get(name) {
let mut types = Vec::new();
// Chase pointers and gather arg types
if syscall.types != 0 {
let mut types_offset = (syscall.types - data_section.sh_addr
+ data_section.sh_offset)
as usize;
for _ in 0..syscall.nb_args {
let addr: u64 = buf.gread(&mut types_offset)?;
let offset = addr - text_section.sh_addr + text_section.sh_offset;
let name: &str = buf.pread(offset as usize)?;
types.push(name);
}
}
let mut args = Vec::new();
// Chase pointers and gather arg names
if syscall.args != 0 {
let mut args_offset =
(syscall.args - data_section.sh_addr + data_section.sh_offset) as usize;
for _ in 0..syscall.nb_args {
let addr: u64 = buf.gread(&mut args_offset)?;
let offset = addr - text_section.sh_addr + text_section.sh_offset;
let name: &str = buf.pread(offset as usize)?;
args.push(name);
}
}
let params = types
.into_iter()
.zip(args)
.map(|(t, a)| (t.to_string(), a.to_string()))
.collect();
list.push(SyscallInfo {
num: nr,
name: name.into(),
params,
});
}
}
}
}
Ok(list)
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let args = Opts::from_args();
let vmlinux = match args.vmlinux {
Some(vmlinux) => vmlinux,
None => {
// Use /boot/vmlinux-$(uname -r) by default.
PathBuf::from(format!(
"/boot/vmlinux-{}",
nix::sys::utsname::uname()
.expect("Failed getting uname.")
.release()
.to_str()
.ok_or("OsStr release is not a valid Unicode.")?
))
}
};
let mut list = syscall_list(&vmlinux)?;
list.sort_by_key(|syscall| syscall.num);
if args.rust {
generate_rust(&list)?;
} else {
for syscall in &list {
println!("{}", syscall);
}
}
Ok(())
}
/// Emits Rust wrapper functions for `syscalls`, formatted by `rustfmt`.
///
/// The generated source is piped into a `rustfmt` child process, which writes
/// the formatted result to this process's standard output.
///
/// # Errors
///
/// Returns an error if `rustfmt` cannot be spawned, if writing to its standard
/// input fails, or if it exits unsuccessfully.
fn generate_rust(syscalls: &[SyscallInfo]) -> io::Result<()> {
    let mut child = Command::new("rustfmt").stdin(Stdio::piped()).spawn()?;

    let stdin = child
        .stdin
        .take()
        .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "failed to open rustfmt stdin"))?;
    let mut f = io::BufWriter::new(stdin);

    writeln!(
        &mut f,
        "use syscalls::{{Errno, Sysno, syscall0, syscall1, syscall2, syscall3, syscall4, syscall5, syscall6}};\n"
    )?;
    for syscall in syscalls {
        writeln!(&mut f, "{}", syscall.display_as_rust())?;
    }
    f.flush()?;
    // Close the pipe so that rustfmt sees end-of-input and can finish.
    drop(f);

    // Wait for rustfmt; otherwise this process may exit before the formatted
    // output has been written, and formatting failures would go unnoticed.
    let status = child.wait()?;
    if !status.success() {
        return Err(io::Error::new(
            io::ErrorKind::Other,
            format!("rustfmt failed: {}", status),
        ));
    }
    Ok(())
}

View file

@ -0,0 +1,301 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
use core::fmt;
/// Contains information about a syscall, including its parameters.
///
/// The `Display` implementation renders a one-line summary; use
/// `display_as_rust` to render a Rust wrapper function instead.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SyscallInfo {
    /// The syscall number.
    pub num: usize,
    /// The syscall name as recorded in the kernel metadata (e.g. `sys_read`).
    pub name: String,
    /// The parameters as `(type, name)` pairs, in declaration order.
    pub params: Vec<(String, String)>,
}
/// Renders the syscall as `<num> => <name>(<type> <arg>, ...)`, with the
/// number right-aligned in a column at least three characters wide.
impl fmt::Display for SyscallInfo {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:>3} => {}(", self.num, self.name)?;
        for (index, (ty, arg)) in self.params.iter().enumerate() {
            if index > 0 {
                f.write_str(", ")?;
            }
            write!(f, "{} {}", ty, arg)?;
        }
        f.write_str(")")
    }
}
impl SyscallInfo {
/// Returns an adapter whose `Display` implementation renders this syscall
/// as a Rust wrapper function instead of the one-line summary. The adapter
/// borrows `self`.
pub fn display_as_rust(&self) -> RustSyscall {
RustSyscall(self)
}
}
/// Display adapter that renders the wrapped [`SyscallInfo`] as the source of a
/// Rust `sys_*` wrapper function. Created by `SyscallInfo::display_as_rust`.
pub struct RustSyscall<'a>(&'a SyscallInfo);
impl<'a> fmt::Display for RustSyscall<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let params = self
.0
.params
.iter()
.map(|(t, a)| RustParam::new(t, a))
.collect::<Vec<_>>();
let name = translate_syscall(&self.0.name);
writeln!(
f,
"/// See [{name}(2)](http://man7.org/linux/man-pages/man2/{name}.2.html)\n\
/// for more info on this syscall.",
name = name
)?;
writeln!(f, "#[inline(always)]")?;
writeln!(
f,
"pub unsafe fn sys_{}({}) -> Result<i64, Errno> {{",
name,
params
.iter()
.map(|p| format!("{}", p))
.collect::<Vec<_>>()
.join(", ")
)?;
let idents = params
.iter()
.map(|p| format!("{} as u64", p.ident))
.collect::<Vec<_>>()
.join(", ");
if params.is_empty() {
writeln!(f, " syscall0(Sysno::{})", name)?;
} else {
writeln!(
f,
" syscall{}(Sysno::{}, {})",
params.len(),
name,
idents
)?;
}
writeln!(f, "}}")
}
}
/// A single syscall parameter that can be rendered as a Rust function
/// parameter (`ident: type`) through its `Display` implementation.
struct RustParam<'a> {
/// The type of the parameter, as the C type string taken from the syscall
/// metadata. It is converted to a Rust type only when displayed.
ty: &'a str,
/// The identifier of the parameter, already passed through
/// `translate_ident` by `RustParam::new`.
ident: &'a str,
}
impl<'a> RustParam<'a> {
    /// Creates a parameter from its C type string and its name. The name is
    /// passed through `translate_ident` so that the stored identifier can be
    /// used directly in generated Rust code.
    pub fn new(ty: &'a str, ident: &'a str) -> Self {
        Self {
            ty,
            ident: translate_ident(ident),
        }
    }
}
/// Renders the parameter as `ident: rust_type`, converting the stored C type
/// with `to_rust_type`.
impl<'a> fmt::Display for RustParam<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let rust_ty = to_rust_type(self.ident, self.ty);
        write!(f, "{}: {}", self.ident, rust_ty)
    }
}
/// Maps a kernel syscall symbol name to the name used for the generated
/// wrapper: the `sys_` prefix (if present) is dropped, and a handful of
/// kernel-internal names are replaced by their conventional ones. Any other
/// name is returned without the prefix, otherwise unchanged.
fn translate_syscall(name: &str) -> &str {
    const RENAMES: &[(&str, &str)] = &[
        ("newstat", "stat"),
        ("newfstat", "fstat"),
        ("newlstat", "lstat"),
        ("sendfile64", "sendfile"),
        ("sysctl", "_sysctl"),
        ("umount", "umount2"),
        ("newuname", "uname"),
    ];

    let bare = name.strip_prefix("sys_").unwrap_or(name);
    RENAMES
        .iter()
        .find(|entry| entry.0 == bare)
        .map_or(bare, |entry| entry.1)
}
/// Rewrites parameter names that get special treatment in generated Rust
/// code: `type` becomes the raw identifier `r#type` and `usize` becomes
/// `size`. Every other name is returned unchanged.
fn translate_ident(ident: &str) -> &str {
    if ident == "type" {
        "r#type"
    } else if ident == "usize" {
        "size"
    } else {
        ident
    }
}
/// Converts the C type string `ty` of a syscall parameter to the Rust type
/// used in the generated wrapper.
///
/// `ident` is the parameter name. It only influences the result for `char *`
/// and `const char *`, where a parameter called `buf` is treated as a byte
/// buffer (`u8`) rather than a C string (`libc::c_char`). `const`-qualified
/// pointers map to `*const`, all other pointers to `*mut`.
///
/// # Panics
///
/// Panics if `ty` is not one of the known type strings; the message names the
/// offending parameter so that a mapping can be added.
fn to_rust_type(ident: &str, ty: &str) -> &'static str {
    match ty {
        "char *" => match ident {
            "buf" => "*mut u8",
            _ => "*mut libc::c_char",
        },
        "const char *" => match ident {
            "buf" => "*const u8",
            _ => "*const libc::c_char",
        },
        "unsigned char *" => "*mut u8",
        "const unsigned char *" => "*const u8",
        "int" => "i32",
        "int *" => "*mut i32",
        "const int *" => "*const i32",
        "u32" => "u32",
        "u32 *" => "*mut u32",
        "__u64" => "u64",
        "__s32" => "i32",
        "long" => "i64",
        "unsigned" => "u32",
        "unsigned *" => "*mut u32",
        "unsigned int" => "u32",
        "unsigned int *" => "*mut u32",
        "size_t" => "usize",
        "size_t *" => "*mut usize",
        "unsigned long" => "u64",
        "unsigned long *" => "*mut u64",
        "const unsigned long *" => "*const u64",
        "umode_t" => "libc::mode_t",
        "struct stat *" => "*mut libc::stat",
        "struct pollfd *" => "*mut libc::pollfd",
        "off_t" => "libc::off_t",
        "const struct sigaction *" => "*const libc::sigaction",
        "struct sigaction *" => "*mut libc::sigaction",
        "sigset_t *" => "*mut libc::sigset_t",
        "const sigset_t *" => "*const libc::sigset_t",
        "siginfo_t *" => "*mut libc::siginfo_t",
        "struct siginfo *" => "*mut libc::siginfo_t",
        "loff_t" => "libc::loff_t",
        "loff_t *" => "*mut libc::loff_t",
        "const struct iovec *" => "*const libc::iovec",
        "fd_set *" => "*mut libc::fd_set",
        "struct __kernel_old_timeval *" => "*mut libc::timeval",
        "key_t" => "libc::key_t",
        "struct shmid_ds *" => "*mut libc::shmid_ds",
        "struct __kernel_timespec *" => "*mut libc::timespec",
        "const struct __kernel_timespec *" => "*const libc::timespec",
        "struct __kernel_old_itimerval *" => "*mut libc::itimerval",
        "struct sockaddr *" => "*mut libc::sockaddr",
        "void *" => "*mut libc::c_void",
        "const void *" => "*const libc::c_void",
        "const void * *" => "*mut *const libc::c_void",
        "struct user_msghdr *" => "*mut libc::msghdr",
        "const char *const *" => "*const *const libc::c_char",
        "pid_t" => "libc::pid_t",
        "struct rusage *" => "*mut libc::rusage",
        "struct new_utsname *" => "*mut libc::utsname",
        "struct sembuf *" => "*mut libc::sembuf",
        "struct msgbuf *" => "*mut libc::c_void",
        "struct msqid_ds *" => "*mut libc::msqid_ds",
        "struct linux_dirent *" => "*mut libc::dirent",
        "struct linux_dirent64 *" => "*mut libc::dirent64",
        "uid_t" => "libc::uid_t",
        "uid_t *" => "*mut libc::uid_t",
        "gid_t" => "libc::gid_t",
        "gid_t *" => "*mut libc::gid_t",
        "struct timezone *" => "*mut libc::timezone",
        "struct rlimit *" => "*mut libc::rlimit",
        "struct rlimit64 *" => "*mut libc::rlimit64",
        "const struct rlimit64 *" => "*const libc::rlimit64",
        "struct sysinfo *" => "*mut libc::sysinfo",
        "struct tms *" => "*mut libc::tms",
        // FIXME: See https://man7.org/linux/man-pages/man2/capget.2.html for
        // the definition of cap_user_header_t and cap_user_data_t.
        "cap_user_header_t" => "*mut libc::c_void",
        "cap_user_data_t" => "*mut libc::c_void",
        "const cap_user_data_t" => "*const libc::c_void",
        "stack_t *" => "*mut libc::stack_t",
        "const stack_t *" => "*const libc::stack_t",
        "struct utimbuf *" => "*mut libc::utimbuf",
        // FIXME: This should be using libc::ustat, but that doesn't exist yet.
        "struct ustat *" => "*mut libc::c_void",
        "struct statfs *" => "*mut libc::statfs",
        "struct sched_param *" => "*mut libc::sched_param",
        // FIXME: No equivalent exists. See
        // https://man7.org/linux/man-pages/man2/sysctl.2.html for definition.
        "struct __sysctl_args *" => "*mut libc::c_void",
        "struct __kernel_timex *" => "*mut libc::timex",
        "qid_t" => "i32",
        "__kernel_old_time_t *" => "*mut libc::time_t",
        // aio_context_t is defined as a simple `unsigned long`.
        "aio_context_t *" => "*mut u64",
        "aio_context_t" => "u64",
        // FIXME: io_event is defined at
        // https://elixir.bootlin.com/linux/v5.16.11/source/include/uapi/linux/aio_abi.h#L60.
        "struct io_event *" => "*mut libc::c_void",
        // FIXME: See https://man7.org/linux/man-pages/man2/io_submit.2.html for
        // definition of iocb.
        "struct iocb * *" => "*mut *mut libc::c_void",
        "struct iocb *" => "*mut libc::c_void",
        "const clockid_t" => "libc::clockid_t",
        "struct sigevent *" => "*mut libc::sigevent",
        // A `const` pointer maps to `*const`, like every other entry here.
        "const struct sigevent *" => "*const libc::sigevent",
        "timer_t *" => "*mut i32",
        "timer_t" => "i32",
        "const struct __kernel_itimerspec *" => "*const libc::itimerspec",
        "struct __kernel_itimerspec *" => "*mut libc::itimerspec",
        "struct epoll_event *" => "*mut libc::epoll_event",
        "struct mq_attr *" => "*mut libc::mq_attr",
        "const struct mq_attr *" => "*const libc::mq_attr",
        "mqd_t" => "libc::mqd_t",
        // FIXME: See https://man7.org/linux/man-pages/man2/kexec_load.2.html
        // for definition of kexec_segment.
        "struct kexec_segment *" => "*mut libc::c_void",
        "key_serial_t" => "i32",
        // FIXME: robust_list_head is defined at
        // https://elixir.bootlin.com/linux/v5.16.11/source/include/uapi/linux/futex.h#L97
        "struct robust_list_head *" => "*mut libc::c_void",
        "struct robust_list_head * *" => "*mut *mut libc::c_void",
        // FIXME: perf_event_attr is a big struct and no definition in libc
        // exists. For real definition, see
        // https://man7.org/linux/man-pages/man2/perf_event_open.2.html.
        "struct perf_event_attr *" => "*mut libc::c_void",
        "struct mmsghdr *" => "*mut libc::mmsghdr",
        // FIXME: See
        // https://man7.org/linux/man-pages/man2/name_to_handle_at.2.html for
        // definition of file_handle.
        "struct file_handle *" => "*mut libc::c_void",
        // NOTE: getcpu_cache is an opaque type and should never be accessed by
        // user code.
        "struct getcpu_cache *" => "*mut libc::c_void",
        // FIXME: For definition of sched_attr, see:
        // https://elixir.bootlin.com/linux/v5.16.11/source/include/uapi/linux/sched/types.h#L102
        "struct sched_attr *" => "*mut libc::c_void",
        // FIXME: For definition of bpf_attr, see:
        // https://man7.org/linux/man-pages/man2/bpf.2.html
        "union bpf_attr *" => "*mut libc::c_void",
        "rwf_t" => "i32",
        "struct statx *" => "*mut libc::statx",
        // FIXME: For definition of __aio_sigset, see:
        // https://elixir.bootlin.com/linux/v5.16.11/source/fs/aio.c#L2216
        "const struct __aio_sigset *" => "*const libc::c_void",
        // FIXME: For definition of rseq, see:
        // https://elixir.bootlin.com/linux/v5.16.11/source/include/uapi/linux/rseq.h#L62
        "struct rseq *" => "*mut libc::c_void",
        // FIXME: For definition of io_uring_params, see:
        // https://elixir.bootlin.com/linux/v5.16.11/source/include/uapi/linux/io_uring.h#L265
        "struct io_uring_params *" => "*mut libc::c_void",
        // FIXME: This is used by the clone3 syscall and libc doesn't have this
        // yet. For the definition of clone_args, see:
        // https://elixir.bootlin.com/linux/v5.16.11/source/include/uapi/linux/sched.h#L92
        "struct clone_args *" => "*mut libc::c_void",
        // FIXME: This is used by the openat2 syscall and libc doesn't have this
        // yet. For the definition of open_how, see:
        // https://elixir.bootlin.com/linux/v5.16.11/source/include/uapi/linux/openat2.h#L19
        "struct open_how *" => "*mut libc::c_void",
        _ => panic!(
            "Don't know how to convert this syscall parameter to Rust: {} {}",
            ident, ty
        ),
    }
}

View file

@ -0,0 +1,20 @@
# @generated by autocargo
[package]
name = "scrape-syscalls"
version = "0.1.0"
authors = ["Meta Platforms"]
edition = "2021"
license = "BSD-2-Clause"
[[bin]]
name = "scrape_syscalls"
path = "scrape-syscalls/main.rs"
edition = "2018"
[dependencies]
clap = { version = "3.2.23", features = ["derive", "env", "regex", "unicode", "wrap_help"] }
goblin = "0.5.2"
libc = "0.2.139"
nix = "0.25"
scroll = { version = "0.10", features = ["derive"] }