From 7f2e3bf2871394e864ef2cd7dc3215ccecca263c Mon Sep 17 00:00:00 2001 From: Changyuan Lyu Date: Mon, 5 Feb 2024 13:48:57 -0800 Subject: [PATCH] Initial release Signed-off-by: Changyuan Lyu --- .gitignore | 10 + Cargo.lock | 746 ++++++++++++++++++++++++++ Cargo.toml | 12 + LICENSE | 202 +++++++ README.md | 34 ++ alioth-cli/Cargo.toml | 15 + alioth-cli/src/main.rs | 140 +++++ alioth/Cargo.toml | 14 + alioth/src/acpi.rs | 244 +++++++++ alioth/src/acpi/bindings.rs | 223 ++++++++ alioth/src/action.rs | 18 + alioth/src/arch.rs | 18 + alioth/src/arch/x86_64.rs | 18 + alioth/src/arch/x86_64/layout.rs | 53 ++ alioth/src/arch/x86_64/msr.rs | 40 ++ alioth/src/arch/x86_64/paging.rs | 39 ++ alioth/src/arch/x86_64/reg.rs | 170 ++++++ alioth/src/device.rs | 15 + alioth/src/device/serial.rs | 490 +++++++++++++++++ alioth/src/hv.rs | 184 +++++++ alioth/src/hv/arch.rs | 18 + alioth/src/hv/arch/x86_64.rs | 100 ++++ alioth/src/hv/kvm.rs | 133 +++++ alioth/src/hv/kvm/bindings.rs | 239 +++++++++ alioth/src/hv/kvm/ioctls.rs | 56 ++ alioth/src/hv/kvm/vcpu.rs | 481 +++++++++++++++++ alioth/src/hv/kvm/vcpu/x86_64.rs | 236 ++++++++ alioth/src/hv/kvm/vm.rs | 234 ++++++++ alioth/src/hv/kvm/vmentry.rs | 51 ++ alioth/src/hv/kvm/vmexit.rs | 63 +++ alioth/src/hv/test.rs | 41 ++ alioth/src/lib.rs | 27 + alioth/src/loader.rs | 104 ++++ alioth/src/loader/linux.rs | 21 + alioth/src/loader/linux/bootparams.rs | 145 +++++ alioth/src/loader/linux/x86_64.rs | 240 +++++++++ alioth/src/mem.rs | 347 ++++++++++++ alioth/src/mem/addressable.rs | 277 ++++++++++ alioth/src/mem/io.rs | 56 ++ alioth/src/mem/mmio.rs | 174 ++++++ alioth/src/mem/ram.rs | 670 +++++++++++++++++++++++ alioth/src/utils.rs | 78 +++ alioth/src/utils/ioctls.rs | 184 +++++++ alioth/src/vm.rs | 270 ++++++++++ alioth/src/vm/x86_64.rs | 113 ++++ docs/contributing.md | 33 ++ rust-toolchain.toml | 2 + rustfmt.toml | 2 + 48 files changed, 7080 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml 
create mode 100644 LICENSE create mode 100644 README.md create mode 100644 alioth-cli/Cargo.toml create mode 100644 alioth-cli/src/main.rs create mode 100644 alioth/Cargo.toml create mode 100644 alioth/src/acpi.rs create mode 100644 alioth/src/acpi/bindings.rs create mode 100644 alioth/src/action.rs create mode 100644 alioth/src/arch.rs create mode 100644 alioth/src/arch/x86_64.rs create mode 100644 alioth/src/arch/x86_64/layout.rs create mode 100644 alioth/src/arch/x86_64/msr.rs create mode 100644 alioth/src/arch/x86_64/paging.rs create mode 100644 alioth/src/arch/x86_64/reg.rs create mode 100644 alioth/src/device.rs create mode 100644 alioth/src/device/serial.rs create mode 100644 alioth/src/hv.rs create mode 100644 alioth/src/hv/arch.rs create mode 100644 alioth/src/hv/arch/x86_64.rs create mode 100644 alioth/src/hv/kvm.rs create mode 100644 alioth/src/hv/kvm/bindings.rs create mode 100644 alioth/src/hv/kvm/ioctls.rs create mode 100644 alioth/src/hv/kvm/vcpu.rs create mode 100644 alioth/src/hv/kvm/vcpu/x86_64.rs create mode 100644 alioth/src/hv/kvm/vm.rs create mode 100644 alioth/src/hv/kvm/vmentry.rs create mode 100644 alioth/src/hv/kvm/vmexit.rs create mode 100644 alioth/src/hv/test.rs create mode 100644 alioth/src/lib.rs create mode 100644 alioth/src/loader.rs create mode 100644 alioth/src/loader/linux.rs create mode 100644 alioth/src/loader/linux/bootparams.rs create mode 100644 alioth/src/loader/linux/x86_64.rs create mode 100644 alioth/src/mem.rs create mode 100644 alioth/src/mem/addressable.rs create mode 100644 alioth/src/mem/io.rs create mode 100644 alioth/src/mem/mmio.rs create mode 100644 alioth/src/mem/ram.rs create mode 100644 alioth/src/utils.rs create mode 100644 alioth/src/utils/ioctls.rs create mode 100644 alioth/src/vm.rs create mode 100644 alioth/src/vm/x86_64.rs create mode 100644 docs/contributing.md create mode 100644 rust-toolchain.toml create mode 100644 rustfmt.toml diff --git a/.gitignore b/.gitignore new file mode 100644 index 
0000000..71ab9a4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..be24d76 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,746 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "alioth" +version = "0.1.0" +dependencies = [ + "bitfield", + "bitflags", + "libc", + "log", + "mio", + "rand", + "thiserror", + "zerocopy", +] + +[[package]] +name = "alioth-cli" +version = "0.1.0" +dependencies = [ + "alioth", + "anyhow", + "clap", + "flexi_logger", + "log", +] + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "anstream" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" + +[[package]] +name = "anstyle-parse" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648" +dependencies = [ + "windows-sys 0.52.0", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7" +dependencies = [ + "anstyle", + "windows-sys 0.52.0", +] + +[[package]] +name = "anyhow" +version = "1.0.76" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59d2a3357dde987206219e78ecfbbb6e8dad06cbb65292758d3270e6254f7355" + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitfield" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d7e60934ceec538daadb9d8432424ed043a904d8e0243f3c6446bce549a46ac" + +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "libc", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets 0.48.5", +] + +[[package]] +name = "clap" +version = "4.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfaff671f6b22ca62406885ece523383b9b64022e341e53e009a62ebc47a45f2" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a216b506622bb1d316cd51328dce24e07bdff4a6128a47c7e7fad11878d5adbb" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] 
+name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "flexi_logger" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47a8a297f2d1285b13abf253b27f2f4480eb6703a0424b5942f9dc872831045c" +dependencies = [ + "chrono", + "glob", + "is-terminal", + "lazy_static", + "log", + "nu-ansi-term", + "regex", + "thiserror", +] + +[[package]] +name = "getrandom" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + +[[package]] +name = "iana-time-zone" +version = "0.1.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8326b86b6cff230b97d0d312a6c40a60726df3332e721f72a1b035f451663b20" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = 
"iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "is-terminal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +dependencies = [ + "hermit-abi", + "rustix", + "windows-sys 0.48.0", +] + +[[package]] +name = "js-sys" +version = "0.3.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.151" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" + +[[package]] +name = "linux-raw-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "memchr" +version = "2.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" + +[[package]] +name = "mio" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys 0.48.0", +] + +[[package]] 
+name = "nu-ansi-term" +version = "0.49.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c073d3c1930d0751774acf49e66653acecb416c3a54c6ec095a9b11caddb5a68" +dependencies = [ + "windows-sys 0.48.0", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75cb1540fadbd5b8fbccc4dddad2734eba435053f725621c070711a14bb5f4b8" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "regex" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustix" +version = "0.38.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "2.0.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b7d0a2c048d661a1a59fcd7355baa232f7ed34e0ee4df2eef3c1c1c0d3852d8" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f11c217e1416d6f036b870f14e0413d480dbf28edbee1f877abaf0206af43bb7" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.51" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"01742297787513b79cf8e29d1056ede1313e2420b7b3b15d0a768b4921f549df" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = 
"0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" + +[[package]] +name = "windows-core" +version = "0.51.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1f8cf84f35d2db49a46868f947758c7a1138116f7fac3bc844f43ade1292e64" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" 
+version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..8434163 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,12 @@ +[workspace] +members = [ + "alioth", + "alioth-cli", +] +resolver = "2" + +[profile.release] +lto = true +codegen-units = 1 +opt-level = 3 +strip = true diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d645695 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..34e155d --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +# Alioth + +Alioth is a toy virtual machine monitor based on KVM. Complementary to the +official tutorial [Using the KVM API](https://lwn.net/Articles/658511/), it demonstrates +detailed steps for building a type-2 hypervisor and booting a Linux guest kernel. 
+ +## Get started + +* Build Alioth from source, + + ```sh + cargo +nightly build --release --target x86_64-unknown-linux-gnu + ``` + +* Make an initramfs with [u-root](https://github.com/u-root/u-root?tab=readme-ov-file#examples), + +* Boot a Linux kernel with 2 CPUs and 4 GiB memory: + + ```sh + cargo +nightly run --release --target x86_64-unknown-linux-gnu -- \ + -l info \ + --log-to-file \ + run \ + --kernel /path/to/vmlinuz \ + --cmd-line "console=ttyS0" \ + --initramfs /path/to/initramfs \ + --mem-size 4G \ + --num-cpu=2 + ``` + + +## Disclaimer + +Disclaimer: Alioth is not an officially supported Google product. \ No newline at end of file diff --git a/alioth-cli/Cargo.toml b/alioth-cli/Cargo.toml new file mode 100644 index 0000000..5bd6ae8 --- /dev/null +++ b/alioth-cli/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "alioth-cli" +version = "0.1.0" +edition = "2021" + +[dependencies] +log = "0.4" +flexi_logger = "0.26" +clap = { version = "4", features = ["derive"] } +anyhow = "1" +alioth = { path = "../alioth" } + +[[bin]] +path = "src/main.rs" +name = "alioth" diff --git a/alioth-cli/src/main.rs b/alioth-cli/src/main.rs new file mode 100644 index 0000000..d271842 --- /dev/null +++ b/alioth-cli/src/main.rs @@ -0,0 +1,140 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::path::PathBuf; + +use alioth::hv::Kvm; +use alioth::vm::{BoardConfig, ExecType, Machine, Payload}; +use anyhow::Result; +use clap::{Args, Parser, Subcommand}; +use flexi_logger::{FileSpec, Logger}; + +#[derive(Parser, Debug)] +#[command(author, version, about)] +struct Cli { + #[arg(short, long)] + /// Loglevel specification, see + /// https://docs.rs/flexi_logger/0.25.5/flexi_logger/struct.LogSpecification.html. + /// If not set, environment variable $RUST_LOG is used. + pub log_spec: Option, + + #[arg(long)] + pub log_to_file: bool, + + #[arg(long)] + pub log_dir: Option, + + #[command(subcommand)] + pub cmd: Option, +} + +#[derive(Subcommand, Debug)] +enum Command { + Run(RunArgs), +} + +#[derive(Args, Debug, Clone)] +struct RunArgs { + #[arg(short, long)] + kernel: Option, + + #[arg(short, long)] + cmd_line: Option, + + #[arg(short, long)] + initramfs: Option, + + #[arg(long, default_value_t = 1)] + num_cpu: u32, + + #[arg(long, default_value = "1G")] + mem_size: String, +} + +fn parse_mem(s: &str) -> Result { + if let Some((num, "")) = s.split_once(['g', 'G']) { + let n = num.parse::()?; + Ok(n << 30) + } else if let Some((num, "")) = s.split_once(['m', 'M']) { + let n = num.parse::()?; + Ok(n << 20) + } else if let Some((num, "")) = s.split_once(['k', 'K']) { + let n = num.parse::()?; + Ok(n << 10) + } else { + let n = s.parse::()?; + Ok(n) + } +} + +fn main_run(args: RunArgs) -> Result<()> { + let hypervisor = Kvm::new()?; + let payload = if let Some(kernel) = args.kernel { + Some(Payload { + exec_type: ExecType::Linux, + executable: kernel, + initramfs: args.initramfs, + cmd_line: args.cmd_line, + }) + } else { + None + }; + let board_config = BoardConfig { + mem_size: parse_mem(&args.mem_size)?, + num_cpu: args.num_cpu, + }; + let mut vm = Machine::new(hypervisor, board_config)?; + #[cfg(target_arch = "x86_64")] + vm.add_com1()?; + if let Some(payload) = payload { + vm.add_payload(payload); + } + vm.boot()?; + for result in vm.wait() { + 
result?; + } + Ok(()) +} + +fn main() -> Result<(), Box> { + let cli = Cli::parse(); + let logger = if let Some(ref spec) = cli.log_spec { + Logger::try_with_str(spec) + } else { + Logger::try_with_env_or_str("warn") + }?; + let logger = if cli.log_to_file { + logger.log_to_file( + FileSpec::default() + .suppress_timestamp() + .o_directory(cli.log_dir), + ) + } else { + logger + }; + let _handle = logger.start()?; + log::debug!( + "{} {} started...", + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION"), + ); + let Some(cmd) = cli.cmd else { + return Ok(()); + }; + + match cmd { + Command::Run(args) => main_run(args)?, + } + Ok(()) +} diff --git a/alioth/Cargo.toml b/alioth/Cargo.toml new file mode 100644 index 0000000..b9b3a0d --- /dev/null +++ b/alioth/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "alioth" +version = "0.1.0" +edition = "2021" + +[dependencies] +thiserror = "1" +zerocopy = { version = "0.7.32", features = ["derive", "alloc"] } +bitflags = "2.4.0" +bitfield = "0.14.0" +log = "0.4" +mio = { version = "0.8.8", features = ["os-poll", "os-ext", "net"] } +rand = "0.8.5" +libc = "0.2.150" diff --git a/alioth/src/acpi.rs b/alioth/src/acpi.rs new file mode 100644 index 0000000..ab53d42 --- /dev/null +++ b/alioth/src/acpi.rs @@ -0,0 +1,244 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +pub mod bindings; + +use std::mem::{size_of, size_of_val}; + +use zerocopy::AsBytes; + +#[cfg(target_arch = "x86_64")] +use crate::arch::layout::{APIC_START, IOAPIC_START}; +use crate::{align_up, unsafe_impl_zerocopy}; + +use bindings::{ + AcpiGenericAddress, AcpiMadtIoApic, AcpiMadtLocalX2apic, AcpiSubtableHeader, AcpiTableFadt, + AcpiTableHeader, AcpiTableMadt, AcpiTableMcfg, AcpiTableRsdp, AcpiTableXsdt, + FADT_MAJOR_VERSION, FADT_MINOR_VERSION, MADT_IO_APIC, MADT_LOCAL_X2APIC, MADT_REVISION, + RSDP_REVISION, SIG_FADT, SIG_MADT, SIG_RSDP, SIG_XSDT, XSDT_REVISION, +}; + +unsafe_impl_zerocopy!(AcpiTableMcfg<1>, FromBytes, FromZeroes, AsBytes); +unsafe_impl_zerocopy!(AcpiTableXsdt<2>, FromBytes, FromZeroes, AsBytes); + +#[rustfmt::skip] +pub const DSDT_DSDTTBL_HEADER: [u8; 104] = [ + 0x44,0x53,0x44,0x54,0x67,0x00,0x00,0x00, /* 00000000 "DSDTg..." */ + 0x02,0xFB,0x41,0x4C,0x49,0x4F,0x54,0x48, /* 00000008 "..ALIOTH" */ + 0x41,0x4C,0x49,0x4F,0x54,0x48,0x56,0x4D, /* 00000010 "ALIOTHVM" */ + 0x01,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */ + 0x25,0x09,0x20,0x20, /* 00000020 "%. " */ + 0x5B,0x82,0x37,0x2E,0x5F,0x53,0x42,0x5F, /* 00000024 "[.7._SB_" */ + 0x43,0x4F,0x4D,0x31, /* 0000002C "COM1" */ + 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 00000030 "._HID.A." */ + 0x05,0x01, /* 00000038 ".." */ + 0x08,0x5F,0x55,0x49,0x44,0x01, /* 0000003A "._UID." */ + 0x08,0x5F,0x53,0x54,0x41,0x0A,0x0F, /* 00000040 "._STA.." */ + 0x08,0x5F,0x43,0x52,0x53,0x11,0x10,0x0A, /* 00000047 "._CRS..." */ + 0x0D, /* 0000004F "." */ + 0x47,0x01,0xF8,0x03,0xF8,0x03,0x00,0x08, /* 00000050 "G......." */ + 0x22,0x10,0x00, /* 00000058 "".." */ + 0x79,0x00, /* 0000005B "y." */ + 0x08,0x5F,0x53,0x35,0x5F, /* 0000005D "._S5_" */ + 0x12,0x04,0x01,0x0A,0x05, /* 00000062 "....." 
*/ + 0x00, +]; + +#[inline] +fn gencsum<'a, T>(data: T) -> u8 +where + T: IntoIterator, +{ + (!wrapping_sum(data)).wrapping_add(1) +} + +#[inline] +fn wrapping_sum<'a, T>(data: T) -> u8 +where + T: IntoIterator, +{ + data.into_iter().fold(0u8, |accu, e| accu.wrapping_add(*e)) +} + +fn encode_addr64(addr: usize) -> [u32; 2] { + [addr as u32, (addr >> 32) as u32] +} + +const OEM_ID: [u8; 6] = *b"ALIOTH"; + +fn default_header() -> AcpiTableHeader { + AcpiTableHeader { + checksum: 0, + oem_id: OEM_ID, + oem_table_id: *b"ALIOTHVM", + oem_revision: 1, + asl_compiler_id: *b"ALTH", + asl_compiler_revision: 1, + ..Default::default() + } +} + +// https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/05_ACPI_Software_Programming_Model/ACPI_Software_Programming_Model.html#root-system-description-pointer-rsdp-structure +pub fn create_rsdp(xsdt_addr: usize) -> AcpiTableRsdp { + let mut rsdp = AcpiTableRsdp { + signature: SIG_RSDP, + oem_id: OEM_ID, + revision: RSDP_REVISION, + length: size_of::() as u32, + xsdt_physical_address: encode_addr64(xsdt_addr), + ..Default::default() + }; + rsdp.checksum = gencsum(&rsdp.as_bytes()[0..20]); + rsdp.extended_checksum = gencsum(rsdp.as_bytes()); + rsdp +} + +// https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/05_ACPI_Software_Programming_Model/ACPI_Software_Programming_Model.html#extended-system-description-table-fields-xsdt +pub fn create_xsdt(entries: [usize; 2]) -> AcpiTableXsdt<2> { + let total_length = size_of::() + size_of::() * 2; + let entries = entries.map(encode_addr64); + let mut xsdt = AcpiTableXsdt { + header: AcpiTableHeader { + signature: SIG_XSDT, + length: total_length as u32, + revision: XSDT_REVISION, + ..default_header() + }, + entries, + }; + xsdt.header.checksum = gencsum(xsdt.as_bytes()); + xsdt +} + +// https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/05_ACPI_Software_Programming_Model/ACPI_Software_Programming_Model.html#fadt-format +pub fn create_fadt(dsdt_addr: usize) -> AcpiTableFadt { + let mut fadt = AcpiTableFadt { + 
header: AcpiTableHeader { + signature: SIG_FADT, + revision: FADT_MAJOR_VERSION, + length: size_of::() as u32, + ..default_header() + }, + sleep_control: AcpiGenericAddress { + space_id: 1, + bit_width: 8, + bit_offset: 0, + access_width: 1, + address: encode_addr64(0x600), + }, + sleep_status: AcpiGenericAddress { + space_id: 1, + bit_width: 8, + bit_offset: 0, + access_width: 1, + address: encode_addr64(0x601), + }, + flags: (1 << 20), + minor_revision: FADT_MINOR_VERSION, + hypervisor_id: *b"ALIOTH ", + xdsdt: encode_addr64(dsdt_addr), + ..Default::default() + }; + fadt.header.checksum = gencsum(fadt.as_bytes()); + fadt +} + +// https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#multiple-apic-description-table-madt +#[cfg(target_arch = "x86_64")] +pub fn create_madt(num_cpu: u32) -> (AcpiTableMadt, AcpiMadtIoApic, Vec) { + let total_length = size_of::() + + size_of::() + + num_cpu as usize * size_of::(); + + let mut madt = AcpiTableMadt { + header: AcpiTableHeader { + signature: SIG_MADT, + length: total_length as u32, + revision: MADT_REVISION, + ..default_header() + }, + address: APIC_START as u32, + flags: 0, + }; + + let io_apic = AcpiMadtIoApic { + header: AcpiSubtableHeader { + type_: MADT_IO_APIC, + length: size_of::() as u8, + }, + id: 0, + address: IOAPIC_START as u32, + global_irq_base: 0, + ..Default::default() + }; + + let mut x2apics = vec![]; + let mut sums = vec![ + wrapping_sum(madt.as_bytes()), + wrapping_sum(io_apic.as_bytes()), + ]; + + for i in 0..num_cpu { + let x2apic = AcpiMadtLocalX2apic { + header: AcpiSubtableHeader { + type_: MADT_LOCAL_X2APIC, + length: size_of::() as u8, + }, + local_apic_id: i, + uid: i, + lapic_flags: 1, + ..Default::default() + }; + sums.push(wrapping_sum(x2apic.as_bytes())); + x2apics.push(x2apic); + } + + madt.header.checksum = gencsum(&sums); + + (madt, io_apic, x2apics) +} + +#[cfg(target_arch = "x86_64")] +pub fn create_acpi_tables(start: usize, num_cpu: u32) -> Vec { + let mut buf = 
Vec::new(); + + buf.extend(AcpiTableRsdp::default().as_bytes()); + + let dsdt_addr = start + size_of::(); + buf.extend(&DSDT_DSDTTBL_HEADER); + + let fadt_addr = align_up!(dsdt_addr + size_of_val(&DSDT_DSDTTBL_HEADER), 4); + let fadt = create_fadt(dsdt_addr); + buf.extend(fadt.as_bytes()); + log::trace!("fadt: {:#x?}", fadt); + + let madt_addr = fadt_addr + size_of_val(&fadt); + let (madt, madt_ioapic, madt_apics) = create_madt(num_cpu); + buf.extend(madt.as_bytes()); + buf.extend(madt_ioapic.as_bytes()); + for apic in madt_apics.iter() { + buf.extend(apic.as_bytes()); + } + log::trace!("madt: {:#x?} {:#x?} {:#x?}", madt, madt_ioapic, madt_apics); + + let xsdt_addr = madt_addr + madt.header.length as usize; + let xsdt = create_xsdt([fadt_addr, madt_addr]); + log::trace!("xsdt: {:#x?}", xsdt); + buf.extend(xsdt.as_bytes()); + + let rsdp = create_rsdp(xsdt_addr); + buf[0..size_of_val(&rsdp)].copy_from_slice(rsdp.as_bytes()); + + buf +} diff --git a/alioth/src/acpi/bindings.rs b/alioth/src/acpi/bindings.rs new file mode 100644 index 0000000..eeca412 --- /dev/null +++ b/alioth/src/acpi/bindings.rs @@ -0,0 +1,223 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use zerocopy::{AsBytes, FromBytes, FromZeroes}; + +pub const SIG_RSDP: [u8; 8] = *b"RSD PTR "; +pub const SIG_XSDT: [u8; 4] = *b"XSDT"; +pub const SIG_FADT: [u8; 4] = *b"FACP"; +pub const SIG_MADT: [u8; 4] = *b"APIC"; +pub const SIG_MCFG: [u8; 4] = *b"MCFG"; +#[allow(dead_code)] +pub const SIG_DSDT: [u8; 4] = *b"DSDT"; + +pub const RSDP_REVISION: u8 = 2; + +#[repr(C, align(4))] +#[derive(Debug, Clone, Default, AsBytes, FromBytes, FromZeroes)] +pub struct AcpiTableRsdp { + pub signature: [u8; 8], + pub checksum: u8, + pub oem_id: [u8; 6], + pub revision: u8, + pub rsdt_physical_address: u32, + pub length: u32, + pub xsdt_physical_address: [u32; 2], + pub extended_checksum: u8, + pub reserved: [u8; 3], +} + +#[repr(C, align(4))] +#[derive(Debug, Clone, Default, AsBytes, FromBytes, FromZeroes)] +pub struct AcpiTableHeader { + pub signature: [u8; 4], + pub length: u32, + pub revision: u8, + pub checksum: u8, + pub oem_id: [u8; 6], + pub oem_table_id: [u8; 8], + pub oem_revision: u32, + pub asl_compiler_id: [u8; 4], + pub asl_compiler_revision: u32, +} + +pub const XSDT_REVISION: u8 = 1; + +#[repr(C, align(4))] +#[derive(Debug, Clone)] +pub struct AcpiTableXsdt { + pub header: AcpiTableHeader, + pub entries: [[u32; 2]; N], +} + +#[repr(C, align(4))] +#[derive(Debug, Clone, AsBytes, Default, FromBytes, FromZeroes)] +pub struct AcpiGenericAddress { + pub space_id: u8, + pub bit_width: u8, + pub bit_offset: u8, + pub access_width: u8, + pub address: [u32; 2], +} + +pub const FADT_MAJOR_VERSION: u8 = 6; +pub const FADT_MINOR_VERSION: u8 = 4; + +#[repr(C, align(4))] +#[derive(Debug, Clone, Default, AsBytes, FromBytes, FromZeroes)] +pub struct AcpiTableFadt { + pub header: AcpiTableHeader, + pub facs: u32, + pub dsdt: u32, + pub model: u8, + pub preferred_profile: u8, + pub sci_interrupt: u16, + pub smi_command: u32, + pub acpi_enable: u8, + pub acpi_disable: u8, + pub s4_bios_request: u8, + pub pstate_control: u8, + pub pm1a_event_block: u32, + pub pm1b_event_block: u32, 
+ pub pm1a_control_block: u32, + pub pm1b_control_block: u32, + pub pm2_control_block: u32, + pub pm_timer_block: u32, + pub gpe0_block: u32, + pub gpe1_block: u32, + pub pm1_event_length: u8, + pub pm1_control_length: u8, + pub pm2_control_length: u8, + pub pm_timer_length: u8, + pub gpe0_block_length: u8, + pub gpe1_block_length: u8, + pub gpe1_base: u8, + pub cst_control: u8, + pub c2_latency: u16, + pub c3_latency: u16, + pub flush_size: u16, + pub flush_stride: u16, + pub duty_offset: u8, + pub duty_width: u8, + pub day_alarm: u8, + pub month_alarm: u8, + pub century: u8, + pub boot_flags: u8, + pub boot_flags_hi: u8, + pub reserved: u8, + pub flags: u32, + pub reset_register: AcpiGenericAddress, + pub reset_value: u8, + pub arm_boot_flags: u8, + pub arm_boot_flags_hi: u8, + pub minor_revision: u8, + pub xfacs: [u32; 2], + pub xdsdt: [u32; 2], + pub xpm1a_event_block: AcpiGenericAddress, + pub xpm1b_event_block: AcpiGenericAddress, + pub xpm1a_control_block: AcpiGenericAddress, + pub xpm1b_control_block: AcpiGenericAddress, + pub xpm2_control_block: AcpiGenericAddress, + pub xpm_timer_block: AcpiGenericAddress, + pub xgpe0_block: AcpiGenericAddress, + pub xgpe1_block: AcpiGenericAddress, + pub sleep_control: AcpiGenericAddress, + pub sleep_status: AcpiGenericAddress, + pub hypervisor_id: [u8; 8], +} + +pub const MADT_REVISION: u8 = 6; + +#[repr(C, align(4))] +#[derive(Debug, Clone, Default, AsBytes, FromBytes, FromZeroes)] +pub struct AcpiTableMadt { + pub header: AcpiTableHeader, + pub address: u32, + pub flags: u32, +} + +pub const MADT_IO_APIC: u8 = 1; +pub const MADT_LOCAL_X2APIC: u8 = 9; + +#[repr(C)] +#[derive(Debug, Clone, AsBytes, Default, FromBytes, FromZeroes)] +pub struct AcpiSubtableHeader { + pub type_: u8, + pub length: u8, +} + +#[repr(C, align(4))] +#[derive(Debug, Clone, AsBytes, Default, FromBytes, FromZeroes)] +pub struct AcpiMadtLocalX2apic { + pub header: AcpiSubtableHeader, + pub reserved: u16, + pub local_apic_id: u32, + pub lapic_flags: 
u32, + pub uid: u32, +} + +#[repr(C, align(4))] +#[derive(Debug, Clone, AsBytes, Default, FromBytes, FromZeroes)] +pub struct AcpiMadtIoApic { + pub header: AcpiSubtableHeader, + pub id: u8, + pub reserved: u8, + pub address: u32, + pub global_irq_base: u32, +} + +#[repr(C, align(4))] +#[derive(Debug, Clone, AsBytes, Default, FromBytes, FromZeroes)] +pub struct AcpiMcfgAllocation { + pub address: [u32; 2], + pub pci_segment: u16, + pub start_bus_number: u8, + pub end_bus_number: u8, + pub reserved: u32, +} + +pub const MCFG_REVISION: u8 = 1; + +#[repr(C, align(4))] +#[derive(Debug, Clone)] +pub struct AcpiTableMcfg { + pub header: AcpiTableHeader, + pub reserved: [u8; 8], + pub allocations: [AcpiMcfgAllocation; N], +} + +#[cfg(test)] +mod test { + use std::mem::size_of; + + use super::{ + AcpiGenericAddress, AcpiMadtIoApic, AcpiMadtLocalX2apic, AcpiMcfgAllocation, AcpiTableFadt, + AcpiTableHeader, AcpiTableMadt, AcpiTableMcfg, AcpiTableRsdp, AcpiTableXsdt, + }; + + #[test] + fn test_size() { + assert_eq!(size_of::(), 36); + assert_eq!(size_of::(), 36); + assert_eq!(size_of::(), 12); + assert_eq!(size_of::(), 276); + assert_eq!(size_of::(), 44); + assert_eq!(size_of::(), 12); + assert_eq!(size_of::(), 16); + assert_eq!(size_of::(), 16); + assert_eq!(size_of::>(), 60); + assert_eq!(size_of::>(), 36); + assert_eq!(size_of::>(), 36 + 4 * 8); + } +} diff --git a/alioth/src/action.rs b/alioth/src/action.rs new file mode 100644 index 0000000..61f6afd --- /dev/null +++ b/alioth/src/action.rs @@ -0,0 +1,18 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[derive(Debug)] +pub enum Action { + Shutdown, +} diff --git a/alioth/src/arch.rs b/alioth/src/arch.rs new file mode 100644 index 0000000..7afea90 --- /dev/null +++ b/alioth/src/arch.rs @@ -0,0 +1,18 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#[cfg(target_arch = "x86_64")] +mod x86_64; +#[cfg(target_arch = "x86_64")] +pub use x86_64::*; diff --git a/alioth/src/arch/x86_64.rs b/alioth/src/arch/x86_64.rs new file mode 100644 index 0000000..0186a6b --- /dev/null +++ b/alioth/src/arch/x86_64.rs @@ -0,0 +1,18 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod layout; +pub mod msr; +pub mod paging; +pub mod reg; diff --git a/alioth/src/arch/x86_64/layout.rs b/alioth/src/arch/x86_64/layout.rs new file mode 100644 index 0000000..8e4ec9a --- /dev/null +++ b/alioth/src/arch/x86_64/layout.rs @@ -0,0 +1,53 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub const REAL_MODE_IVT_START: usize = 0x0; + +pub const BIOS_DATA_AREA_START: usize = 0x400; +pub const BIOS_DATA_END: usize = 0x500; + +pub const BOOT_GDT_START: usize = 0x500; +pub const BOOT_GDT_LIMIT: usize = 0x100; +pub const BOOT_IDT_START: usize = 0x600; +pub const BOOT_IDT_LIMIT: usize = 0xa00; + +pub const LINUX_BOOT_PARAMS_START: usize = 0x1000; // size: 4KiB +pub const HVM_START_INFO_START: usize = 0x1000; // size: 4KiB + +pub const KERNEL_CMD_LINE_START: usize = 0x2000; +pub const KERNEL_CMD_LINE_LIMIT: usize = 0x1000; + +pub const BOOT_PAGING_START: usize = 0x3000; +pub const BOOT_PAGING_LIMIT: usize = 0x4000; + +pub const EBDA_START: usize = 0x8_0000; +pub const EBDA_END: usize = 0xA_0000; + +pub const KERNEL_IMAGE_START: usize = 0x100_0000; // 16 MiB + +pub const RAM_32_END: usize = 0x8000_0000; // 2 GiB +pub const RAM_32_SIZE: usize = RAM_32_END; // 2 GiB + +pub const MMIO_32_START: usize = 0x8000_0000; // 2 GiB +pub const MMIO_32_END: usize = 0xe000_0000; // 3.5 GiB + +pub const PCIE_CONFIG_START: 
usize = 0xe000_0000; // 3.5 GiB +pub const PCIE_CONFIG_END: usize = 0xf000_0000; // 3.75 GiB, size = 256 MiB + +pub const IOAPIC_START: usize = 0xfec0_0000; +pub const APIC_START: usize = 0xfee0_0000; + +pub const MEM_64_START: usize = 0x1_0000_0000; // 4GiB + +pub const PAGE_SIZE: usize = 0x1000; // 4KiB diff --git a/alioth/src/arch/x86_64/msr.rs b/alioth/src/arch/x86_64/msr.rs new file mode 100644 index 0000000..cfb5161 --- /dev/null +++ b/alioth/src/arch/x86_64/msr.rs @@ -0,0 +1,40 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use bitflags::bitflags; + +// Intel Vol.4, Table 2-2. +pub const IA32_EFER: u32 = 0xc000_0080; +pub const IA32_STAR: u32 = 0xc000_0081; +pub const IA32_LSTAR: u32 = 0xc000_0082; +pub const IA32_CSTAR: u32 = 0xc000_0083; +pub const IA32_FMASK: u32 = 0xc000_0084; +pub const IA32_FS_BASE: u32 = 0xc000_0100; +pub const IA32_GS_BASE: u32 = 0xc000_0101; +pub const IA32_KERNEL_GS_BASE: u32 = 0xc000_0102; +pub const IA32_TSC_AUX: u32 = 0xc000_0103; + +bitflags! 
{ + #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct Efer: u32 { + /// SYSCALL enable + const SCE = 1 << 0; + /// IA-32e mode enable + const LME = 1 << 8; + /// IA-32e mode active + const LMA = 1 << 10; + /// Execute disable bit enable + const NXE = 1 << 11; + } +} diff --git a/alioth/src/arch/x86_64/paging.rs b/alioth/src/arch/x86_64/paging.rs new file mode 100644 index 0000000..12b87f3 --- /dev/null +++ b/alioth/src/arch/x86_64/paging.rs @@ -0,0 +1,39 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use bitflags::bitflags; + +bitflags! { + #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct Entry: u32 { + /// Present + const P = 1 << 0; + /// Read/write + const RW = 1 << 1; + /// User/supervisor + const US = 1 << 2; + /// Page-level write-through + const PWT = 1 << 3; + /// Page-level cache disable + const PCD = 1 << 4; + /// Accessed + const A = 1 << 5; + /// Dirty + const D = 1 << 6; + /// Page size + const PS = 1 << 7; + /// Global + const G = 1 << 8; + } +} diff --git a/alioth/src/arch/x86_64/reg.rs b/alioth/src/arch/x86_64/reg.rs new file mode 100644 index 0000000..8c4a753 --- /dev/null +++ b/alioth/src/arch/x86_64/reg.rs @@ -0,0 +1,170 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use bitfield::bitfield; +use bitflags::bitflags; + +bitflags! { + #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct Rflags: u32 { + /// Carry flag + const CF = 1 << 0; + /// Reserved + const RESERVED_1 = 1 << 1; + /// Parity flag + const PF = 1 << 2; + /// Auxiliary Carry flag + const AF = 1 << 4; + /// Zero flag + const ZF = 1 << 6; + /// Sign flag + const SF = 1 << 7; + /// Trap flag + const TF = 1 << 8; + /// Interrupt enable flag + const IF = 1 << 9; + /// Direction flag + const DF = 1 << 10; + /// Overflow flag + const OF = 1 << 11; + /// I/O privilege level (NOTE(review): IOPL is a 2-bit field, bits 12-13; only bit 13 is named here - confirm) + const IOPL = 1 << 13; + /// Nested task flag + const NT = 1 << 14; + /// Resume flag + const RF = 1 << 16; + /// Virtual 8086 mode flag + const VM = 1 << 17; + /// Alignment Check + const AC = 1 << 18; + /// Virtual interrupt flag + const VIF = 1 << 19; + /// Virtual interrupt pending + const VIP = 1 << 20; + /// Identification flag + const ID = 1 << 21; + } +} + +bitflags! 
{ + #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct Cr0: u32 { + /// Protected Mode Enable + const PE = 1 << 0; + /// Monitor co-processor + const MP = 1 << 1; + /// Emulation + const EM = 1 << 2; + /// Task switched + const TS = 1 << 3; + /// Extension type + const ET = 1 << 4; + /// Numeric error + const NE = 1 << 5; + /// Write protect + const WP = 1 << 16; + /// Alignment mask + const AM = 1 << 18; + /// Not-write through + const NW = 1 << 29; + /// Cache disable + const CD = 1 << 30; + /// Paging + const PG = 1 << 31; + } +} + +bitflags! { + #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct Cr3: u64 { + /// Page-level write-through + const PWT = 1 << 3; + /// Page-level Cache disable + const PCD = 1 << 4; + } +} + +bitflags! { + #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)] + pub struct Cr4: u32 { + /// Virtual 8086 Mode Extensions + const VME = 1 << 0; + /// Protected-mode Virtual Interrupts + const PVI = 1 << 1; + /// Time Stamp Disable + const TSD = 1 << 2; + /// Debugging Extensions + const DE = 1 << 3; + /// Page Size Extension + const PSE = 1 << 4; + /// Physical Address Extension + const PAE = 1 << 5; + /// Machine Check Exception + const MCE = 1 << 6; + /// Page Global Enabled + const PGE = 1 << 7; + /// Performance-Monitoring Counter enable + const PCE = 1 << 8; + /// Operating system support for FXSAVE and FXRSTOR instructions + const OSFXSR = 1 << 9; + /// Operating System Support for Unmasked SIMD Floating-Point Exceptions + const OSXMMEXCPT = 1 << 10; + /// User-Mode Instruction Prevention + const UMIP = 1 << 11; + /// 57-Bit Linear Addresses + const LA57 = 1 << 12; + /// Virtual Machine Extensions Enable + const VMXE = 1 << 13; + /// Safer Mode Extensions Enable + const SMXE = 1 << 14; + /// FSGSBASE Enable + const FSGSBASE = 1 << 16; + /// 
PCID Enable + const PCIDE = 1 << 17; + /// XSAVE and Processor Extended States Enable + const OSXSAVE = 1 << 18; + /// Key Locker Enable + const KL = 1 << 19; + /// Supervisor Mode Execution Protection Enable + const SMEP = 1 << 20; + /// Supervisor Mode Access Prevention Enable + const SMAP = 1 << 21; + /// Protection Key Enable + const PKE = 1 << 22; + /// Control-flow Enforcement Technology + const CET = 1 << 23; + /// Enable Protection Keys for Supervisor-Mode Pages + const PKS = 1 << 24; + /// User Interrupts Enable + const UINTR = 1 << 25; + } +} + +bitfield! { + /// Guest segment register access right. + /// + /// See Intel Architecture Software Developer's Manual, Vol.3, Table 24-2. + #[derive(Copy, Clone, Default, PartialEq, Eq, Hash)] + pub struct SegAccess(u32); + impl Debug; + pub seg_type, _ : 3, 0; + pub s_code_data, _ : 4; + pub priv_level, _ : 6, 5; + pub present, _ : 7; + pub available, _ : 12; + pub l_64bit, _ : 13; + pub db_size_32, _: 14; + pub granularity, _: 15; + pub unusable, _: 16; +} diff --git a/alioth/src/device.rs b/alioth/src/device.rs new file mode 100644 index 0000000..530db14 --- /dev/null +++ b/alioth/src/device.rs @@ -0,0 +1,15 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+pub mod serial;
diff --git a/alioth/src/device/serial.rs b/alioth/src/device/serial.rs
new file mode 100644
index 0000000..f120d1c
--- /dev/null
+++ b/alioth/src/device/serial.rs
@@ -0,0 +1,490 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::collections::VecDeque;
+use std::io::{self, ErrorKind};
+use std::mem::MaybeUninit;
+use std::sync::{Arc, Mutex};
+use std::thread::JoinHandle;
+
+use bitfield::bitfield;
+use bitflags::bitflags;
+use libc::{
+    cfmakeraw, fcntl, tcgetattr, tcsetattr, termios, F_GETFL, F_SETFL, O_NONBLOCK, STDIN_FILENO,
+    STDOUT_FILENO, TCSANOW,
+};
+use mio::unix::SourceFd;
+use mio::{Events, Interest, Poll, Token, Waker};
+
+use crate::hv::IntxSender;
+use crate::mem::mmio::Mmio;
+use crate::{ffi, mem};
+
+// Register offsets relative to the base port. Several registers share an
+// offset: which one is addressed depends on the access direction (read vs.
+// write) and, for offsets 0x0/0x1, on the divisor-latch-access bit of the line
+// control register (see the match guards in `read` and `write`).
+const TX_HOLDING_REGISTER: u16 = 0x0;
+const RX_BUFFER_REGISTER: u16 = 0x0;
+const DIVISOR_LATCH_LSB: u16 = 0x0;
+const DIVISOR_LATCH_MSB: u16 = 0x1;
+const INTERRUPT_ENABLE_REGISTER: u16 = 0x1;
+const FIFO_CONTROL_REGISTER: u16 = 0x2;
+const INTERRUPT_IDENTIFICATION_REGISTER: u16 = 0x2;
+const LINE_CONTROL_REGISTER: u16 = 0x3;
+const MODEM_CONTROL_REGISTER: u16 = 0x4;
+const LINE_STATUS_REGISTER: u16 = 0x5;
+const MODEM_STATUS_REGISTER: u16 = 0x6;
+const SCRATCH_REGISTER: u16 = 0x7;
+
+// offset 0x1, Interrupt Enable Register (IER)
+bitflags! {
+    #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)]
+    pub struct InterruptEnable: u8 {
+        const MODEM_STATUS = 1 << 3;
+        const RECEIVER_LINE_STATUS = 1 << 2;
+        const TX_HOLDING_REGISTER_EMPTY = 1 << 1;
+        const RECEIVED_DATA_AVAILABLE = 1 << 0;
+    }
+}
+
+// offset 0x2, write, FIFO Control Register (FCR)
+bitfield! {
+    #[derive(Copy, Clone, Default)]
+    pub struct FifoControl(u8);
+    impl Debug;
+    rx_trigger_size_bits, _: 7, 6;
+    dma_mode, _: 3;
+    tx_reset, _: 2;
+    rx_reset, _: 1;
+    fifo_enabled, _: 0;
+}
+
+impl FifoControl {
+    /// Decodes the two-bit RX trigger field (bits 7:6) into a byte count:
+    /// 1, 4, 8 or 14.
+    pub fn rx_trigger_size(&self) -> usize {
+        match self.rx_trigger_size_bits() {
+            0b00 => 1,
+            0b01 => 4,
+            0b10 => 8,
+            0b11 => 14,
+            // A two-bit field cannot hold any other value.
+            _ => unreachable!(),
+        }
+    }
+}
+
+// offset 0x2, read, Interrupt Identification Register
+bitfield! {
+    #[derive(Copy, Clone)]
+    pub struct InterruptIdentification(u8);
+    impl Debug;
+    fifo_enabled, _: 7, 6;
+    interrupt_id, set_interrupt_id: 3,1;
+    no_pending, set_no_pending: 0; // Interrupt Pending Bit
+}
+
+impl InterruptIdentification {
+    /// Sets both FIFO-enabled bits (7:6).
+    pub fn set_fifo_enabled(&mut self) {
+        self.0 |= 0b11 << 6;
+    }
+
+    /// Clears both FIFO-enabled bits (7:6).
+    // NOTE(review): "fifi" is presumably a typo for "fifo"; renaming would
+    // change the public method name.
+    pub fn clear_fifi_enabled(&mut self) {
+        self.0 &= !(0b11 << 6);
+    }
+
+    /// Replaces the low nibble with `0b0100`: interrupt id (bits 3:1) becomes
+    /// `0b010` and the no-pending bit (bit 0) is cleared.
+    pub fn set_rx_data_available(&mut self) {
+        self.0 = (self.0 & !0b1111) | 0b0100;
+    }
+
+    /// Replaces the low nibble with `0b0010`: interrupt id (bits 3:1) becomes
+    /// `0b001` and the no-pending bit (bit 0) is cleared.
+    pub fn set_tx_room_empty(&mut self) {
+        self.0 = (self.0 & !0b1111) | 0b0010;
+    }
+
+    /// Replaces the low nibble with `0b0001`: interrupt id is zeroed and the
+    /// no-pending bit is set.
+    pub fn clear_interrupt(&mut self) {
+        self.0 = (self.0 & !0b1111) | 1;
+    }
+}
+
+impl Default for InterruptIdentification {
+    /// Starts with no interrupt pending (raw value `0b0000_0001`).
+    fn default() -> Self {
+        let mut val = InterruptIdentification(0);
+        val.clear_interrupt();
+        val
+    }
+}
+
+// offset 0x3, Line Control Register (LCR)
+bitfield! {
+    #[derive(Copy, Clone)]
+    pub struct LineControl(u8);
+    impl Debug;
+    divisor_latch_access, _: 7;
+    break_, _: 6;
+    stick_parity, _: 5;
+    even_parity, _: 4;
+    parity_enabled, _: 3;
+    step_bits, _: 2;
+    word_length, _: 1, 0;
+}
+
+impl Default for LineControl {
+    fn default() -> Self {
+        LineControl(0b00000011) // 8 data bits as default
+    }
+}
+
+// offset 0x4, Modem Control Register
+bitfield! {
+    #[derive(Copy, Clone, Default)]
+    pub struct ModemControl(u8);
+    impl Debug;
+    loop_back, _: 4;
+    out_2, _: 3;
+    out_1, _: 2;
+    request_to_send, _: 1;
+    data_terminal_ready, _: 0; // Data Terminal Ready
+}
+
+// offset 0x5, Line Status Register (LSR)
+bitflags! {
+    #[derive(Debug)]
+    pub struct LineStatus: u8 {
+        const ERROR_IN_RX_FIFO = 1 << 7;
+        const TX_EMPTY = 1 << 6;
+        const TX_HOLDING_REGISTER_EMPTY = 1 << 5;
+        const BREAK_INTERRUPT = 1 << 4;
+        const FRAMING_ERROR = 1 << 3;
+        const PARITY_ERROR = 1 << 2;
+        const OVERRUN_ERROR = 1 << 1;
+        const DATA_READY = 1 << 0;
+    }
+}
+
+impl Default for LineStatus {
+    /// Starts with the transmitter reported as idle: both TX-empty bits set,
+    /// no data ready.
+    fn default() -> Self {
+        LineStatus::TX_EMPTY | LineStatus::TX_HOLDING_REGISTER_EMPTY
+    }
+}
+
+// Register state of one serial port. It is shared (behind a mutex) between the
+// `Mmio` front-end and the stdin worker thread.
+#[derive(Default, Debug)]
+struct SerialReg {
+    interrupt_enable: InterruptEnable, // 0x1, Interrupt Enable Register (IER)
+    #[allow(dead_code)]
+    fifo_control: FifoControl, // 0x2, write, FIFO Control Register (FCR)
+    interrupt_identification: InterruptIdentification, // 0x2, read, Interrupt Identification Register
+    line_control: LineControl, // 0x3, Line Control Register (LCR)
+    modem_control: ModemControl, // 0x4, Modem Control Register (MCR)
+    line_status: LineStatus,
+    modem_status: u8, // 0x6, Modem Status Register (MSR)
+    scratch: u8,      // 0x7, Scratch Register (SCR)
+    divisor: u16,
+    // Receive queue: bytes read from stdin or looped back by the guest, waiting
+    // to be read through the RX buffer register.
+    data: VecDeque,
+}
+
+// NOTE(review): generic parameter lists seem to have been lost from this copy
+// of the patch (the `where I: ...` clauses below name an `I` that is never
+// declared, and types such as `Arc` / `VecDeque` have no arguments) -- compare
+// with upstream before applying.
+/// An emulated serial port exposing 8 byte-wide registers (see `size`).
+///
+/// Guest output is written to the process's stdout; a worker thread feeds
+/// stdin into the receive queue.
+#[derive(Debug)]
+pub struct Serial {
+    base_port: u16,
+    irq_sender: Arc,
+    reg: Arc>,
+    worker_thread: Option>,
+    exit_waker: Waker,
+}
+
+impl Mmio for Serial
+where
+    I: IntxSender + Sync + Send + 'static,
+{
+    /// The device occupies 8 consecutive register offsets.
+    fn size(&self) -> usize {
+        8
+    }
+
+    /// Reads the register at `offset`. `_size` is ignored: exactly one byte is
+    /// produced and widened to `u64`. Unknown offsets are logged and read as 0.
+    fn read(&self, offset: usize, _size: u8) -> Result {
+        let mut reg = self.reg.lock()?;
+        let ret = match offset as u16 {
+            // The divisor-latch arms must come first: they shadow offsets
+            // 0x0/0x1 while the latch-access bit is set.
+            DIVISOR_LATCH_LSB if reg.line_control.divisor_latch_access() => reg.divisor as u8,
+            DIVISOR_LATCH_MSB if reg.line_control.divisor_latch_access() => {
+                (reg.divisor >> 8) as u8
+            }
+            RX_BUFFER_REGISTER => {
+                // This read drains the queue when at most one byte is queued,
+                // so DATA_READY is dropped before popping.
+                if reg.data.len() <= 1 {
+                    reg.line_status &= !LineStatus::DATA_READY;
+                }
+                // An empty queue reads as 0xff.
+                reg.data.pop_front().unwrap_or(0xff)
+            }
+            INTERRUPT_ENABLE_REGISTER => reg.interrupt_enable.bits(),
+            INTERRUPT_IDENTIFICATION_REGISTER => {
+                // Reading the register returns the current value and then
+                // resets it to "no interrupt pending".
+                let ret = reg.interrupt_identification.0;
+                reg.interrupt_identification.clear_interrupt();
+                ret
+            }
+            LINE_CONTROL_REGISTER => reg.line_control.0,
+            MODEM_CONTROL_REGISTER => reg.modem_control.0,
+            LINE_STATUS_REGISTER => reg.line_status.bits(),
+            MODEM_STATUS_REGISTER => reg.modem_status,
+            SCRATCH_REGISTER => reg.scratch,
+            _ => {
+                log::error!(
+                    "Serial {:#x}: read unreachable port {:#x}",
+                    self.base_port,
+                    offset as u16 + self.base_port
+                );
+                0x0
+            }
+        };
+        Ok(ret as u64)
+    }
+
+    /// Writes the low byte of `val` to the register at `offset`; `_size` is
+    /// ignored. Writes to the FIFO control, line status and modem status
+    /// registers are accepted but discarded.
+    fn write(&self, offset: usize, _size: u8, val: u64) -> Result<(), mem::Error> {
+        let byte = val as u8;
+        let mut reg = self.reg.lock()?;
+        match offset as u16 {
+            DIVISOR_LATCH_LSB if reg.line_control.divisor_latch_access() => {
+                reg.divisor = (reg.divisor & 0xff00) | byte as u16;
+            }
+            DIVISOR_LATCH_MSB if reg.line_control.divisor_latch_access() => {
+                reg.divisor = (reg.divisor & 0x00ff) | (byte as u16) << 8;
+            }
+            TX_HOLDING_REGISTER => {
+                if reg.modem_control.loop_back() {
+                    // Loop-back mode: the byte goes straight into the receive
+                    // queue instead of to stdout.
+                    reg.data.push_back(byte);
+                    if reg
+                        .interrupt_enable
+                        .contains(InterruptEnable::RECEIVED_DATA_AVAILABLE)
+                    {
+                        reg.interrupt_identification.set_rx_data_available();
+                        self.send_irq();
+                    }
+                    reg.line_status |= LineStatus::DATA_READY;
+                } else {
+                    // A failed stdout write is only logged; the guest is not
+                    // told about it.
+                    if let Err(e) =
+                        ffi!(unsafe { libc::write(STDOUT_FILENO, &byte as *const u8 as _, 1) })
+                    {
+                        log::error!(
+                            "Serial {:#x}: cannot write byte {:#02x}: {:?}",
+                            self.base_port,
+                            byte,
+                            e
+                        )
+                    }
+                    // The byte is consumed immediately, so the "holding
+                    // register empty" interrupt is raised right away if enabled.
+                    if reg
+                        .interrupt_enable
+                        .contains(InterruptEnable::TX_HOLDING_REGISTER_EMPTY)
+                    {
+                        reg.interrupt_identification.set_tx_room_empty();
+                        self.send_irq()
+                    }
+                }
+            }
+            INTERRUPT_ENABLE_REGISTER => {
+                reg.interrupt_enable = InterruptEnable::from_bits_truncate(byte);
+            }
+            FIFO_CONTROL_REGISTER => {}
+            LINE_CONTROL_REGISTER => {
+                reg.line_control = LineControl(byte);
+            }
+            MODEM_CONTROL_REGISTER => {
+                reg.modem_control = ModemControl(byte);
+            }
+            LINE_STATUS_REGISTER => {}
+            MODEM_STATUS_REGISTER => {}
+            SCRATCH_REGISTER => {
+                reg.scratch = byte;
+            }
+            _ => log::error!(
+                "Serial {:#x}: write unreachable offset {:#x}",
+                self.base_port,
+                offset as u16 + self.base_port
+            ),
+        }
+        Ok(())
+    }
+}
+
+// Snapshot of stdin's termios settings and file status flags, restored when the
+// value is dropped. Either part is `None` if it could not be captured.
+struct StdinBackup {
+    termios: Option,
+    flag: Option,
+}
+
+impl StdinBackup {
+    // Captures the current stdin settings. Failures are logged and leave the
+    // corresponding field as `None`; construction itself never fails.
+    fn new() -> StdinBackup {
+        let mut termios_backup = None;
+        let mut t = MaybeUninit::uninit();
+        match ffi!(unsafe { tcgetattr(STDIN_FILENO, t.as_mut_ptr()) }) {
+            // `t` is only assumed initialized after tcgetattr reported success.
+            Ok(_) => termios_backup = Some(unsafe { t.assume_init() }),
+            Err(e) => log::error!("tcgetattr() failed: {}", e),
+        }
+        let mut flag_backup = None;
+        match ffi! { unsafe { fcntl(STDIN_FILENO, F_GETFL) } } {
+            Ok(f) => flag_backup = Some(f),
+            Err(e) => log::error!("fcntl(STDIN_FILENO, F_GETFL) failed: {}", e),
+        }
+        StdinBackup {
+            termios: termios_backup,
+            flag: flag_backup,
+        }
+    }
+}
+
+impl Drop for StdinBackup {
+    // Best-effort restore of whatever was captured; errors are only logged.
+    fn drop(&mut self) {
+        if let Some(t) = self.termios.take() {
+            // NOTE(review): the literal `1` is the `optional_actions` argument.
+            // `TCSANOW` is imported and used in `setup_termios`; on Linux `1` is
+            // TCSADRAIN -- confirm which was intended and use the named constant.
+            // NOTE(review): "Restroing" in the message below is presumably a
+            // typo for "Restoring".
+            if let Err(e) = ffi!(unsafe { tcsetattr(STDIN_FILENO, 1, &t) }) {
+                log::error!("Restroing termios: {:?}", e);
+            }
+        }
+        if let Some(f) = self.flag.take() {
+            if let Err(e) = ffi!(unsafe { fcntl(STDIN_FILENO, F_SETFL, f) }) {
+                log::error!("Restoring stdin flag to {:#x}: {:?}", f, e)
+            }
+        }
+    }
+}
+
+// State owned by the background thread that moves stdin bytes into the receive
+// queue.
+// NOTE(review): "Seiral" is presumably a typo for "Serial" (private type).
+struct SeiralWorker {
+    pub base_port: u16,
+    pub irq_sender: Arc,
+    pub reg: Arc>,
+    pub poll: Poll,
+}
+
+impl SeiralWorker
+where
+    I: IntxSender,
+{
+    // Puts stdin into raw, non-blocking mode and registers it with the poller
+    // under `TOKEN_STDIN`.
+    fn setup_termios(&mut self) -> io::Result<()> {
+        let mut raw_termios = MaybeUninit::uninit();
+        ffi!(unsafe { tcgetattr(STDIN_FILENO, raw_termios.as_mut_ptr()) })?;
+        unsafe { cfmakeraw(raw_termios.as_mut_ptr()) };
+        ffi!(unsafe { tcsetattr(STDIN_FILENO, TCSANOW, raw_termios.as_ptr()) })?;
+
+        let flag = ffi!(unsafe { fcntl(STDIN_FILENO, F_GETFL) })?;
+        ffi!(unsafe { fcntl(STDIN_FILENO, F_SETFL, flag | O_NONBLOCK) })?;
+        self.poll.registry().register(
+            &mut SourceFd(&STDIN_FILENO),
+            TOKEN_STDIN,
+            Interest::READABLE,
+        )?;
+
+        Ok(())
+    }
+
+    // Drains stdin in 16-byte chunks into `data` until a read returns 0 or would
+    // block, and returns the number of bytes appended. Any other read error is
+    // propagated.
+    fn read_input(data: &mut VecDeque) -> io::Result {
+        let mut total_size = 0;
+        let mut buf = [0u8; 16];
+        loop {
+            match ffi!(unsafe { libc::read(STDIN_FILENO, buf.as_mut_ptr() as _, 16) }) {
+                Ok(0) => break,
+                Err(e) if e.kind() == ErrorKind::WouldBlock => break,
+                Ok(len) => {
+                    data.extend(&buf[0..len as usize]);
+                    total_size += len as usize;
+                }
+                Err(e) => Err(e)?,
+            }
+        }
+        Ok(total_size)
+    }
+
+    // Raises the interrupt line; a failure is logged and otherwise ignored.
+    fn send_irq(&self) {
+        if let Err(e) = self.irq_sender.send() {
+            log::error!("Serial {:#x}: sending interrupt: {:?}", self.base_port, e);
+        }
+    }
+
+    // Worker main loop: waits on the poller and, for every non-shutdown event,
+    // moves pending stdin bytes into the receive queue. Returns `Ok(())` on the
+    // shutdown token or when the register mutex is poisoned.
+    fn do_work_inner(&mut self) -> io::Result<()> {
+        self.setup_termios()?;
+        let mut events = Events::with_capacity(16);
+        loop {
+            self.poll.poll(&mut events, None)?;
+            for event in events.iter() {
+                if event.token() == TOKEN_SHUTDOWN {
+                    return Ok(());
+                }
+                let Ok(mut reg) = self.reg.lock() else {
+                    log::error!("Serial {:#x}: mutex poisoned", self.base_port);
+                    return Ok(());
+                };
+                // Nothing was read: leave the status bits untouched.
+                if Self::read_input(&mut reg.data)? == 0 {
+                    continue;
+                }
+                if reg
+                    .interrupt_enable
+                    .contains(InterruptEnable::RECEIVED_DATA_AVAILABLE)
+                {
+                    reg.interrupt_identification.set_rx_data_available();
+                    self.send_irq()
+                }
+                reg.line_status |= LineStatus::DATA_READY;
+            }
+        }
+    }
+
+    // Thread entry point. `_backup` is created before stdin is reconfigured and
+    // dropped when this function returns, which restores the saved settings.
+    fn do_work(&mut self) {
+        log::trace!("Serial {:#x}: start", self.base_port);
+        let _backup = StdinBackup::new();
+        if let Err(e) = self.do_work_inner() {
+            log::error!("Serial {:#x}: {:?}", self.base_port, e)
+        } else {
+            log::trace!("Serial {:#x}: done", self.base_port)
+        }
+    }
+}
+
+// Poller tokens: SHUTDOWN is signalled through `Serial::exit_waker`, STDIN by
+// readable stdin.
+const TOKEN_SHUTDOWN: Token = Token(1);
+const TOKEN_STDIN: Token = Token(0);
+
+impl Serial
+where
+    I: IntxSender + Sync + Send + 'static,
+{
+    /// Creates the device and spawns its stdin worker thread, named
+    /// `serial_<base_port in hex>`.
+    ///
+    /// Fails if the poller, the waker or the thread cannot be created.
+    pub fn new(base_port: u16, intx_sender: I) -> io::Result {
+        let irq_sender = Arc::new(intx_sender);
+        let reg = Arc::new(Mutex::new(SerialReg::default()));
+        let poll = Poll::new()?;
+        // The waker is registered on the worker's poller so that `drop` can
+        // interrupt the blocking poll.
+        let waker = Waker::new(poll.registry(), TOKEN_SHUTDOWN)?;
+        let mut worker = SeiralWorker {
+            base_port,
+            reg: reg.clone(),
+            poll,
+            irq_sender: irq_sender.clone(),
+        };
+        let worker_thread = std::thread::Builder::new()
+            .name(format!("serial_{:#x}", base_port))
+            .spawn(move || worker.do_work())?;
+        let serial = Serial {
+            reg,
+            base_port,
+            irq_sender,
+            worker_thread: Some(worker_thread),
+            exit_waker: waker,
+        };
+        Ok(serial)
+    }
+
+    // Raises the interrupt line; a failure is logged and otherwise ignored.
+    fn send_irq(&self) {
+        if let Err(e) = self.irq_sender.send() {
+            log::error!("Serial {:#x}: sending interrupt: {:?}", self.base_port, e);
+        }
+    }
+}
+
+impl Drop for Serial {
+    // Signals the worker to stop and waits for it. If the wake-up itself fails
+    // the thread is not joined (joining could block forever).
+    fn drop(&mut self) {
+        if let Err(e) = self.exit_waker.wake() {
+            log::error!("Serial {:#x}: {:?}", self.base_port, e);
+            return;
+        }
+        let Some(thread) = self.worker_thread.take() else {
+            return;
+        };
+        if let Err(e) = thread.join() {
+            log::error!("Serial {:#x}: {:?}", self.base_port, e);
+        }
+    }
+}
diff --git a/alioth/src/hv.rs b/alioth/src/hv.rs
new file mode 100644
index 0000000..e606b6c
--- /dev/null
+++ b/alioth/src/hv.rs
@@ -0,0 +1,184 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+pub mod arch;
+
+#[cfg(target_os = "linux")]
+mod kvm;
+#[cfg(test)]
+pub(crate) mod test;
+#[cfg(target_os = "linux")]
+pub use kvm::Kvm;
+
+use std::backtrace::Backtrace;
+use std::fmt::Debug;
+use std::sync::{Arc, PoisonError};
+use std::thread::JoinHandle;
+
+use arch::Reg;
+#[cfg(target_arch = "x86_64")]
+use arch::{Cpuid, DtReg, DtRegVal, SReg, SegReg, SegRegVal};
+use thiserror::Error;
+
+/// Errors reported by the hypervisor abstraction layer.
+#[derive(Error, Debug)]
+pub enum Error {
+    #[error("invalid memory map option for {hypervisor}: {option:?}")]
+    MemMapOption {
+        option: MemMapOption,
+        hypervisor: &'static str,
+    },
+    #[error("lock poisoned")]
+    RwLockPoisoned,
+    #[error("IO error: {source}")]
+    StdIo {
+        #[from]
+        source: std::io::Error,
+        #[backtrace]
+        backtrace: Backtrace,
+    },
+    #[error("{msg}")]
+    Unexpected { msg: String },
+    #[error("lack capability: {cap}")]
+    LackCap { cap: String },
+    // NOTE(review): "multipe" in the message below is presumably a typo for
+    // "multiple".
+    #[error("creating multipe memory")]
+    CreatingMultipleMemory,
+}
+
+// Any poisoned lock is reported as `RwLockPoisoned`; the poisoned guard itself
+// is discarded.
+// NOTE(review): the generic parameters of this impl appear to be missing from
+// this copy of the patch -- compare with upstream.
+impl From> for Error {
+    fn from(_: PoisonError) -> Self {
+        Self::RwLockPoisoned
+    }
+}
+
+// NOTE(review): the type parameters of this alias appear to be missing from
+// this copy of the patch; below it is used both bare and as `Result<(), Error>`.
+pub type Result = std::result::Result;
+
+/// Access options for a guest memory mapping.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct MemMapOption {
+    pub read: bool,
+    pub write: bool,
+    pub exec: bool,
+    pub log_dirty: bool,
+}
+
+impl Default for MemMapOption {
+    /// Readable, writable and executable, without dirty-page logging.
+    fn default() -> Self {
+        Self {
+            read: true,
+            write: true,
+            exec: true,
+            log_dirty: false,
+        }
+    }
+}
+
+/// Operations on a single virtual CPU.
+pub trait Vcpu {
+    fn get_reg(&self, reg: Reg) -> Result;
+    fn set_regs(&mut self, vals: &[(Reg, u64)]) -> Result<(), Error>;
+
+    #[cfg(target_arch = "x86_64")]
+    fn get_seg_reg(&self, reg: SegReg) -> Result;
+
+    #[cfg(target_arch = "x86_64")]
+    fn get_dt_reg(&self, reg: DtReg) -> Result;
+
+    #[cfg(target_arch = "x86_64")]
+    fn get_sreg(&self, reg: SReg) -> Result;
+
+    #[cfg(target_arch = "x86_64")]
+    fn set_sregs(
+        &mut self,
+        sregs: &[(SReg, u64)],
+        seg_regs: &[(SegReg, SegRegVal)],
+        dt_regs: &[(DtReg, DtRegVal)],
+    ) -> Result<(), Error>;
+
+    /// Runs the vCPU. `entry` carries what the caller wants applied before the
+    /// guest is entered (see [`VmEntry`]).
+    fn run(&mut self, entry: VmEntry) -> Result;
+
+    #[cfg(target_arch = "x86_64")]
+    fn set_cpuids(&mut self, cpuids: Vec) -> Result<(), Error>;
+
+    fn dump(&self) -> Result<(), Error>;
+}
+
+/// A handle that can raise an interrupt towards the guest.
+pub trait IntxSender: Debug + Send + Sync + 'static {
+    fn send(&self) -> Result<(), Error>;
+}
+
+// Forwards `send` through an `Arc` to the wrapped sender.
+impl IntxSender for Arc
+where
+    T: IntxSender,
+{
+    fn send(&self) -> Result<(), Error> {
+        IntxSender::send(self.as_ref())
+    }
+}
+
+/// Guest-physical memory mapping operations of a VM.
+pub trait VmMemory: Debug + Send + Sync + 'static {
+    /// Maps `size` bytes at host virtual address `hva` to guest physical
+    /// address `gpa`, using memory slot `slot`.
+    fn mem_map(
+        &self,
+        slot: u32,
+        gpa: usize,
+        size: usize,
+        hva: usize,
+        option: MemMapOption,
+    ) -> Result<(), Error>;
+
+    fn unmap(&self, slot: u32, gpa: usize, size: usize) -> Result<(), Error>;
+
+    fn max_mem_slots(&self) -> Result;
+}
+
+/// A virtual machine: a factory for vCPUs, interrupt senders and the memory
+/// mapping interface.
+pub trait Vm {
+    type Vcpu: Vcpu;
+    type Memory: VmMemory;
+    type IntxSender: IntxSender + Send + Sync;
+    fn create_vcpu(&self, id: u32) -> Result;
+    fn create_intx_sender(&self, pin: u8) -> Result;
+    fn create_vm_memory(&mut self) -> Result;
+    fn stop_vcpu(id: u32, handle: &JoinHandle) -> Result<(), Error>;
+}
+
+/// Entry point of a hypervisor backend.
+pub trait Hypervisor {
+    type Vm: Vm + Sync + Send + 'static;
+
+    fn create_vm(&self) -> Result;
+
+    #[cfg(target_arch = "x86_64")]
+    fn get_supported_cpuids(&self) -> Result, Error>;
+}
+
+/// Why a vCPU returned from [`Vcpu::run`].
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum VmExit {
+    Io {
+        port: u16,
+        // NOTE(review): presumably `Some(value)` for a guest write and `None`
+        // for a read -- confirm against the exit handlers.
+        write: Option,
+        size: u8,
+    },
+    Mmio {
+        addr: usize,
+        write: Option,
+        size: u8,
+    },
+    Shutdown,
+    Unknown(String),
+    Interrupted,
+}
+
+/// What to hand to the vCPU when it is (re-)entered through [`Vcpu::run`].
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum VmEntry {
+    None,
+    Shutdown,
+    Io { data: u32 },
+    Mmio { data: u64 },
+}
diff --git a/alioth/src/hv/arch.rs b/alioth/src/hv/arch.rs
new file mode 100644
index 0000000..ef30661
--- /dev/null
+++ b/alioth/src/hv/arch.rs
@@ -0,0 +1,18 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Re-exports the architecture-specific register and CPUID types so callers can
+// name them as `hv::arch::*`.
+#[cfg(target_arch = "x86_64")]
+pub use x86_64::*;
+#[cfg(target_arch = "x86_64")]
+pub mod x86_64;
diff --git a/alioth/src/hv/arch/x86_64.rs b/alioth/src/hv/arch/x86_64.rs
new file mode 100644
index 0000000..81d7714
--- /dev/null
+++ b/alioth/src/hv/arch/x86_64.rs
@@ -0,0 +1,100 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::arch::reg::SegAccess;
+
+/// One CPUID entry: the queried function (and optional index) together with
+/// the four result registers.
+#[derive(Debug, Default, Clone)]
+pub struct Cpuid {
+    pub func: u32,
+    // NOTE(review): the type argument of `Option` appears to be missing from
+    // this copy of the patch -- compare with upstream.
+    pub index: Option,
+    pub eax: u32,
+    pub ebx: u32,
+    pub ecx: u32,
+    pub edx: u32,
+}
+
+/// General-purpose registers plus `Rip` and `Rflags`.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum Reg {
+    Rax,
+    Rbx,
+    Rcx,
+    Rdx,
+    Rsi,
+    Rdi,
+    Rsp,
+    Rbp,
+    R8,
+    R9,
+    R10,
+    R11,
+    R12,
+    R13,
+    R14,
+    R15,
+    Rip,
+    Rflags,
+}
+
+/// Special registers that hold a plain 64-bit value: control registers, EFER
+/// and the APIC base.
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum SReg {
+    Cr0,
+    Cr2,
+    Cr3,
+    Cr4,
+    Cr8,
+    Efer,
+    ApicBase,
+}
+
+/// Segment registers; their values are described by [`SegRegVal`].
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum SegReg {
+    Cs,
+    Ds,
+    Es,
+    Fs,
+    Gs,
+    Ss,
+    Tr,
+    Ldtr,
+}
+
+/// Descriptor-table registers; their values are described by [`DtRegVal`].
+#[derive(Debug, Copy, Clone, PartialEq, Eq)]
+pub enum DtReg {
+    Gdtr,
+    Idtr,
+}
+
+/// Value of a segment register: selector, base, limit and access rights.
+#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
+pub struct SegRegVal {
+    pub selector: u16,
+    pub base: u64,
+    pub limit: u32,
+    pub access: SegAccess,
+}
+
+impl SegRegVal {
+    /// Packs base, limit and access rights into one 64-bit descriptor word.
+    ///
+    /// Resulting bit layout:
+    /// - bits 15:0  -- `limit` bits 15:0
+    /// - bits 39:16 -- `base` bits 23:0
+    /// - bits 47:40 -- `access` bits 7:0
+    /// - bits 51:48 -- `limit` bits 19:16
+    /// - bits 55:52 -- `access` bits 15:12
+    /// - bits 63:56 -- `base` bits 31:24
+    ///
+    /// `selector`, `base` bits above 31, `limit` bits above 19 and `access`
+    /// bits 11:8 and 16 and above do not appear in the result.
+    pub fn to_desc(&self) -> u64 {
+        ((self.base & 0xff00_0000) << (56 - 24))
+            | (((self.access.0 as u64) & 0x0000_f0ff) << 40)
+            | (((self.limit as u64) & 0x000f_0000) << (48 - 16))
+            | ((self.base & 0x00ff_ffff) << 16)
+            | ((self.limit as u64) & 0x0000_ffff)
+    }
+}
+
+/// Value of a descriptor-table register: table base address and limit.
+#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
+pub struct DtRegVal {
+    pub base: u64,
+    pub limit: u16,
+}
diff --git a/alioth/src/hv/kvm.rs b/alioth/src/hv/kvm.rs
new file mode 100644
index 0000000..9cc0ea5
--- /dev/null
+++ b/alioth/src/hv/kvm.rs
@@ -0,0 +1,133 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod bindings;
+mod ioctls;
+mod vcpu;
+mod vm;
+mod vmentry;
+mod vmexit;
+
+use std::mem::{size_of, transmute};
+use std::os::fd::{FromRawFd, OwnedFd};
+use std::ptr::null_mut;
+use std::sync::Arc;
+
+use crate::ffi;
+#[cfg(target_arch = "x86_64")]
+use crate::hv::Cpuid;
+use crate::hv::{Error, Hypervisor};
+use bindings::{KvmCpuid2, KvmCpuid2Flag, KvmCpuidEntry2, KVM_API_VERSION, KVM_MAX_CPUID_ENTRIES};
+use ioctls::{kvm_create_irqchip, kvm_create_vm, kvm_get_api_version, kvm_get_vcpu_mmap_size};
+
+#[cfg(target_arch = "x86_64")]
+use ioctls::{kvm_get_supported_cpuid, kvm_set_identity_map_addr, kvm_set_tss_addr};
+use libc::SIGRTMIN;
+use vm::KvmVm;
+
+/// Handle to the KVM subsystem, owning the `/dev/kvm` file descriptor.
+#[derive(Debug)]
+pub struct Kvm {
+    fd: OwnedFd,
+}
+
+// Deliberately empty signal handler installed for SIGRTMIN by `Kvm::new`.
+// NOTE(review): presumably the signal is used to kick a thread out of a blocking
+// KVM_RUN (which then fails with EINTR) -- confirm against `stop_vcpu`.
+extern "C" fn sigrtmin_handler(_: libc::c_int, _: *mut libc::siginfo_t, _: *mut libc::c_void) {}
+
+impl Kvm {
+    /// Opens `/dev/kvm`, checks that the kernel reports API version
+    /// `KVM_API_VERSION`, and installs the process-wide SIGRTMIN handler.
+    ///
+    /// Returns `Error::LackCap` on a version mismatch; I/O and syscall failures
+    /// are propagated.
+    // NOTE(review): the return type's generic arguments (and the argument of
+    // `size_of::()` below) appear to be missing from this copy of the patch.
+    pub fn new() -> Result {
+        let kvm_file = std::fs::File::open("/dev/kvm")?;
+        let kvm_fd = OwnedFd::from(kvm_file);
+        let version = unsafe { kvm_get_api_version(&kvm_fd) }?;
+        if version != KVM_API_VERSION {
+            return Err(Error::LackCap {
+                cap: format!("current KVM API version {version}, need {KVM_API_VERSION}"),
+            });
+        }
+        // Start from an all-zero `sigaction` and fill in only the fields needed.
+        let mut action: libc::sigaction = unsafe { transmute([0u8; size_of::()]) };
+        action.sa_flags = libc::SA_SIGINFO;
+        action.sa_sigaction = sigrtmin_handler as _;
+        // All signals are blocked while the handler runs.
+        ffi!(unsafe { libc::sigfillset(&mut action.sa_mask) })?;
+        ffi!(unsafe { libc::sigaction(SIGRTMIN(), &action, null_mut()) })?;
+        Ok(Kvm { fd: kvm_fd })
+    }
+}
+
+impl Hypervisor for Kvm {
+    type Vm = KvmVm;
+
+    /// Creates a VM with an in-kernel irqchip. On x86_64 the TSS address and
+    /// the identity-map address are also set, to fixed values.
+    fn create_vm(&self) -> Result {
+        let vcpu_mmap_size = unsafe { kvm_get_vcpu_mmap_size(&self.fd) }? as usize;
+        let vm_fd = unsafe { kvm_create_vm(&self.fd, 0) }?;
+        // Wrap the raw fd immediately so it is closed on every early return.
+        let fd = unsafe { OwnedFd::from_raw_fd(vm_fd) };
+        unsafe { kvm_create_irqchip(&fd) }?;
+        // TODO should be in parameters
+        #[cfg(target_arch = "x86_64")]
+        unsafe { kvm_set_tss_addr(&fd, 0xf000_0000) }?;
+        #[cfg(target_arch = "x86_64")]
+        unsafe { kvm_set_identity_map_addr(&fd, &0xf000_3000) }?;
+        Ok(KvmVm {
+            fd: Arc::new(fd),
+            vcpu_mmap_size,
+            memory_created: false,
+        })
+    }
+
+    /// Queries the CPUID entries supported by KVM and converts them to
+    /// [`Cpuid`] values.
+    #[cfg(target_arch = "x86_64")]
+    fn get_supported_cpuids(&self) -> Result, Error> {
+        // `nent` goes in as the buffer capacity; after the ioctl it is used as
+        // the number of valid entries.
+        let mut kvm_cpuid2 = KvmCpuid2 {
+            nent: KVM_MAX_CPUID_ENTRIES as u32,
+            padding: 0,
+            entries: [KvmCpuidEntry2::default(); KVM_MAX_CPUID_ENTRIES],
+        };
+        unsafe { kvm_get_supported_cpuid(&self.fd, &mut kvm_cpuid2) }?;
+        let cpuids = kvm_cpuid2.entries[0..kvm_cpuid2.nent as usize]
+            .iter()
+            .map(|e| Cpuid {
+                func: e.function,
+                // The index is only kept when the entry flags mark it as
+                // significant.
+                index: if e.flags.contains(KvmCpuid2Flag::SIGNIFCANT_INDEX) {
+                    Some(e.index)
+                } else {
+                    None
+                },
+                eax: e.eax,
+                ebx: e.ebx,
+                ecx: e.ecx,
+                edx: e.edx,
+            })
+            .collect::>();
+        Ok(cpuids)
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    // Requires a usable /dev/kvm: checks that leaf 0x4000_0000 reports the
+    // 12-byte signature "KVMKVMKVM\0\0\0" split across ebx, ecx and edx.
+    #[test]
+    #[cfg(target_arch = "x86_64")]
+    fn test_get_supported_cpuid() {
+        let kvm = Kvm::new().unwrap();
+        let mut kvm_cpuid_exist = false;
+        let supported_cpuids = kvm.get_supported_cpuids().unwrap();
+        for cpuid in &supported_cpuids {
+            if cpuid.func == 0x4000_0000
+                && cpuid.ebx.to_le_bytes() == *b"KVMK"
+                && cpuid.ecx.to_le_bytes() == *b"VMKV"
+                && cpuid.edx.to_le_bytes() == *b"M\0\0\0"
+            {
+                kvm_cpuid_exist = true;
+            }
+        }
+        assert!(kvm_cpuid_exist);
+    }
+}
diff --git a/alioth/src/hv/kvm/bindings.rs b/alioth/src/hv/kvm/bindings.rs
new file mode 100644
index 0000000..1248698
--- /dev/null
+++ b/alioth/src/hv/kvm/bindings.rs
@@ -0,0 +1,239 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use bitflags::bitflags;
+
+// The `#[repr(C)]` types in this file are handed to KVM ioctls; presumably each
+// mirrors the same-named structure of the kernel's KVM UAPI header, so field
+// order, widths and padding must not be changed -- confirm against linux/kvm.h.
+
+/// ioctl type byte shared by all KVM ioctls defined in `ioctls.rs`.
+pub const KVMIO: u8 = 0xAE;
+/// The only KVM API version accepted by `Kvm::new`.
+pub const KVM_API_VERSION: i32 = 12;
+/// Capacity of the CPUID entry buffer used by `get_supported_cpuids`.
+pub const KVM_MAX_CPUID_ENTRIES: usize = 256;
+
+// Flags of a CPUID entry.
+// NOTE(review): "SIGNIFCANT" is presumably a typo for "SIGNIFICANT"; renaming
+// would also require updating its users.
+bitflags! {
+    #[derive(Debug, Clone, Copy, Default)]
+    pub struct KvmCpuid2Flag: u32 {
+        const SIGNIFCANT_INDEX = 1;
+    }
+}
+
+/// One CPUID entry as exchanged with KVM.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Default)]
+pub struct KvmCpuidEntry2 {
+    pub function: u32,
+    pub index: u32,
+    pub flags: KvmCpuid2Flag,
+    pub eax: u32,
+    pub ebx: u32,
+    pub ecx: u32,
+    pub edx: u32,
+    pub padding: [u32; 3],
+}
+
+/// Header plus inline array of CPUID entries.
+// NOTE(review): `N` is used as the array length but is not declared; the const
+// generic parameter list appears to be missing from this copy of the patch.
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub struct KvmCpuid2 {
+    pub nent: u32,
+    pub padding: u32,
+    pub entries: [KvmCpuidEntry2; N],
+}
+
+// Flags of a userspace memory region.
+bitflags! {
+    #[derive(Debug, Clone, Copy, Default)]
+    pub struct KvmMemFlag: u32 {
+        const LOG_DIRTY_PAGES = 1;
+        const READONLY = 2;
+    }
+}
+
+/// Describes one guest memory slot: guest physical range and the host
+/// userspace address backing it.
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct KvmUserspaceMemoryRegion {
+    pub slot: u32,
+    pub flags: KvmMemFlag,
+    pub guest_phys_addr: u64,
+    pub memory_size: u64,
+    pub userspace_addr: u64,
+}
+
+/// General-purpose register file of a vCPU.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct KvmRegs {
+    pub rax: u64,
+    pub rbx: u64,
+    pub rcx: u64,
+    pub rdx: u64,
+    pub rsi: u64,
+    pub rdi: u64,
+    pub rsp: u64,
+    pub rbp: u64,
+    pub r8: u64,
+    pub r9: u64,
+    pub r10: u64,
+    pub r11: u64,
+    pub r12: u64,
+    pub r13: u64,
+    pub r14: u64,
+    pub r15: u64,
+    pub rip: u64,
+    pub rflags: u64,
+}
+
+/// One segment register, with each access-right attribute in its own byte.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct KvmSegment {
+    pub base: u64,
+    pub limit: u32,
+    pub selector: u16,
+    pub type_: u8,
+    pub present: u8,
+    pub dpl: u8,
+    pub db: u8,
+    pub s: u8,
+    pub l: u8,
+    pub g: u8,
+    pub avl: u8,
+    pub unusable: u8,
+    pub padding: u8,
+}
+
+/// One descriptor-table register (base and limit).
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct KvmDtable {
+    pub base: u64,
+    pub limit: u16,
+    pub padding: [u16; 3],
+}
+
+/// Special-register state of a vCPU: segments, descriptor tables, control
+/// registers, EFER and APIC base.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct KvmSregs2 {
+    pub cs: KvmSegment,
+    pub ds: KvmSegment,
+    pub es: KvmSegment,
+    pub fs: KvmSegment,
+    pub gs: KvmSegment,
+    pub ss: KvmSegment,
+    pub tr: KvmSegment,
+    pub ldt: KvmSegment,
+    pub gdt: KvmDtable,
+    pub idt: KvmDtable,
+    pub cr0: u64,
+    pub cr2: u64,
+    pub cr3: u64,
+    pub cr4: u64,
+    pub cr8: u64,
+    pub efer: u64,
+    pub apic_base: u64,
+    pub flags: u64,
+    pub pdptrs: [u64; 4],
+}
+
+/// The per-vCPU run structure; `vcpu.rs` accesses it through a memory mapping
+/// of the vCPU fd. `exit_reason` selects how the `exit` union is interpreted.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct KvmRun {
+    pub request_interrupt_window: u8,
+    pub immediate_exit: u8,
+    pub padding1: [u8; 6],
+    pub exit_reason: u32,
+    pub ready_for_interrupt_injection: u8,
+    pub if_flag: u8,
+    pub flags: u16,
+    pub cr8: u64,
+    pub apic_base: u64,
+    pub exit: KvmExit,
+    pub kvm_valid_regs: u64,
+    pub kvm_dirty_regs: u64,
+    pub s: KvmSyncRegsBlock,
+}
+/// Exit-specific payload of `KvmRun`. `padding` pins the union's size at
+/// 256 bytes regardless of which variants are declared.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union KvmExit {
+    pub mmio: KvmExitMmio,
+    pub io: KvmExitIo,
+    pub padding: [u8; 256],
+}
+
+/// Payload for an MMIO exit (`KVM_EXIT_MMIO`).
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct KvmExitMmio {
+    pub phys_addr: u64,
+    pub data: [u8; 8],
+    pub len: u32,
+    pub is_write: u8,
+}
+
+/// Payload for a port I/O exit (`KVM_EXIT_IO`). `direction` is one of
+/// `KVM_EXIT_IO_IN` / `KVM_EXIT_IO_OUT`.
+// NOTE(review): `data_offset` is presumably a byte offset from the start of the
+// mapped run structure to the I/O data -- confirm against the KVM API docs.
+#[repr(C)]
+#[derive(Debug, Clone, Copy)]
+pub struct KvmExitIo {
+    pub direction: u8,
+    pub size: u8,
+    pub port: u16,
+    pub count: u32,
+    pub data_offset: u64,
+}
+
+/// Opaque 2048-byte block at the tail of `KvmRun`; no typed view is declared.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union KvmSyncRegsBlock {
+    pub padding: [u8; 2048],
+}
+
+// Values of `KvmRun::exit_reason` that are handled by this crate.
+pub const KVM_EXIT_IO: u32 = 2;
+pub const KVM_EXIT_MMIO: u32 = 6;
+pub const KVM_EXIT_SHUTDOWN: u32 = 8;
+
+// Values of `KvmExitIo::direction`.
+pub const KVM_EXIT_IO_IN: u8 = 0;
+pub const KVM_EXIT_IO_OUT: u8 = 1;
+
+/// Argument of the `kvm_irqfd` ioctl: ties file descriptor `fd` to interrupt
+/// line `gsi`.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone)]
+pub struct KvmIrqfd {
+    pub fd: u32,
+    pub gsi: u32,
+    pub flags: u32,
+    pub resamplefd: u32,
+    pub pad: [u8; 16usize],
+}
+
+/// A message-signalled interrupt: 64-bit address (split in two halves) and
+/// data word.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Default)]
+pub struct KvmMsi {
+    pub address_lo: u32,
+    pub address_hi: u32,
+    pub data: u32,
+    pub flags: u32,
+    pub devid: u32,
+    pub pad: [u8; 12usize],
+}
+
+// Capability numbers; presumably passed to the `kvm_check_extension` ioctl.
+pub const KVM_CAP_NR_MEMSLOTS: u64 = 10;
+pub const KVM_CAP_IRQFD: u64 = 32;
+
+// Flags of an ioeventfd registration.
+bitflags! {
+    #[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Hash)]
+    pub struct KvmIoEventFdFlag: u32 {
+        const DATA_MATCH = 1 << 0;
+        const PIO = 1 << 1;
+        const DEASSIGN = 1 << 2;
+    }
+}
+
+/// Describes an ioeventfd: the guest address range (`addr`, `len`), the
+/// optional value to match (`datamatch`) and the file descriptor `fd`.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Default)]
+pub struct KvmIoEventFd {
+    pub datamatch: u64,
+    pub addr: u64,
+    pub len: u32,
+    pub fd: i32,
+    pub flags: KvmIoEventFdFlag,
+    pub pad: [u32; 9],
+}
diff --git a/alioth/src/hv/kvm/ioctls.rs b/alioth/src/hv/kvm/ioctls.rs
new file mode 100644
index 0000000..31d2feb
--- /dev/null
+++ b/alioth/src/hv/kvm/ioctls.rs
@@ -0,0 +1,56 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use crate::hv::kvm::bindings::{
+    KvmCpuid2, KvmIrqfd, KvmRegs, KvmSregs2, KvmUserspaceMemoryRegion, KVMIO,
+};
+use crate::utils::ioctls::ioctl_io;
+use crate::{
+    ioctl_none, ioctl_read, ioctl_write_buf, ioctl_write_ptr, ioctl_write_val, ioctl_writeread_buf,
+};
+
+// Each macro invocation below defines one ioctl wrapper function named by its
+// first argument; `KVMIO` is the ioctl type byte and the hex literal is the
+// request number. The wrappers are called inside `unsafe` blocks by their users.
+
+// Used on the `/dev/kvm` fd (see `kvm.rs`).
+ioctl_none!(kvm_get_api_version, KVMIO, 0x00, 0);
+ioctl_write_val!(kvm_create_vm, ioctl_io(KVMIO, 0x01));
+ioctl_write_val!(kvm_check_extension, ioctl_io(KVMIO, 0x03));
+ioctl_none!(kvm_get_vcpu_mmap_size, KVMIO, 0x04, 0);
+#[cfg(target_arch = "x86_64")]
+ioctl_writeread_buf!(kvm_get_supported_cpuid, KVMIO, 0x05, KvmCpuid2);
+
+// VM setup: vCPU creation, memory slots and x86 address configuration.
+ioctl_write_val!(kvm_create_vcpu, ioctl_io(KVMIO, 0x41), u32);
+ioctl_write_ptr!(
+    kvm_set_user_memory_region,
+    KVMIO,
+    0x46,
+    KvmUserspaceMemoryRegion
+);
+#[cfg(target_arch = "x86_64")]
+ioctl_write_val!(kvm_set_tss_addr, ioctl_io(KVMIO, 0x47));
+#[cfg(target_arch = "x86_64")]
+ioctl_write_ptr!(kvm_set_identity_map_addr, KVMIO, 0x48, u64);
+
+ioctl_none!(kvm_create_irqchip, KVMIO, 0x60, 0);
+
+ioctl_write_ptr!(kvm_irqfd, KVMIO, 0x76, KvmIrqfd);
+
+// Running a vCPU and accessing its register state.
+ioctl_none!(kvm_run, KVMIO, 0x80, 0);
+ioctl_read!(kvm_get_regs, KVMIO, 0x81, KvmRegs);
+ioctl_write_ptr!(kvm_set_regs, KVMIO, 0x82, KvmRegs);
+
+#[cfg(target_arch = "x86_64")]
+ioctl_write_buf!(kvm_set_cpuid2, KVMIO, 0x90, KvmCpuid2);
+
+#[cfg(target_arch = "x86_64")]
+ioctl_read!(kvm_get_sregs2, KVMIO, 0xcc, KvmSregs2);
+#[cfg(target_arch = "x86_64")]
+ioctl_write_ptr!(kvm_set_sregs2, KVMIO, 0xcd, KvmSregs2);
diff --git a/alioth/src/hv/kvm/vcpu.rs b/alioth/src/hv/kvm/vcpu.rs
new file mode 100644
index 0000000..4abe45b
--- /dev/null
+++ b/alioth/src/hv/kvm/vcpu.rs
@@ -0,0 +1,481 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#[cfg(target_arch = "x86_64")]
+mod x86_64;
+
+use std::io::ErrorKind;
+use std::ops::{Deref, DerefMut};
+use std::os::fd::{OwnedFd, RawFd};
+use std::ptr::null_mut;
+
+use libc::{mmap, munmap, MAP_FAILED, MAP_SHARED, PROT_READ, PROT_WRITE};
+
+use crate::ffi;
+use crate::hv::arch::Reg;
+use crate::hv::kvm::bindings::{KvmRun, KVM_EXIT_IO, KVM_EXIT_MMIO};
+use crate::hv::kvm::ioctls::kvm_run;
+#[cfg(target_arch = "x86_64")]
+use crate::hv::{Cpuid, DtReg, DtRegVal, SReg, SegReg, SegRegVal};
+use crate::hv::{Error, Vcpu, VmEntry, VmExit};
+
+use super::bindings::KVM_EXIT_SHUTDOWN;
+
+// Owner of a shared read/write memory mapping of a file descriptor. The start
+// of the mapping is exposed as a `KvmRun` through `Deref`/`DerefMut`, and the
+// mapping is released on drop.
+// NOTE(review): generic parameter lists (`Result` arguments, the `T` of
+// `data_slice`/`data_slice_mut`) appear to be missing from this copy of the
+// patch -- compare with upstream.
+pub(super) struct KvmRunBlock {
+    addr: usize,
+    size: usize,
+}
+
+impl KvmRunBlock {
+    // Maps `mmap_size` bytes of `fd` at file offset 0, shared and read/write.
+    //
+    // Safety: presumably `fd` must be a KVM vCPU fd and `mmap_size` the value
+    // reported by `kvm_get_vcpu_mmap_size`, because the mapping is later
+    // dereferenced as a `KvmRun` -- confirm against the caller.
+    pub unsafe fn new(fd: RawFd, mmap_size: usize) -> Result {
+        let prot = PROT_READ | PROT_WRITE;
+        // mmap signals failure with MAP_FAILED rather than a negative value,
+        // hence the explicit sentinel passed to `ffi!`.
+        let addr = ffi!(
+            unsafe { mmap(null_mut(), mmap_size, prot, MAP_SHARED, fd, 0,) },
+            MAP_FAILED
+        )?;
+        Ok(KvmRunBlock {
+            addr: addr as usize,
+            size: mmap_size,
+        })
+    }
+
+    // Views `count` elements of `T` starting `offset` bytes into the mapping.
+    //
+    // Safety: no bounds or alignment checks are performed here; the caller must
+    // ensure the whole range lies inside the mapping and is valid for `T`.
+    pub(super) unsafe fn data_slice(&self, offset: usize, count: usize) -> &[T] {
+        std::slice::from_raw_parts((self.addr + offset) as *const T, count)
+    }
+
+    // Mutable counterpart of `data_slice`; the same safety requirements apply.
+    pub(super) unsafe fn data_slice_mut(&mut self, offset: usize, count: usize) -> &mut [T] {
+        std::slice::from_raw_parts_mut((self.addr + offset) as *mut T, count)
+    }
+}
+
+impl Deref for KvmRunBlock {
+    type Target = KvmRun;
+
+    // Reinterprets the start of the mapping as a `KvmRun`.
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*(self.addr as *const Self::Target) }
+    }
+}
+
+impl DerefMut for KvmRunBlock {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { &mut *(self.addr as *mut Self::Target) }
+    }
+}
+
+impl Drop for KvmRunBlock {
+    // Unmaps the region; a failure is only logged.
+    fn drop(&mut self) {
+        if let Err(e) = ffi!(unsafe { munmap(self.addr as _, self.size) }) {
+            log::error!("unmap kvm_run: {}", e)
+        }
+    }
+}
+
+/// A KVM virtual CPU: its file descriptor and the mapped run structure.
+pub struct KvmVcpu {
+    pub(super) kvm_run: KvmRunBlock,
+    pub(super) fd: OwnedFd,
+}
+
+impl Vcpu for KvmVcpu {
+    fn get_reg(&self, reg: Reg) -> Result {
+        self.kvm_get_reg(reg)
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    fn get_dt_reg(&self, reg: DtReg) -> Result {
+        self.kvm_get_dt_reg(reg)
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    fn get_seg_reg(&self, reg: SegReg) -> Result {
+        self.kvm_get_seg_reg(reg)
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    fn get_sreg(&self, reg: SReg) -> Result {
+        self.kvm_get_sreg(reg)
+    }
+
+    fn set_regs(&mut self, vals: &[(Reg, u64)]) -> Result<(), Error> {
+        self.kvm_set_regs(vals)
+    }
+
+    #[cfg(target_arch = "x86_64")]
+    fn set_sregs(
+        &mut self,
+        sregs: &[(SReg, u64)],
+        seg_regs: &[(SegReg, SegRegVal)],
+        dt_regs: &[(DtReg, DtRegVal)],
+    ) -> Result<(), Error> {
+        self.kvm_set_sregs(sregs, seg_regs, dt_regs)
+    }
+
+    fn run(&mut self, entry: VmEntry) -> Result {
+        match entry {
+            VmEntry::None => {}
+            VmEntry::Io { data } => self.entry_io(data),
+            VmEntry::Mmio { data } => self.entry_mmio(data),
+            VmEntry::Shutdown => self.immediate_exit(),
+        };
+        let ret = unsafe { kvm_run(&self.fd) };
+        match ret {
+            Err(e) => match (e.kind(), entry) {
+                (ErrorKind::WouldBlock, _) => Ok(VmExit::Interrupted),
+                (ErrorKind::Interrupted, VmEntry::Shutdown) => Ok(VmExit::Shutdown),
+                (ErrorKind::Interrupted, _) => Ok(VmExit::Interrupted),
+                _ => Err(e.into()),
+            },
+            Ok(_) => match self.kvm_run.exit_reason {
+                KVM_EXIT_IO => self.handle_io(),
+                KVM_EXIT_MMIO => self.handle_mmio(),
+                KVM_EXIT_SHUTDOWN => Ok(VmExit::Shutdown),
reason => Ok(VmExit::Unknown(format!("unkown kvm exit: {:#x}", reason))), + }, + } + } + + #[cfg(target_arch = "x86_64")] + fn set_cpuids(&mut self, cpuids: Vec) -> Result<(), Error> { + self.kvm_set_cpuids(cpuids) + } + + fn dump(&self) -> Result<(), Error> { + Ok(()) + } +} + +#[cfg(test)] +mod test { + use std::assert_matches::assert_matches; + use std::mem::size_of_val; + use std::ptr::null_mut; + + use libc::{mmap, MAP_ANONYMOUS, MAP_FAILED, MAP_SHARED, PROT_EXEC, PROT_READ, PROT_WRITE}; + + #[cfg(target_arch = "x86_64")] + use crate::arch::msr::Efer; + #[cfg(target_arch = "x86_64")] + use crate::arch::paging::Entry; + #[cfg(target_arch = "x86_64")] + use crate::arch::reg::SegAccess; + #[cfg(target_arch = "x86_64")] + use crate::arch::reg::{Cr0, Cr4}; + use crate::ffi; + use crate::hv::arch::Reg; + #[cfg(target_arch = "x86_64")] + use crate::hv::{DtReg, DtRegVal, SReg, SegReg, SegRegVal}; + use crate::hv::{Hypervisor, Kvm, MemMapOption, Vcpu, Vm, VmEntry, VmExit, VmMemory}; + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_vcpu_regs() { + let kvm = Kvm::new().unwrap(); + let vm = kvm.create_vm().unwrap(); + let mut vcpu = vm.create_vcpu(0).unwrap(); + let regs = [ + (Reg::Rax, 0xa93f90f6ce9c8040), + (Reg::Rbx, 0xacbfb3f1f6f9cc1a), + (Reg::Rcx, 0x885e7996751c1cd5), + (Reg::Rdx, 0xd0fdf85b84d0cc9c), + (Reg::Rsi, 0x3cc1f46972391c30), + (Reg::Rdi, 0xf67783992ddc4484), + (Reg::Rsp, 0x6363e7f07d68f992), + (Reg::Rbp, 0x7aeb086e85756325), + (Reg::R8, 0x72a90eeeb1f73300), + (Reg::R9, 0x8893ba64a98de27e), + (Reg::R10, 0x543f074b89fd6531), + (Reg::R11, 0x5330fea600e3a98c), + (Reg::R12, 0x5d2af23af80a0c15), + (Reg::R13, 0x596ad2d66a74a573), + (Reg::R14, 0x9d97437934678adb), + (Reg::R15, 0x7ae7b06eebe1f4fc), + (Reg::Rip, 0xdb424549231b8d3e), + (Reg::Rflags, 1 << 1), + ]; + vcpu.set_regs(®s).unwrap(); + for (reg, val) in regs { + assert_eq!(vcpu.get_reg(reg).unwrap(), val); + } + + let sregs = [ + (SReg::Cr0, 1 << 0 | 1 << 5 | 1 << 31), + (SReg::Cr2, 
0xffff88ac93e00000), + (SReg::Cr3, 0x1362d001), + (SReg::Cr4, 1 << 5), + (SReg::Cr8, 0x0), + (SReg::Efer, 1 << 8 | 1 << 10), + (SReg::ApicBase, 0xfee00900), + ]; + let seg_regs = [ + ( + SegReg::Cs, + SegRegVal { + selector: 0x10, + base: 0, + limit: 0xffff_ffff, + access: SegAccess(0xa09b), + }, + ), + ( + SegReg::Ds, + SegRegVal { + selector: 0x18, + base: 0, + limit: 0xffff_ffff, + access: SegAccess(0xc093), + }, + ), + ( + SegReg::Es, + SegRegVal { + selector: 0x18, + base: 0, + limit: 0xffff_ffff, + access: SegAccess(0xc093), + }, + ), + ( + SegReg::Fs, + SegRegVal { + selector: 0x18, + base: 0, + limit: 0xffff_ffff, + access: SegAccess(0xc093), + }, + ), + ( + SegReg::Gs, + SegRegVal { + selector: 0x18, + base: 0, + limit: 0xffff_ffff, + access: SegAccess(0xc093), + }, + ), + ( + SegReg::Ss, + SegRegVal { + selector: 0x18, + base: 0, + limit: 0xffff_ffff, + access: SegAccess(0xc093), + }, + ), + ( + SegReg::Tr, + SegRegVal { + selector: 0x20, + base: 0, + limit: 0xf_ffff, + access: SegAccess(0x8b), + }, + ), + ( + SegReg::Ldtr, + SegRegVal { + selector: 0x28, + base: 0, + limit: 0xf_ffff, + access: SegAccess(0x82), + }, + ), + ]; + + let dt_regs = [ + ( + DtReg::Gdtr, + DtRegVal { + base: 0xfffffe2a4aeeb000, + limit: 0x7f, + }, + ), + ( + DtReg::Idtr, + DtRegVal { + base: 0xfffffe0000000000, + limit: 0xfff, + }, + ), + ]; + vcpu.set_sregs(&sregs, &seg_regs, &dt_regs).unwrap(); + + for (sreg, val) in sregs { + assert_eq!(vcpu.get_sreg(sreg).unwrap(), val); + } + for (seg_reg, val) in seg_regs { + assert_eq!(vcpu.get_seg_reg(seg_reg).unwrap(), val) + } + for (dt_reg, val) in dt_regs { + assert_eq!(vcpu.get_dt_reg(dt_reg).unwrap(), val) + } + } + + #[test] + #[cfg(target_arch = "x86_64")] + fn test_kvm_run() { + let kvm = Kvm::new().unwrap(); + let mut vm = kvm.create_vm().unwrap(); + let memory = vm.create_vm_memory().unwrap(); + + let prot = PROT_WRITE | PROT_EXEC | PROT_READ; + let flag = MAP_ANONYMOUS | MAP_SHARED; + let user_mem = ffi!( + unsafe { 
mmap(null_mut(), 0x4000, prot, flag, -1, 0,) }, + MAP_FAILED + ) + .unwrap(); + let mmap_option = MemMapOption { + read: true, + write: true, + exec: true, + ..Default::default() + }; + memory + .mem_map(0, 0, 0x4000, user_mem as usize, mmap_option) + .unwrap(); + + // layout + // 0x1000 - 0x1f00 code + // 0x1f00 - 0x2000 GDT + // 0x2000 - 0x3000 PML4 + // 0x3000 - 0x4000 PDPT + + #[rustfmt::skip] + const CODE: [u8; 29] = [ + // mov dx, 0x3f8 + 0x66, 0xba, 0xf8, 0x03, + // in al, dx + 0xec, + // add eax, 0x1 + 0x83, 0xc0, 0x01, + // out dx, al + 0xee, + // mov rax, [0x5000] + 0x48, 0x8b, 0x04, 0x25, 0x00, 0x50, 0x00, + 0x00, + // add rax, 0x11 + 0x48, 0x83, 0xc0, 0x11, + // mov [0x5004], rax + 0x48, 0x89, 0x04, 0x25, 0x04, 0x50, 0x00, + 0x00, + ]; + unsafe { ((user_mem as usize + 0x1000) as *mut [u8; 29]).write(CODE) }; + + let pml4e = (Entry::P | Entry::RW).bits() as u64 | 0x3000; + unsafe { ((user_mem as usize + 0x2000) as *mut u64).write(pml4e) } + let ptpte = (Entry::P | Entry::RW | Entry::PS).bits() as u64; + unsafe { ((user_mem as usize + 0x3000) as *mut u64).write(ptpte) } + + let mut vcpu = vm.create_vcpu(0).unwrap(); + let cs = SegRegVal { + selector: 0x10, + base: 0, + limit: 0xffff_ffff, + access: SegAccess(0xa09b), + }; + let ds = SegRegVal { + selector: 0x18, + base: 0, + limit: 0xffff_ffff, + access: SegAccess(0xc093), + }; + let tr = SegRegVal { + selector: 0x20, + base: 0, + limit: 0, + access: SegAccess(0x8b), + }; + let ldtr = SegRegVal { + selector: 0x28, + base: 0, + limit: 0, + access: SegAccess(0x82), + }; + let gdt = [ + 0, + 0, + cs.to_desc(), + ds.to_desc(), + tr.to_desc(), + ldtr.to_desc(), + ]; + assert!(size_of_val(&gdt) < 0x100); + unsafe { ((user_mem as usize + 0x1f00) as *mut [u64; 6]).write(gdt) }; + let gdtr = DtRegVal { + base: 0x1f00, + limit: size_of_val(&gdt) as u16 - 1, + }; + let idtr = DtRegVal { base: 0, limit: 0 }; + vcpu.set_sregs( + &[ + (SReg::Efer, (Efer::LMA | Efer::LME).bits() as u64), + (SReg::Cr0, (Cr0::NE | Cr0::PE 
| Cr0::PG).bits() as u64), + (SReg::Cr3, 0x2000), + (SReg::Cr4, Cr4::PAE.bits() as u64), + ], + &[ + (SegReg::Cs, cs), + (SegReg::Ds, ds), + (SegReg::Es, ds), + (SegReg::Fs, ds), + (SegReg::Gs, ds), + (SegReg::Ss, ds), + (SegReg::Tr, tr), + (SegReg::Ldtr, ldtr), + ], + &[(DtReg::Gdtr, gdtr), (DtReg::Idtr, idtr)], + ) + .unwrap(); + vcpu.set_regs(&[ + (Reg::Rip, 0x1000), + (Reg::Rax, 0x2), + (Reg::Rbx, 0x2), + (Reg::Rdx, 0x3f8), + (Reg::Rsi, 0x1000), + (Reg::Rflags, 0x2), + ]) + .unwrap(); + assert_matches!( + vcpu.run(VmEntry::None), + Ok(VmExit::Io { + port: 0x3f8, + write: None, + size: 1 + }) + ); + assert_matches!( + vcpu.run(VmEntry::Io { data: 0x10 }), + Ok(VmExit::Io { + port: 0x3f8, + write: Some(0x11), + size: 1 + }) + ); + assert_matches!( + vcpu.run(VmEntry::None), + Ok(VmExit::Mmio { + addr: 0x5000, + write: None, + size: 8 + }) + ); + assert_matches!( + vcpu.run(VmEntry::Mmio { data: 0x0000_ffff }), + Ok(VmExit::Mmio { + addr: 0x5004, + write: Some(0x0001_0010), + size: 8 + }) + ); + } +} diff --git a/alioth/src/hv/kvm/vcpu/x86_64.rs b/alioth/src/hv/kvm/vcpu/x86_64.rs new file mode 100644 index 0000000..856fb84 --- /dev/null +++ b/alioth/src/hv/kvm/vcpu/x86_64.rs @@ -0,0 +1,236 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use crate::arch::reg::SegAccess; +use crate::hv::arch::{Cpuid, DtReg, DtRegVal, Reg, SReg, SegReg, SegRegVal}; +use crate::hv::kvm::bindings::{ + KvmCpuid2, KvmCpuid2Flag, KvmCpuidEntry2, KvmRegs, KvmSregs2, KVM_MAX_CPUID_ENTRIES, +}; +use crate::hv::kvm::ioctls::{ + kvm_get_regs, kvm_get_sregs2, kvm_set_cpuid2, kvm_set_regs, kvm_set_sregs2, +}; +use crate::hv::kvm::vcpu::KvmVcpu; +use crate::hv::{Error, Result}; + +impl KvmVcpu { + fn get_kvm_sregs2(&self) -> Result { + let kvm_sregs2 = unsafe { kvm_get_sregs2(&self.fd) }?; + Ok(kvm_sregs2) + } + + fn set_kvm_sregs2(&self, kvm_sregs2: &KvmSregs2) -> Result<()> { + unsafe { kvm_set_sregs2(&self.fd, kvm_sregs2) }?; + Ok(()) + } + + fn get_kvm_regs(&self) -> Result { + let kvm_regs = unsafe { kvm_get_regs(&self.fd) }?; + Ok(kvm_regs) + } + + fn set_kvm_regs(&self, kvm_regs: &KvmRegs) -> Result<()> { + unsafe { kvm_set_regs(&self.fd, kvm_regs) }?; + Ok(()) + } + + pub fn kvm_set_regs(&self, vals: &[(Reg, u64)]) -> Result<()> { + let mut kvm_regs = self.get_kvm_regs()?; + for (reg, val) in vals { + match reg { + Reg::Rax => kvm_regs.rax = *val, + Reg::Rbx => kvm_regs.rbx = *val, + Reg::Rcx => kvm_regs.rcx = *val, + Reg::Rdx => kvm_regs.rdx = *val, + Reg::Rsi => kvm_regs.rsi = *val, + Reg::Rdi => kvm_regs.rdi = *val, + Reg::Rsp => kvm_regs.rsp = *val, + Reg::Rbp => kvm_regs.rbp = *val, + Reg::R8 => kvm_regs.r8 = *val, + Reg::R9 => kvm_regs.r9 = *val, + Reg::R10 => kvm_regs.r10 = *val, + Reg::R11 => kvm_regs.r11 = *val, + Reg::R12 => kvm_regs.r12 = *val, + Reg::R13 => kvm_regs.r13 = *val, + Reg::R14 => kvm_regs.r14 = *val, + Reg::R15 => kvm_regs.r15 = *val, + Reg::Rip => kvm_regs.rip = *val, + Reg::Rflags => kvm_regs.rflags = *val, + } + } + self.set_kvm_regs(&kvm_regs) + } + + pub fn kvm_set_sregs( + &mut self, + sregs: &[(SReg, u64)], + seg_regs: &[(SegReg, SegRegVal)], + dt_regs: &[(DtReg, DtRegVal)], + ) -> Result<(), Error> { + let mut kvm_sregs2 = self.get_kvm_sregs2()?; + for (reg, val) in sregs { + match reg { 
+ SReg::Cr0 => kvm_sregs2.cr0 = *val, + SReg::Cr2 => kvm_sregs2.cr2 = *val, + SReg::Cr3 => kvm_sregs2.cr3 = *val, + SReg::Cr4 => kvm_sregs2.cr4 = *val, + SReg::Cr8 => kvm_sregs2.cr8 = *val, + SReg::Efer => kvm_sregs2.efer = *val, + SReg::ApicBase => kvm_sregs2.apic_base = *val, + } + } + for (reg, val) in dt_regs { + let target = match reg { + DtReg::Idtr => &mut kvm_sregs2.idt, + DtReg::Gdtr => &mut kvm_sregs2.gdt, + }; + target.limit = val.limit; + target.base = val.base; + } + for (reg, val) in seg_regs { + let target = match reg { + SegReg::Cs => &mut kvm_sregs2.cs, + SegReg::Ds => &mut kvm_sregs2.ds, + SegReg::Es => &mut kvm_sregs2.es, + SegReg::Fs => &mut kvm_sregs2.fs, + SegReg::Gs => &mut kvm_sregs2.gs, + SegReg::Ss => &mut kvm_sregs2.ss, + SegReg::Tr => &mut kvm_sregs2.tr, + SegReg::Ldtr => &mut kvm_sregs2.ldt, + }; + target.selector = val.selector; + target.base = val.base; + target.limit = val.limit; + target.type_ = val.access.seg_type() as u8; + target.s = val.access.s_code_data() as u8; + target.dpl = val.access.priv_level() as u8; + target.present = val.access.present() as u8; + target.avl = val.access.available() as u8; + target.db = val.access.db_size_32() as u8; + target.g = val.access.granularity() as u8; + target.l = val.access.l_64bit() as u8; + target.unusable = val.access.unusable() as u8; + } + self.set_kvm_sregs2(&kvm_sregs2)?; + Ok(()) + } + + pub fn kvm_get_reg(&self, reg: Reg) -> Result { + let kvm_regs = self.get_kvm_regs()?; + let val = match reg { + Reg::Rax => kvm_regs.rax, + Reg::Rbx => kvm_regs.rbx, + Reg::Rcx => kvm_regs.rcx, + Reg::Rdx => kvm_regs.rdx, + Reg::Rsi => kvm_regs.rsi, + Reg::Rdi => kvm_regs.rdi, + Reg::Rsp => kvm_regs.rsp, + Reg::Rbp => kvm_regs.rbp, + Reg::R8 => kvm_regs.r8, + Reg::R9 => kvm_regs.r9, + Reg::R10 => kvm_regs.r10, + Reg::R11 => kvm_regs.r11, + Reg::R12 => kvm_regs.r12, + Reg::R13 => kvm_regs.r13, + Reg::R14 => kvm_regs.r14, + Reg::R15 => kvm_regs.r15, + Reg::Rip => kvm_regs.rip, + Reg::Rflags => 
kvm_regs.rflags, + }; + Ok(val) + } + + pub fn kvm_get_dt_reg(&self, reg: DtReg) -> Result { + let kvm_sregs2 = self.get_kvm_sregs2()?; + let target = match reg { + DtReg::Idtr => &kvm_sregs2.idt, + DtReg::Gdtr => &kvm_sregs2.gdt, + }; + Ok(DtRegVal { + limit: target.limit, + base: target.base, + }) + } + + pub fn kvm_get_seg_reg(&self, reg: SegReg) -> Result { + let kvm_sregs2 = self.get_kvm_sregs2()?; + let kvm_segment = match reg { + SegReg::Cs => kvm_sregs2.cs, + SegReg::Ds => kvm_sregs2.ds, + SegReg::Es => kvm_sregs2.es, + SegReg::Fs => kvm_sregs2.fs, + SegReg::Gs => kvm_sregs2.gs, + SegReg::Ss => kvm_sregs2.ss, + SegReg::Tr => kvm_sregs2.tr, + SegReg::Ldtr => kvm_sregs2.ldt, + }; + let access = (kvm_segment.unusable as u32) << 16 + | (kvm_segment.g as u32) << 15 + | (kvm_segment.db as u32) << 14 + | (kvm_segment.l as u32) << 13 + | (kvm_segment.avl as u32) << 12 + | (kvm_segment.present as u32) << 7 + | (kvm_segment.dpl as u32) << 5 + | (kvm_segment.s as u32) << 4 + | (kvm_segment.type_ as u32); + let val = SegRegVal { + selector: kvm_segment.selector, + base: kvm_segment.base, + limit: kvm_segment.limit, + access: SegAccess(access), + }; + Ok(val) + } + + pub fn kvm_get_sreg(&self, reg: SReg) -> Result { + let kvm_sregs2 = self.get_kvm_sregs2()?; + let val = match reg { + SReg::Cr0 => kvm_sregs2.cr0, + SReg::Cr2 => kvm_sregs2.cr2, + SReg::Cr3 => kvm_sregs2.cr3, + SReg::Cr4 => kvm_sregs2.cr4, + SReg::Cr8 => kvm_sregs2.cr8, + SReg::Efer => kvm_sregs2.efer, + SReg::ApicBase => kvm_sregs2.apic_base, + }; + Ok(val) + } + + pub fn kvm_set_cpuids(&mut self, cpuids: Vec) -> Result<(), Error> { + if cpuids.len() > KVM_MAX_CPUID_ENTRIES { + Err(Error::Unexpected { + msg: format!("exeeds kvm cpuid entry limit: {}", KVM_MAX_CPUID_ENTRIES), + })? 
+ } + let mut kvm_cpuid2 = KvmCpuid2 { + nent: cpuids.len() as u32, + padding: 0, + entries: [KvmCpuidEntry2::default(); KVM_MAX_CPUID_ENTRIES], + }; + for (cpuid, entry) in std::iter::zip(cpuids, kvm_cpuid2.entries.iter_mut()) { + entry.eax = cpuid.eax; + entry.ebx = cpuid.ebx; + entry.ecx = cpuid.ecx; + entry.edx = cpuid.edx; + entry.function = cpuid.func; + if let Some(index) = cpuid.index { + entry.index = index; + entry.flags = KvmCpuid2Flag::SIGNIFCANT_INDEX; + } else { + entry.flags = KvmCpuid2Flag::empty(); + } + } + unsafe { kvm_set_cpuid2(&self.fd, &kvm_cpuid2) }?; + Ok(()) + } +} diff --git a/alioth/src/hv/kvm/vm.rs b/alioth/src/hv/kvm/vm.rs new file mode 100644 index 0000000..c0965a8 --- /dev/null +++ b/alioth/src/hv/kvm/vm.rs @@ -0,0 +1,234 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::os::fd::{AsRawFd, FromRawFd, OwnedFd}; +use std::os::unix::thread::JoinHandleExt; +use std::sync::Arc; +use std::thread::JoinHandle; + +use libc::{eventfd, write, EFD_CLOEXEC, EFD_NONBLOCK, SIGRTMIN}; + +use crate::ffi; +use crate::hv::{Error, IntxSender, MemMapOption, Vm, VmMemory}; + +use super::bindings::{ + KvmIrqfd, KvmMemFlag, KvmUserspaceMemoryRegion, KVM_CAP_IRQFD, KVM_CAP_NR_MEMSLOTS, +}; +use super::ioctls::{kvm_check_extension, kvm_create_vcpu, kvm_irqfd, kvm_set_user_memory_region}; +use super::vcpu::{KvmRunBlock, KvmVcpu}; + +pub struct KvmVm { + pub(super) fd: Arc, + pub(super) vcpu_mmap_size: usize, + pub(super) memory_created: bool, +} + +#[derive(Debug)] +pub struct KvmMemory { + pub(super) fd: Arc, +} + +impl VmMemory for KvmMemory { + fn mem_map( + &self, + slot: u32, + gpa: usize, + size: usize, + hva: usize, + option: MemMapOption, + ) -> Result<(), Error> { + let mut flags = KvmMemFlag::empty(); + if !option.read || !option.exec { + return Err(Error::MemMapOption { + option, + hypervisor: "kvm", + }); + } + if !option.write { + flags |= KvmMemFlag::READONLY; + } + if option.log_dirty { + flags |= KvmMemFlag::LOG_DIRTY_PAGES; + } + let region = KvmUserspaceMemoryRegion { + slot, + guest_phys_addr: gpa as _, + memory_size: size as _, + userspace_addr: hva as _, + flags, + }; + unsafe { kvm_set_user_memory_region(&self.fd, ®ion) }?; + Ok(()) + } + + fn unmap(&self, slot: u32, gpa: usize, _size: usize) -> Result<(), Error> { + let flags = KvmMemFlag::empty(); + let region = KvmUserspaceMemoryRegion { + slot, + guest_phys_addr: gpa as _, + memory_size: 0, + userspace_addr: 0, + flags, + }; + unsafe { kvm_set_user_memory_region(&self.fd, ®ion) }?; + Ok(()) + } + + fn max_mem_slots(&self) -> Result { + let ret = unsafe { kvm_check_extension(&self.fd, KVM_CAP_NR_MEMSLOTS) }?; + Ok(ret as u32) + } +} + +#[derive(Debug)] +pub struct KvmIntxSender { + event_fd: OwnedFd, +} + +impl IntxSender for KvmIntxSender { + fn send(&self) -> Result<(), 
Error> { + ffi!(unsafe { write(self.event_fd.as_raw_fd(), &1u64 as *const _ as _, 8) })?; + Ok(()) + } +} + +impl KvmVm { + fn check_extension(&self, id: u64) -> Result { + let ret = unsafe { kvm_check_extension(&self.fd, id) }?; + Ok(ret == 1) + } +} + +impl Vm for KvmVm { + type Vcpu = KvmVcpu; + type IntxSender = KvmIntxSender; + type Memory = KvmMemory; + + fn create_vcpu(&self, id: u32) -> Result { + let vcpu_fd = unsafe { kvm_create_vcpu(&self.fd, id) }?; + let kvm_run = unsafe { KvmRunBlock::new(vcpu_fd, self.vcpu_mmap_size) }?; + Ok(KvmVcpu { + fd: unsafe { OwnedFd::from_raw_fd(vcpu_fd) }, + kvm_run, + }) + } + + fn stop_vcpu(_id: u32, handle: &JoinHandle) -> Result<(), Error> { + ffi!(unsafe { libc::pthread_kill(handle.as_pthread_t(), SIGRTMIN()) })?; + Ok(()) + } + + fn create_vm_memory(&mut self) -> Result { + if self.memory_created { + Err(Error::CreatingMultipleMemory) + } else { + let kvm_memory = KvmMemory { + fd: self.fd.clone(), + }; + self.memory_created = true; + Ok(kvm_memory) + } + } + + fn create_intx_sender(&self, pin: u8) -> Result { + if !self.check_extension(KVM_CAP_IRQFD)? 
{ + Err(Error::LackCap { + cap: "KVM_CAP_IRQFD".to_string(), + })?; + } + let event_fd = ffi!(unsafe { eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK) })?; + let request = KvmIrqfd { + fd: event_fd as u32, + gsi: pin as u32, + ..Default::default() + }; + unsafe { kvm_irqfd(&self.fd, &request) }?; + Ok(KvmIntxSender { + event_fd: unsafe { OwnedFd::from_raw_fd(event_fd) }, + }) + } +} + +#[cfg(test)] +mod test { + use std::assert_matches::assert_matches; + use std::ptr::null_mut; + + use libc::{mmap, MAP_ANONYMOUS, MAP_FAILED, MAP_PRIVATE, PROT_EXEC, PROT_READ, PROT_WRITE}; + + use super::*; + use crate::ffi; + use crate::hv::{Hypervisor, Kvm, MemMapOption}; + + #[test] + fn test_mem_map() { + let kvm = Kvm::new().unwrap(); + let mut vm = kvm.create_vm().unwrap(); + let vm_memory = vm.create_vm_memory().unwrap(); + assert_matches!(vm_memory.max_mem_slots(), Ok(1..)); + let prot = PROT_WRITE | PROT_READ | PROT_EXEC; + let flag = MAP_ANONYMOUS | MAP_PRIVATE; + let user_mem = ffi!( + unsafe { mmap(null_mut(), 0x1000, prot, flag, -1, 0,) }, + MAP_FAILED + ) + .unwrap(); + let option_no_write = MemMapOption { + read: false, + write: true, + exec: true, + log_dirty: true, + }; + assert_matches!( + vm_memory.mem_map(0, 0x0, 0x1000, user_mem as usize, option_no_write), + Err(Error::MemMapOption { + option: MemMapOption { + read: false, + write: true, + exec: true, + log_dirty: true, + }, + hypervisor: "kvm" + }) + ); + let option_no_exec = MemMapOption { + read: false, + write: true, + exec: true, + log_dirty: true, + }; + assert_matches!( + vm_memory.mem_map(0, 0x0, 0x1000, user_mem as usize, option_no_exec), + Err(Error::MemMapOption { + option: MemMapOption { + read: false, + write: true, + exec: true, + log_dirty: true, + }, + hypervisor: "kvm" + }) + ); + let option = MemMapOption { + read: true, + write: false, + exec: true, + log_dirty: true, + }; + vm_memory + .mem_map(0, 0x0, 0x1000, user_mem as usize, option) + .unwrap(); + vm_memory.mem_map(0, 0x0, 0, 0, option).unwrap(); 
+ } +} diff --git a/alioth/src/hv/kvm/vmentry.rs b/alioth/src/hv/kvm/vmentry.rs new file mode 100644 index 0000000..41f09ed --- /dev/null +++ b/alioth/src/hv/kvm/vmentry.rs @@ -0,0 +1,51 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::hv::kvm::bindings::{KVM_EXIT_IO, KVM_EXIT_IO_IN, KVM_EXIT_MMIO}; + +use super::vcpu::KvmVcpu; + +impl KvmVcpu { + #[cfg(target_endian = "little")] + pub(super) fn entry_mmio(&mut self, data: u64) { + assert_eq!(self.kvm_run.exit_reason, KVM_EXIT_MMIO); + let kvm_mmio = unsafe { &mut self.kvm_run.exit.mmio }; + assert_eq!(kvm_mmio.is_write, 0); + kvm_mmio.data = data.to_ne_bytes(); + } + + pub(super) fn immediate_exit(&mut self) { + self.kvm_run.immediate_exit = 1; + } + + pub(super) fn entry_io(&mut self, data: u32) { + assert_eq!(self.kvm_run.exit_reason, KVM_EXIT_IO); + let kvm_io = unsafe { &self.kvm_run.exit.io }; + assert_eq!(kvm_io.direction, KVM_EXIT_IO_IN); + let offset = kvm_io.data_offset as usize; + let count = kvm_io.count as usize; + match kvm_io.size { + 1 => unsafe { + self.kvm_run.data_slice_mut(offset, count)[0] = data as u8; + }, + 2 => unsafe { + self.kvm_run.data_slice_mut(offset, count)[0] = data as u16; + }, + 4 => unsafe { + self.kvm_run.data_slice_mut(offset, count)[0] = data; + }, + _ => unreachable!("kvm_io.size = {}", kvm_io.size), + } + } +} diff --git a/alioth/src/hv/kvm/vmexit.rs b/alioth/src/hv/kvm/vmexit.rs new file mode 100644 index 
0000000..b3495ad --- /dev/null +++ b/alioth/src/hv/kvm/vmexit.rs @@ -0,0 +1,63 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::hv::kvm::bindings::{KVM_EXIT_IO_IN, KVM_EXIT_IO_OUT}; +use crate::hv::{Error, VmExit}; + +use super::vcpu::KvmVcpu; + +impl KvmVcpu { + #[cfg(target_endian = "little")] + pub(super) fn handle_mmio(&mut self) -> Result { + let kvm_mmio = unsafe { &self.kvm_run.exit.mmio }; + let exit = VmExit::Mmio { + addr: kvm_mmio.phys_addr as usize, + write: if kvm_mmio.is_write > 0 { + Some(u64::from_ne_bytes(kvm_mmio.data)) + } else { + None + }, + size: kvm_mmio.len as u8, + }; + Ok(exit) + } + + pub(super) fn handle_io(&mut self) -> Result { + let kvm_io = unsafe { &self.kvm_run.exit.io }; + let offset = kvm_io.data_offset as usize; + let count = kvm_io.count as usize; + assert_eq!(count, 1); + let write = match (kvm_io.direction, kvm_io.size) { + (KVM_EXIT_IO_IN, _) => None, + (KVM_EXIT_IO_OUT, 1) => { + Some(unsafe { self.kvm_run.data_slice::(offset, count) }[0] as u32) + } + (KVM_EXIT_IO_OUT, 2) => { + Some(unsafe { self.kvm_run.data_slice::(offset, count) }[0] as u32) + } + (KVM_EXIT_IO_OUT, 4) => { + Some(unsafe { self.kvm_run.data_slice::(offset, count) }[0]) + } + _ => unreachable!( + "kvm_io.direction = {}, kvm_io.size = {}", + kvm_io.direction, kvm_io.size + ), + }; + Ok(VmExit::Io { + port: kvm_io.port, + write, + size: kvm_io.size, + }) + } +} diff --git a/alioth/src/hv/test.rs 
b/alioth/src/hv/test.rs new file mode 100644 index 0000000..b61c376 --- /dev/null +++ b/alioth/src/hv/test.rs @@ -0,0 +1,41 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{Error, MemMapOption, Result}; + +#[derive(Debug)] +pub struct FakeVmMemory; + +impl crate::hv::VmMemory for FakeVmMemory { + fn mem_map( + &self, + _slot: u32, + _gpa: usize, + _size: usize, + _hva: usize, + _option: MemMapOption, + ) -> Result<()> { + Ok(()) + } + + fn unmap(&self, _slot: u32, _gpa: usize, _size: usize) -> Result<()> { + Ok(()) + } + + fn max_mem_slots(&self) -> Result { + Err(Error::LackCap { + cap: "MaxMemSlots".to_string(), + }) + } +} diff --git a/alioth/src/lib.rs b/alioth/src/lib.rs new file mode 100644 index 0000000..22e0341 --- /dev/null +++ b/alioth/src/lib.rs @@ -0,0 +1,27 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#![feature(assert_matches)] +#![feature(error_generic_member_access)] +#![feature(pointer_is_aligned)] + +pub mod acpi; +pub mod action; +pub mod arch; +pub mod device; +pub mod hv; +pub mod loader; +pub mod mem; +pub(crate) mod utils; +pub mod vm; diff --git a/alioth/src/loader.rs b/alioth/src/loader.rs new file mode 100644 index 0000000..01488b5 --- /dev/null +++ b/alioth/src/loader.rs @@ -0,0 +1,104 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+use std::ops::Range;
+
+use thiserror::Error;
+
+use crate::hv::arch::Reg;
+#[cfg(target_arch = "x86_64")]
+use crate::hv::arch::{DtReg, DtRegVal, SReg, SegReg, SegRegVal};
+use crate::mem::{MemRegion, MemRegionType};
+
+pub mod linux;
+
+/// Initial vCPU state and payload placement produced by a loader.
+#[derive(Debug, Clone, Default)]
+pub struct InitState {
+    /// Register values to set before the first VM entry.
+    pub regs: Vec<(Reg, u64)>,
+    /// System register values (x86_64 only).
+    #[cfg(target_arch = "x86_64")]
+    pub sregs: Vec<(SReg, u64)>,
+    /// Descriptor table register values (x86_64 only).
+    #[cfg(target_arch = "x86_64")]
+    pub dt_regs: Vec<(DtReg, DtRegVal)>,
+    /// Segment register values (x86_64 only).
+    #[cfg(target_arch = "x86_64")]
+    pub seg_regs: Vec<(SegReg, SegRegVal)>,
+    /// Guest physical address range occupied by the initramfs, if any.
+    pub initramfs: Option<Range<usize>>,
+}
+
+/// Errors reported while loading a guest payload.
+#[derive(Debug, Error)]
+pub enum Error {
+    #[error("io: {0}")]
+    Io(
+        #[backtrace]
+        #[from]
+        std::io::Error,
+    ),
+
+    #[error("mem: {0}")]
+    Mem(
+        #[backtrace]
+        #[from]
+        crate::mem::Error,
+    ),
+
+    #[error("missing magic number {magic:#x}, found {found:#x}")]
+    MissingMagic { magic: u64, found: u64 },
+
+    #[error("cannot find entry point")]
+    NoEntryPoint,
+
+    #[error("not a 64bit kernel")]
+    Not64BitKernel,
+
+    #[error("not a relocatable kernel")]
+    NotRelocatableKernel,
+
+    #[error("kernel command line too long, length: {0}, limit: {1}")]
+    CmdLineTooLong(usize, usize),
+
+    #[error("cannot load initramfs at {addr:#x} - {max:#x}, initramfs max address: {addr_max:#x}")]
+    InitramfsAddrLimit {
+        addr: usize,
+        max: usize,
+        addr_max: usize,
+    },
+
+    #[error("cannot find a memory region to load initramfs")]
+    CannotLoadInitramfs,
+
+    #[error("{name} too old, minimum supported version {min:#x}, found version {found:#x}")]
+    TooOld {
+        name: &'static str,
+        min: u64,
+        found: u64,
+    },
+}
+
+/// Finds a guest physical address at which an initramfs of `size` bytes can
+/// be placed.
+///
+/// `mem_regions` is a list of `(start, region)` pairs. The list is scanned
+/// from the last entry to the first; the first RAM region that can hold the
+/// image with its last byte at or below `addr_max` wins. The returned address
+/// is rounded down to a 4 KiB boundary.
+///
+/// # Errors
+///
+/// Returns [`Error::CannotLoadInitramfs`] when no RAM region qualifies.
+pub fn search_initramfs_address(
+    mem_regions: &[(usize, MemRegion)],
+    size: usize,
+    addr_max: usize,
+) -> Result<usize, Error> {
+    // Offset of the image's last byte from its load address. An empty image
+    // is treated like a one-byte image so that `size - 1` cannot underflow.
+    let last_offset = size.saturating_sub(1);
+    for (start, region) in mem_regions.iter().rev() {
+        // Only RAM can hold the image; an empty region cannot hold anything
+        // and would underflow the `region.size - 1` below.
+        if region.type_ != MemRegionType::Ram || region.size == 0 {
+            continue;
+        }
+        let region_max = region.size - 1 + start;
+        let limit = std::cmp::min(region_max, addr_max);
+        if limit < last_offset {
+            continue;
+        }
+        let load_addr = (limit - last_offset) & !0xfff;
+        if load_addr >= *start {
+            return Ok(load_addr);
+        }
+    }
+    Err(Error::CannotLoadInitramfs)
+}
diff --git a/alioth/src/loader/linux.rs b/alioth/src/loader/linux.rs
new file mode 100644
index 0000000..6b90c13
--- /dev/null
+++ b/alioth/src/loader/linux.rs
@@ -0,0 +1,21 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#[cfg(target_arch = "x86_64")]
+pub mod bootparams;
+
+#[cfg(target_arch = "x86_64")]
+mod x86_64;
+#[cfg(target_arch = "x86_64")]
+pub use x86_64::load;
diff --git a/alioth/src/loader/linux/bootparams.rs b/alioth/src/loader/linux/bootparams.rs
new file mode 100644
index 0000000..0ab8938
--- /dev/null
+++ b/alioth/src/loader/linux/bootparams.rs
@@ -0,0 +1,145 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use bitflags::bitflags;
+use zerocopy::{AsBytes, FromBytes, FromZeroes};
+
+/// Expected value of `SetupHeader::boot_flag`.
+pub const MAGIC_AA55: u16 = 0xaa55;
+/// Expected value of `SetupHeader::header`.
+pub const MAGIC_HDRS: u32 = 0x53726448; // "HdrS"
+/// Byte offset of the setup header. The fields of `BootParams` that precede
+/// `hdr` add up to exactly this many bytes.
+pub const SETUP_HEADER_OFFSET: u64 = 0x01f1;
+
+// Bit definitions for `SetupHeader::loadflags`.
+bitflags! {
+    #[repr(C)]
+    #[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash)]
+    pub struct LoadFlags: u8 {
+        const LOADED_HIGH = (1<<0);
+        const KASLR_FLAG = (1<<1);
+        const QUIET_FLAG = (1<<5);
+        const KEEP_SEGMENTS = (1<<6);
+        const CAN_USE_HEAP = (1<<7);
+    }
+}
+
+// Bit definitions for `SetupHeader::xloadflags`.
+bitflags! {
+    #[derive(Debug, Copy, Clone, Default, PartialEq, Eq, Hash)]
+    pub struct XLoadFlags: u16 {
+        const XLF_KERNEL_64 = (1<<0);
+        const XLF_CAN_BE_LOADED_ABOVE_4G = (1<<1);
+        const XLF_EFI_HANDOVER_32 = (1<<2);
+        const XLF_EFI_HANDOVER_64 = (1<<3);
+        const XLF_EFI_KEXEC = (1<<4);
+        const XLF_5LEVEL = (1<<5);
+        const XLF_5LEVEL_ENABLED = (1<<6);
+    }
+}
+
+/// Setup header of a Linux kernel image.
+///
+/// The struct is `repr(C, packed)`, so the field order and widths below are
+/// the byte layout; do not reorder or resize fields.
+#[repr(C, packed)]
+#[derive(Debug, Copy, Clone, AsBytes, FromBytes, FromZeroes)]
+pub struct SetupHeader {
+    pub setup_sects: u8,
+    pub root_flags: u16,
+    pub syssize: u32,
+    pub ram_size: u16,
+    pub vid_mode: u16,
+    pub root_dev: u16,
+    pub boot_flag: u16,
+    pub jump: u16,
+    pub header: u32,
+    pub version: u16,
+    pub realmode_swtch: u32,
+    pub start_sys_seg: u16,
+    pub kernel_version: u16,
+    pub type_of_loader: u8,
+    pub loadflags: u8,
+    pub setup_move_size: u16,
+    pub code32_start: u32,
+    pub ramdisk_image: u32,
+    pub ramdisk_size: u32,
+    pub bootsect_kludge: u32,
+    pub heap_end_ptr: u16,
+    pub ext_loader_ver: u8,
+    pub ext_loader_type: u8,
+    pub cmd_line_ptr: u32,
+    pub initrd_addr_max: u32,
+    pub kernel_alignment: u32,
+    pub relocatable_kernel: u8,
+    pub min_alignment: u8,
+    pub xloadflags: u16,
+    pub cmdline_size: u32,
+    pub hardware_subarch: u32,
+    pub hardware_subarch_data: u64,
+    pub payload_offset: u32,
+    pub payload_length: u32,
+    pub setup_data: u64,
+    pub pref_address: u64,
+    pub init_size: u32,
+    pub handover_offset: u32,
+    pub kernel_info_offset: u32,
+}
+
+// Values for `BootE820Entry::type_`.
+pub const E820_RAM: u32 = 1;
+pub const E820_RESERVED: u32 = 2;
+pub const E820_ACPI: u32 = 3;
+pub const E820_NVS: u32 = 4;
+pub const E820_UNUSABLE: u32 = 5;
+pub const E820_PMEM: u32 = 7;
+pub const E820_RESERVED_KERN: u32 = 128;
+
+/// One entry of `BootParams::e820_table`: a memory range and its type.
+#[repr(C, packed)]
+#[derive(Debug, Copy, Clone, AsBytes, FromBytes, FromZeroes)]
+pub struct BootE820Entry {
+    pub addr: u64,
+    pub size: u64,
+    pub type_: u32,
+}
+
+/// Boot parameter block handed to the kernel.
+///
+/// Packed like `SetupHeader`: the `_padN` fields are layout filler and must
+/// be kept so that later fields stay at their offsets.
+#[repr(C, packed)]
+#[derive(Debug, Copy, Clone, AsBytes, FromBytes, FromZeroes)]
+pub struct BootParams {
+    pub screen_info: [u8; 64],
+    pub apm_bios_info: [u8; 20],
+    pub _pad2: [u8; 4usize],
+    pub tboot_addr: u64,
+    pub ist_info: [u8; 16],
+    pub acpi_rsdp_addr: u64,
+    pub _pad3: [u8; 8usize],
+    pub hd0_info: [u8; 16usize],
+    pub hd1_info: [u8; 16usize],
+    pub sys_desc_table: [u8; 16],
+    pub olpc_ofw_header: [u8; 16],
+    pub ext_ramdisk_image: u32,
+    pub ext_ramdisk_size: u32,
+    pub ext_cmd_line_ptr: u32,
+    pub _pad4: [u8; 112usize],
+    pub cc_blob_address: u32,
+    pub edid_info: [u8; 128],
+    pub efi_info: [u8; 32],
+    pub alt_mem_k: u32,
+    pub scratch: u32,
+    // Number of valid entries in `e820_table`.
+    pub e820_entries: u8,
+    pub eddbuf_entries: u8,
+    pub edd_mbr_sig_buf_entries: u8,
+    pub kbd_status: u8,
+    pub secure_boot: u8,
+    pub _pad5: [u8; 2usize],
+    pub sentinel: u8,
+    pub _pad6: [u8; 1usize],
+    // Starts at byte 0x1f1 of this struct, i.e. at SETUP_HEADER_OFFSET.
+    pub hdr: SetupHeader,
+    pub _pad7: [u8; 36usize],
+    pub edd_mbr_sig_buffer: [u32; 16usize],
+    pub e820_table: [BootE820Entry; 128usize],
+    pub _pad8: [u8; 48usize],
+    pub eddbuf: [u8; 492usize],
+    pub _pad9: [u8; 276usize],
+}
diff --git a/alioth/src/loader/linux/x86_64.rs b/alioth/src/loader/linux/x86_64.rs
new file mode 100644
index 0000000..e972745
--- /dev/null
+++ b/alioth/src/loader/linux/x86_64.rs
@@ -0,0 +1,240 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::fs::File;
+use std::io::{BufReader, Read, Seek, SeekFrom};
+use std::mem::{size_of, size_of_val};
+use std::path::Path;
+
+use crate::arch::msr::Efer;
+use crate::arch::paging::Entry;
+use crate::arch::reg::{Cr0, Cr4, Rflags, SegAccess};
+use crate::hv::arch::{DtReg, DtRegVal, Reg, SReg, SegReg, SegRegVal};
+use crate::mem::ram::RamBus;
+use zerocopy::{AsBytes, FromZeroes};
+
+use crate::arch::layout::{
+    BOOT_GDT_START, BOOT_PAGING_START, EBDA_START, KERNEL_CMD_LINE_LIMIT, KERNEL_CMD_LINE_START,
+    KERNEL_IMAGE_START, LINUX_BOOT_PARAMS_START,
+};
+use crate::mem::{MemRegion, MemRegionType};
+
+use crate::loader::linux::bootparams::{
+    BootE820Entry, BootParams, XLoadFlags, E820_ACPI, E820_PMEM, E820_RAM, E820_RESERVED,
+    MAGIC_AA55, MAGIC_HDRS, SETUP_HEADER_OFFSET,
+};
+use crate::loader::{search_initramfs_address, Error, InitState};
+
+// Minimum accepted setup header version: loading bzImage and ramdisk above 4G
+// in 64bit.
+const MINIMAL_VERSION: u16 = 0x020c;
+
+/// Loads a Linux bzImage, and optionally a command line and an initramfs,
+/// into guest memory and returns the initial vCPU state.
+///
+/// Steps, in order: read and validate the setup header, copy the command
+/// line, copy the kernel image to `KERNEL_IMAGE_START`, place the initramfs,
+/// fill the e820 table from `mem_regions`, write the boot parameters, build
+/// identity-mapped page tables with 1 GiB pages, and write a boot GDT.
+///
+/// # Errors
+///
+/// * [`Error::MissingMagic`], [`Error::TooOld`], [`Error::Not64BitKernel`],
+///   [`Error::NotRelocatableKernel`] if the setup header is not acceptable.
+/// * [`Error::CmdLineTooLong`] if `cmd_line` exceeds the limit.
+/// * [`Error::Io`] on file errors, when the kernel file is shorter than its
+///   setup sectors, or when `mem_regions` does not fit into the e820 table.
+/// * [`Error::Mem`] if writing guest memory fails.
+pub fn load<P: AsRef<Path>>(
+    memory: &RamBus,
+    mem_regions: &[(usize, MemRegion)],
+    kernel: P,
+    cmd_line: Option<&str>,
+    initramfs: Option<P>,
+) -> Result<InitState, Error> {
+    let mut boot_params = BootParams::new_zeroed();
+
+    // Reject region lists that do not fit into the fixed-size e820 table
+    // instead of panicking on an out-of-bounds index further down.
+    if mem_regions.len() > boot_params.e820_table.len() {
+        return Err(std::io::Error::new(
+            std::io::ErrorKind::InvalidInput,
+            "too many memory regions for the e820 table",
+        )
+        .into());
+    }
+
+    let kernel_file = File::open(kernel)?;
+    let kernel_meta = kernel_file.metadata()?;
+    let mut kernel = BufReader::new(kernel_file);
+    kernel.seek(SeekFrom::Start(SETUP_HEADER_OFFSET))?;
+    kernel.read_exact(boot_params.hdr.as_bytes_mut())?;
+
+    // For backwards compatibility, if the setup_sects field contains 0,
+    // the real value is 4.
+    if boot_params.hdr.setup_sects == 0 {
+        boot_params.hdr.setup_sects = 4;
+    }
+
+    if boot_params.hdr.boot_flag != MAGIC_AA55 {
+        return Err(Error::MissingMagic {
+            magic: MAGIC_AA55 as u64,
+            found: boot_params.hdr.boot_flag as u64,
+        });
+    }
+    if boot_params.hdr.header != MAGIC_HDRS {
+        return Err(Error::MissingMagic {
+            magic: MAGIC_HDRS as u64,
+            found: boot_params.hdr.header as u64,
+        });
+    }
+    if boot_params.hdr.version < MINIMAL_VERSION {
+        return Err(Error::TooOld {
+            name: "bzimage",
+            min: MINIMAL_VERSION as u64,
+            found: boot_params.hdr.version as u64,
+        });
+    }
+    if !XLoadFlags::from_bits_retain(boot_params.hdr.xloadflags).contains(XLoadFlags::XLF_KERNEL_64)
+    {
+        return Err(Error::Not64BitKernel);
+    }
+    if boot_params.hdr.relocatable_kernel == 0 {
+        return Err(Error::NotRelocatableKernel);
+    }
+
+    boot_params.hdr.type_of_loader = 0xff;
+
+    // load cmd line
+    if let Some(cmd_line) = cmd_line {
+        let cmd_line_limit =
+            std::cmp::min(boot_params.hdr.cmdline_size as usize, KERNEL_CMD_LINE_LIMIT);
+        if cmd_line.len() > cmd_line_limit {
+            return Err(Error::CmdLineTooLong(cmd_line.len(), cmd_line_limit));
+        }
+        memory.write_range(KERNEL_CMD_LINE_START, cmd_line.len(), cmd_line.as_bytes())?;
+        boot_params.hdr.cmd_line_ptr = KERNEL_CMD_LINE_START as u32;
+        boot_params.ext_cmd_line_ptr = (KERNEL_CMD_LINE_START >> 32) as u32;
+    }
+
+    // load kernel image: everything after the boot sector and setup sectors.
+    let kernel_offset = (boot_params.hdr.setup_sects as u64 + 1) * 512;
+    kernel.seek(SeekFrom::Start(kernel_offset))?;
+    // A truncated file would make the subtraction underflow.
+    let kernel_size = kernel_meta
+        .len()
+        .checked_sub(kernel_offset)
+        .ok_or_else(|| {
+            std::io::Error::new(
+                std::io::ErrorKind::UnexpectedEof,
+                "kernel image is shorter than its setup sectors",
+            )
+        })? as usize;
+    memory.write_range(KERNEL_IMAGE_START, kernel_size, kernel)?;
+
+    // load initramfs
+    let initramfs_range = if let Some(initramfs) = initramfs {
+        let initramfs = File::open(initramfs)?;
+        let initramfs_size = initramfs.metadata()?.len() as usize;
+        let initramfs_gpa = search_initramfs_address(
+            mem_regions,
+            initramfs_size,
+            boot_params.hdr.initrd_addr_max as usize,
+        )?;
+        let initramfs_end = initramfs_gpa + initramfs_size;
+        memory.write_range(initramfs_gpa, initramfs_size, initramfs)?;
+        // Low 32 bits go into the setup header, high 32 bits into the
+        // ext_* fields of the boot parameters.
+        boot_params.hdr.ramdisk_image = initramfs_gpa as u32;
+        boot_params.ext_ramdisk_image = (initramfs_gpa >> 32) as u32;
+        boot_params.hdr.ramdisk_size = initramfs_size as u32;
+        boot_params.ext_ramdisk_size = (initramfs_size >> 32) as u32;
+        log::info!(
+            "initramfs loaded at {:#x} - {:#x}",
+            initramfs_gpa,
+            initramfs_end - 1,
+        );
+        Some(initramfs_gpa..initramfs_end)
+    } else {
+        None
+    };
+
+    // setup e820 table; the length was checked against the table above.
+    for (index, (addr, region)) in mem_regions.iter().enumerate() {
+        let type_ = match region.type_ {
+            MemRegionType::Ram => E820_RAM,
+            MemRegionType::Reserved => E820_RESERVED,
+            MemRegionType::Acpi => E820_ACPI,
+            MemRegionType::Pmem => E820_PMEM,
+        };
+        boot_params.e820_table[index] = BootE820Entry {
+            addr: *addr as u64,
+            size: region.size as u64,
+            type_,
+        };
+    }
+    boot_params.e820_entries = mem_regions.len() as u8;
+
+    boot_params.acpi_rsdp_addr = EBDA_START as u64;
+
+    memory.write(LINUX_BOOT_PARAMS_START, &boot_params)?;
+
+    // set up identity paging: one PML4 entry pointing at a PDPT whose entries
+    // map 1 GiB pages.
+    let pml4_start = BOOT_PAGING_START;
+    let pdpt_start = pml4_start + 0x1000;
+    let pml4e = (Entry::P | Entry::RW).bits() as u64 | pdpt_start as u64;
+    memory.write(pml4_start, &pml4e)?;
+    // A zero alignment in a malformed header would underflow `alignment - 1`;
+    // treat it as "no alignment requirement".
+    let alignment = std::cmp::max(boot_params.hdr.kernel_alignment as usize, 1);
+    let runtime_start = (KERNEL_IMAGE_START + alignment - 1) & !(alignment - 1);
+    // Highest address that must be mapped: the kernel's runtime footprint,
+    // the boot parameters and the command line area.
+    let max_addr = std::cmp::max(
+        runtime_start + boot_params.hdr.init_size as usize,
+        std::cmp::max(
+            LINUX_BOOT_PARAMS_START + size_of::<BootParams>(),
+            KERNEL_CMD_LINE_START + KERNEL_CMD_LINE_LIMIT,
+        ),
+    );
+    let num_page = (max_addr as u64 + (1 << 30) - 1) >> 30;
+    for i in 0..num_page {
+        let pdpte = (i << 30) | (Entry::P | Entry::RW | Entry::PS).bits() as u64;
+        memory.write(pdpt_start + i as usize * size_of::<u64>(), &pdpte)?;
+    }
+
+    // set up gdt
+    let boot_cs = SegRegVal {
+        selector: 0x10,
+        base: 0,
+        limit: 0xfff_ffff,
+        access: SegAccess(0xa09b),
+    };
+    let boot_ds = SegRegVal {
+        selector: 0x18,
+        base: 0,
+        limit: 0xfff_ffff,
+        access: SegAccess(0xc093),
+    };
+    let boot_tr = SegRegVal {
+        selector: 0x20,
+        base: 0,
+        limit: 0,
+        access: SegAccess(0x8b),
+    };
+    let boot_ldtr = SegRegVal {
+        selector: 0x28,
+        base: 0,
+        limit: 0,
+        access: SegAccess(0x82),
+    };
+    // Descriptor index = selector / 8, hence the two leading empty entries.
+    let gdt = [
+        0,
+        0,
+        boot_cs.to_desc(),
+        boot_ds.to_desc(),
+        boot_tr.to_desc(),
+        boot_ldtr.to_desc(),
+    ];
+    let gdtr = DtRegVal {
+        base: BOOT_GDT_START as u64,
+        limit: size_of_val(&gdt) as u16 - 1,
+    };
+    let idtr = DtRegVal { base: 0, limit: 0 };
+    memory.write(BOOT_GDT_START, &gdt)?;
+
+    Ok(InitState {
+        regs: vec![
+            (Reg::Rsi, LINUX_BOOT_PARAMS_START as u64),
+            (Reg::Rip, KERNEL_IMAGE_START as u64 + 0x200),
+            (Reg::Rflags, Rflags::RESERVED_1.bits() as u64),
+        ],
+        sregs: vec![
+            (SReg::Efer, (Efer::LMA | Efer::LME).bits() as u64),
+            (SReg::Cr0, (Cr0::NE | Cr0::PE | Cr0::PG).bits() as u64),
+            (SReg::Cr3, pml4_start as u64),
+            (SReg::Cr4, Cr4::PAE.bits() as u64),
+        ],
+        seg_regs: vec![
+            (SegReg::Cs, boot_cs),
+            (SegReg::Ds, boot_ds),
+            (SegReg::Es, boot_ds),
+            (SegReg::Fs, boot_ds),
+            (SegReg::Gs, boot_ds),
+            (SegReg::Ss, boot_ds),
+            (SegReg::Tr, boot_tr),
+            (SegReg::Ldtr, boot_ldtr),
+        ],
+        dt_regs: vec![(DtReg::Gdtr, gdtr), (DtReg::Idtr, idtr)],
+        initramfs: initramfs_range,
+    })
+}
diff --git a/alioth/src/mem.rs b/alioth/src/mem.rs
new file mode 100644
index 0000000..4cce478
--- /dev/null
+++ b/alioth/src/mem.rs
@@ -0,0 +1,347 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+mod addressable;
+pub mod io;
+pub mod mmio;
+pub mod ram;
+
+use std::backtrace::Backtrace;
+use std::sync::{Arc, Mutex, PoisonError};
+
+use thiserror::Error;
+
+use crate::action::Action;
+use crate::align_up;
+use crate::hv::{self, VmEntry, VmMemory};
+use ram::UserMem;
+
+use addressable::{Addressable, SlotBackend};
+use io::IoBus;
+use mmio::{Mmio, MmioBus};
+use ram::RamBus;
+
+use self::io::IoDev;
+
+use crate::arch::layout::{
+    MEM_64_START, MMIO_32_START, PCIE_CONFIG_END, PCIE_CONFIG_START, RAM_32_END,
+};
+
+/// Errors reported by the guest memory subsystem.
+#[derive(Debug, Error)]
+pub enum Error {
+    #[error("[{new_addr:#x}, {new_end:#x}) overlaps with [{curr_addr:#x}, {curr_end:#x})")]
+    Overlap {
+        new_addr: usize,
+        new_end: usize,
+        curr_addr: usize,
+        curr_end: usize,
+    },
+    #[error("(addr={addr:#x}, size={size:#x}) is out of range")]
+    OutOfRange { addr: usize, size: usize },
+    #[error("io: {source:#x?}")]
+    Io {
+        #[from]
+        source: std::io::Error,
+        backtrace: Backtrace,
+    },
+    #[error("mmap: {0}")]
+    Mmap(#[source] std::io::Error),
+    #[error("offset {offset:#x} exceeds limit {limit:#x}")]
+    ExceedLimit { offset: usize, limit: usize },
+    #[error("{0:#x} is not mapped")]
+    NotMapped(usize),
+    #[error("zero memory size")]
+    ZeroMemorySize,
+    #[error("lock poisoned")]
+    LockPoisoned,
+    // The variant name is misspelled ("Canot"); it is kept because renaming
+    // a public variant would break callers.
+    #[error("cannot allocate")]
+    CanotAllocate { backtrace: Backtrace },
+    // NOTE(review): the boxed type was lost when this text was extracted;
+    // `dyn std::error::Error + Send + Sync` is a reconstruction - confirm.
+    #[error("cannot register MMIO notifier: {0}")]
+    Notifier(#[source] Box<dyn std::error::Error + Send + Sync>),
+    #[error("{0}")]
+    Hv(
+        #[from]
+        #[backtrace]
+        hv::Error,
+    ),
+    #[error("cannot handle action: {0:x?}")]
+    Action(Action),
+    #[error("not aligned")]
+    NotAligned,
+    #[error("not backed by continuous host memory")]
+    NotContinuous,
+}
+
+pub type Result<T, E = Error> = std::result::Result<T, E>;
+
+impl<T> From<PoisonError<T>> for Error {
+    fn from(_: PoisonError<T>) -> Self {
+        Error::LockPoisoned
+    }
+}
+
+/// Book-keeping of the guest address ranges handed out so far, one
+/// [`Addressable`] per address segment.
+#[derive(Debug, Default)]
+pub struct Allocator {
+    ram32: Addressable<MemRegion>,
+    dev32: Addressable<MemRegion>,
+    mem64: Addressable<MemRegion>,
+    #[cfg(target_arch = "x86_64")]
+    io: Addressable<MemRegion>,
+}
+
+/// Guest memory: the RAM, MMIO and port I/O buses plus the allocator that
+/// assigns addresses on them.
+#[derive(Debug)]
+pub struct Memory {
+    ram_bus: Arc<RamBus>,
+    mmio_bus: MmioBus,
+    io_bus: IoBus,
+    // TODO do we need a global lock?
+    allocator: Mutex<Allocator>,
+}
+
+/// Where a new region should be placed in the guest address space.
+pub enum AddrOpt {
+    Any,
+    Fixed(usize),
+    Below4G,
+    Above4G,
+}
+
+#[derive(Debug)]
+pub enum DevMem {
+    UserMem(UserMem),
+    // NOTE(review): the trait-object bounds were lost when this text was
+    // extracted; `Send + Sync` is a reconstruction - confirm.
+    Mmio(Arc<dyn Mmio + Send + Sync>),
+}
+
+impl Memory {
+    /// Creates an empty guest memory backed by `vm_memory`.
+    pub fn new<M>(vm_memory: M) -> Self
+    where
+        M: VmMemory,
+    {
+        Memory {
+            ram_bus: Arc::new(RamBus::new(vm_memory)),
+            mmio_bus: MmioBus::new(),
+            allocator: Mutex::new(Allocator::default()),
+            io_bus: IoBus::new(),
+        }
+    }
+
+    pub fn ram_bus(&self) -> &Arc<RamBus> {
+        &self.ram_bus
+    }
+
+    /// Returns all allocated regions as `(address, region)` pairs: first the
+    /// 32-bit RAM segment, then the 32-bit device segment, then the 64-bit
+    /// segment.
+    pub fn to_mem_regions(&self) -> Result<Vec<(usize, MemRegion)>, Error> {
+        let allocator = self.allocator.lock()?;
+        let regions = allocator
+            .ram32
+            .iter()
+            .chain(allocator.dev32.iter())
+            .chain(allocator.mem64.iter())
+            .map(|(addr, region)| (addr, *region))
+            .collect();
+        Ok(regions)
+    }
+
+    /// Allocates `size` bytes in `segment` right after its last slot (or at
+    /// `segment_start` when that is higher), aligned to `size`, and records
+    /// `regions` back to back starting at the chosen address.
+    ///
+    /// Returns the chosen address, or [`Error::CanotAllocate`] when the
+    /// range would extend past `segment_end`.
+    fn alloc_sub(
+        size: usize,
+        segment: &mut Addressable<MemRegion>,
+        segment_start: usize,
+        segment_end: usize,
+        regions: &[(usize, MemRegionType)],
+    ) -> Result<usize> {
+        // let rounded_size = usize::next_power_of_two(usize::max(size, PAGE_SIZE));
+        let rounded_size = size;
+        let start = if let Some((start, region)) = segment.last() {
+            start + region.size
+        } else {
+            segment_start
+        };
+        let start = std::cmp::max(start, segment_start);
+        let aligned_start = align_up!(start, rounded_size);
+        log::info!(
+            "aligned start = {:#x}, segment_end = {:#x}",
+            aligned_start,
+            segment_end
+        );
+        // Checked addition: with `segment_end == usize::MAX` a plain `+`
+        // could overflow instead of reporting the failure.
+        let fits = matches!(
+            aligned_start.checked_add(rounded_size),
+            Some(end) if end <= segment_end
+        );
+        if !fits {
+            return Err(Error::CanotAllocate {
+                backtrace: Backtrace::force_capture(),
+            });
+        }
+        let mut addr = aligned_start;
+        for (size, type_) in regions.iter() {
+            let region = MemRegion {
+                size: *size,
+                type_: *type_,
+            };
+            segment.add(addr, region)?;
+            addr += size;
+        }
+        Ok(aligned_start)
+    }
+
+    /// Allocates an address for `user_mem` according to `gpa`, records
+    /// `regions` for it and adds the memory to the RAM bus.
+    pub fn add_ram(
+        &self,
+        gpa: AddrOpt,
+        user_mem: UserMem,
+        regions: &[(usize, MemRegionType)],
+    ) -> Result<usize, Error> {
+        let addr = self.alloc(gpa, user_mem.size(), false, regions)?;
+        self.ram_bus.add(addr, user_mem)?;
+        Ok(addr)
+    }
+
+    /// Registers `dev` on the port I/O bus, at `port` when given, otherwise
+    /// at a port allocated from `0x1000` upwards. Returns the port used.
+    #[cfg(target_arch = "x86_64")]
+    pub fn add_io_dev(&self, port: Option<u16>, dev: IoDev) -> Result<u16, Error> {
+        let mut allocator = self.allocator.lock()?;
+        let port = match port {
+            Some(port) => {
+                allocator.io.add(
+                    port as usize,
+                    MemRegion {
+                        size: dev.size(),
+                        type_: MemRegionType::Reserved,
+                    },
+                )?;
+                port
+            }
+            None => {
+                let port = Self::alloc_sub(
+                    dev.size(),
+                    &mut allocator.io,
+                    0x1000,
+                    0xffff,
+                    &[(dev.size(), MemRegionType::Reserved)],
+                )?;
+                port as u16
+            }
+        };
+        self.io_bus.add(port, dev)?;
+        Ok(port)
+    }
+
+    /// Reserves guest address space for `size` bytes and records `regions`
+    /// in the segment selected by `gpa` and `is_dev`.
+    fn alloc(
+        &self,
+        gpa: AddrOpt,
+        size: usize,
+        is_dev: bool,
+        regions: &[(usize, MemRegionType)],
+    ) -> Result<usize> {
+        let mut allocator = self.allocator.lock()?;
+        let addr_start = match gpa {
+            AddrOpt::Fixed(gpa) => {
+                // A fixed address near the top of the address space must not
+                // overflow the end computation.
+                let end = gpa
+                    .checked_add(size)
+                    .ok_or(Error::OutOfRange { addr: gpa, size })?;
+                // NOTE(review): a range ending exactly at 4 GiB is classified
+                // as 64-bit by this comparison - confirm that is intended.
+                let below_4g = end <= u32::MAX as usize;
+                let mut region_gpa = gpa;
+                for (size, type_) in regions.iter() {
+                    let region = MemRegion {
+                        size: *size,
+                        type_: *type_,
+                    };
+                    if below_4g {
+                        if is_dev {
+                            allocator.dev32.add(region_gpa, region)?;
+                        } else {
+                            allocator.ram32.add(region_gpa, region)?;
+                        }
+                    } else {
+                        allocator.mem64.add(region_gpa, region)?;
+                    }
+                    region_gpa += size;
+                }
+                Ok(gpa)
+            }
+            AddrOpt::Above4G | AddrOpt::Any => Self::alloc_sub(
+                size,
+                &mut allocator.mem64,
+                MEM_64_START,
+                usize::MAX,
+                regions,
+            ),
+            AddrOpt::Below4G => {
+                if is_dev {
+                    Self::alloc_sub(
+                        size,
+                        &mut allocator.dev32,
+                        MMIO_32_START,
+                        PCIE_CONFIG_END,
+                        regions,
+                    )
+                } else {
+                    Self::alloc_sub(size, &mut allocator.ram32, 0, RAM_32_END, regions)
+                }
+            }
+        }?;
+        Ok(addr_start)
+    }
+
+    /// Translates an [`Action`] requested by a device into a VM entry.
+    fn handle_action(&self, action: Action) -> Result<VmEntry> {
+        match action {
+            Action::Shutdown => Ok(VmEntry::Shutdown),
+        }
+    }
+
+    /// Handles a guest MMIO access at `gpa`; `write` carries the value for a
+    /// write and is `None` for a read.
+    pub fn handle_mmio(&self, gpa: usize, write: Option<u64>, size: u8) -> Result<VmEntry> {
+        if let Some(val) = write {
+            match self.mmio_bus.write(gpa, size, val) {
+                Ok(()) => Ok(VmEntry::None),
+                Err(Error::Action(action)) => self.handle_action(action),
+                Err(e) => Err(e),
+            }
+        } else {
+            let data = self.mmio_bus.read(gpa, size)?;
+            Ok(VmEntry::Mmio { data })
+        }
+    }
+
+    /// Handles a guest port I/O access; `write` carries the value for a
+    /// write and is `None` for a read.
+    ///
+    /// Accesses to ports 0x600 and 0x601 are logged, and writing 0x34 to
+    /// either of them shuts the VM down without consulting the I/O bus.
+    pub fn handle_io(&self, port: u16, write: Option<u32>, size: u8) -> Result<VmEntry> {
+        if port == 0x600 || port == 0x601 {
+            log::warn!("port = {:#x}, val = {:#x?}, size = {}", port, write, size);
+            if write == Some(0x34) {
+                return Ok(VmEntry::Shutdown);
+            }
+        }
+        if let Some(val) = write {
+            match self.io_bus.write(port, size, val) {
+                Ok(()) => Ok(VmEntry::None),
+                Err(Error::Action(action)) => self.handle_action(action),
+                Err(e) => Err(e),
+            }
+        } else {
+            let data = self.io_bus.read(port, size)?;
+            Ok(VmEntry::Io { data })
+        }
+    }
+}
+
+/// Kind of a guest memory region.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum MemRegionType {
+    Ram,
+    Reserved,
+    Acpi,
+    Pmem,
+}
+
+/// Size and kind of a guest memory region; the start address is stored by
+/// the containing [`Addressable`].
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct MemRegion {
+    pub size: usize,
+    pub type_: MemRegionType,
+}
+
+impl SlotBackend for MemRegion {
+    fn size(&self) -> usize {
+        self.size
+    }
+}
diff --git a/alioth/src/mem/addressable.rs b/alioth/src/mem/addressable.rs
new file mode 100644
index 0000000..0664ef4
--- /dev/null
+++ b/alioth/src/mem/addressable.rs
@@ -0,0 +1,277 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::ops::RangeBounds;
+
+use crate::mem::{Error, Result};
+
+/// Something that occupies a contiguous range of `size()` addresses.
+pub trait SlotBackend {
+    fn size(&self) -> usize;
+}
+
+/// A backend placed at a start address.
+#[derive(Debug)]
+struct Slot<B>
+where
+    B: SlotBackend,
+{
+    addr: usize,
+    backend: B,
+}
+
+impl<B> Slot<B>
+where
+    B: SlotBackend,
+{
+    /// Creates a slot, rejecting ranges whose last address would not fit
+    /// into `usize`.
+    fn new(addr: usize, backend: B) -> Result<Self> {
+        debug_assert_ne!(backend.size(), 0);
+        match (backend.size() - 1).checked_add(addr) {
+            None => Err(Error::OutOfRange {
+                addr,
+                size: backend.size(),
+            }),
+            Some(_) => Ok(Self { addr, backend }),
+        }
+    }
+
+    /// Exclusive end address. Wraps to 0 for a slot that ends at the very
+    /// top of the address space, so it is used for reporting only.
+    fn addr_end(&self) -> usize {
+        self.addr.wrapping_add(self.backend.size())
+    }
+
+    /// Address of the last byte covered by the slot. Unlike `addr_end` this
+    /// cannot wrap for a slot accepted by `Slot::new`, which makes it the
+    /// right bound for comparisons.
+    fn addr_max(&self) -> usize {
+        self.addr
+            .wrapping_add(self.backend.size().wrapping_sub(1))
+    }
+}
+
+/// Iterator over `(address, backend)` pairs in ascending address order.
+pub struct Iter<'a, B>
+where
+    B: SlotBackend,
+{
+    iter: std::slice::Iter<'a, Slot<B>>,
+}
+
+impl<'a, B> Iterator for Iter<'a, B>
+where
+    B: SlotBackend,
+{
+    type Item = (usize, &'a B);
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next().map(|slot| (slot.addr, &slot.backend))
+    }
+}
+
+impl<'a, B> DoubleEndedIterator for Iter<'a, B>
+where
+    B: SlotBackend,
+{
+    fn next_back(&mut self) -> Option<Self::Item> {
+        self.iter.next_back().map(|slot| (slot.addr, &slot.backend))
+    }
+}
+
+/// Non-overlapping slots kept sorted by start address.
+#[derive(Debug)]
+pub struct Addressable<B>
+where
+    B: SlotBackend,
+{
+    slots: Vec<Slot<B>>,
+}
+
+impl<B> Default for Addressable<B>
+where
+    B: SlotBackend,
+{
+    fn default() -> Self {
+        Addressable { slots: Vec::new() }
+    }
+}
+
+impl<B> Addressable<B>
+where
+    B: SlotBackend,
+{
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn iter(&self) -> Iter<'_, B> {
+        Iter {
+            iter: self.slots.iter(),
+        }
+    }
+
+    /// Removes the slots at the given index range (positions in address
+    /// order, not addresses) and yields them as `(address, backend)`.
+    pub fn drain(
+        &mut self,
+        range: impl RangeBounds<usize>,
+    ) -> impl Iterator<Item = (usize, B)> + '_ {
+        self.slots.drain(range).map(|s| (s.addr, s.backend))
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.slots.is_empty()
+    }
+
+    /// Returns the slot with the highest start address.
+    pub fn last(&self) -> Option<(usize, &B)> {
+        self.slots.last().map(|slot| (slot.addr, &slot.backend))
+    }
+}
+
+impl<B> Addressable<B>
+where
+    B: SlotBackend,
+{
+    /// Inserts `backend` at `addr`.
+    ///
+    /// # Errors
+    ///
+    /// [`Error::OutOfRange`] if the range does not fit into the address
+    /// space, [`Error::Overlap`] if it intersects an existing slot.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `backend.size()` is 0.
+    pub fn add(&mut self, addr: usize, backend: B) -> Result<&mut B> {
+        assert_ne!(backend.size(), 0);
+        let slot = Slot::new(addr, backend)?;
+        // Overlap is decided on inclusive last addresses: the exclusive end
+        // of a slot reaching the top of the address space wraps to 0 and
+        // would make such a slot look empty.
+        let result = match self.slots.binary_search_by_key(&addr, |s| s.addr) {
+            Ok(index) => Err(&self.slots[index]),
+            Err(index) => {
+                if index < self.slots.len() && self.slots[index].addr <= slot.addr_max() {
+                    Err(&self.slots[index])
+                } else if index > 0 && slot.addr <= self.slots[index - 1].addr_max() {
+                    Err(&self.slots[index - 1])
+                } else {
+                    Ok(index)
+                }
+            }
+        };
+        match result {
+            Err(curr_slot) => Err(Error::Overlap {
+                new_addr: slot.addr,
+                new_end: slot.addr_end(),
+                curr_addr: curr_slot.addr,
+                curr_end: curr_slot.addr_end(),
+            }),
+            Ok(index) => {
+                self.slots.insert(index, slot);
+                // TODO add some compiler hint to eliminate bound check?
+                Ok(&mut self.slots[index].backend)
+            }
+        }
+    }
+
+    /// Removes the slot that starts exactly at `addr`.
+    pub fn remove(&mut self, addr: usize) -> Result<B> {
+        match self.slots.binary_search_by_key(&addr, |s| s.addr) {
+            Ok(index) => Ok(self.slots.remove(index).backend),
+            Err(_) => Err(Error::NotMapped(addr)),
+        }
+    }
+
+    /// Returns the slot containing `addr`, as `(start address, backend)`.
+    pub fn search(&self, addr: usize) -> Option<(usize, &B)> {
+        match self.slots.binary_search_by_key(&addr, |s| s.addr) {
+            Ok(index) => Some((self.slots[index].addr, &self.slots[index].backend)),
+            Err(0) => None,
+            Err(index) => {
+                let candidate = &self.slots[index - 1];
+                if addr <= candidate.addr_max() {
+                    Some((candidate.addr, &candidate.backend))
+                } else {
+                    None
+                }
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::assert_matches::assert_matches;
+
+    use super::*;
+
+    #[derive(Debug, PartialEq)]
+    struct Backend {
+        size: usize,
+    }
+
+    impl SlotBackend for Backend {
+        fn size(&self) -> usize {
+            self.size
+        }
+    }
+
+    #[test]
+    fn test_overflow() {
+        assert_matches!(
+            Slot::new(usize::MAX, Backend { size: 0x10 }),
+            Err(Error::OutOfRange {
+                size: 0x10,
+                addr: usize::MAX,
+            })
+        );
+    }
+
+    #[test]
+    fn test_addressable() {
+        let mut memory = Addressable::<Backend>::new();
+        assert_matches!(memory.add(0x1000, Backend { size: 0x1000 }), Ok(_));
+        assert_matches!(memory.add(0x5000, Backend { size: 0x1000 }), Ok(_));
+        assert_matches!(memory.add(0x2000, Backend { size: 0x2000 }), Ok(_));
+        assert_eq!(memory.slots.len(), 3);
+        assert!(!memory.is_empty());
+        assert_eq!(memory.last(), Some((0x5000, &memory.slots[2].backend)));
+        // assert_matches!(memory.last_mut(), Some((0x5000, _)));
+        assert_matches!(
+            memory.add(0x1000, Backend { size: 0x2000 }),
+            Err(Error::Overlap {
+                new_addr: 0x1000,
+                new_end: 0x3000,
+                curr_addr: 0x1000,
+                curr_end: 0x2000
+            })
+        );
+        assert_matches!(
+            memory.add(0x0, Backend { size: 0x2000 }),
+            Err(Error::Overlap {
+                new_addr: 0x0,
+                new_end: 0x2000,
+                curr_addr: 0x1000,
+                curr_end: 0x2000
+            })
+        );
+        assert_matches!(
+            memory.add(0x3000, Backend { size: 0x1000 }),
+            Err(Error::Overlap {
+                new_addr: 0x3000,
+                new_end: 0x4000,
+                curr_addr: 0x2000,
+                curr_end: 0x4000
+            })
+        );
+
+        assert_eq!(
+            memory.search(0x1000),
+            Some((memory.slots[0].addr, &memory.slots[0].backend))
+        );
+        assert_eq!(memory.search(0x0), None);
+        assert_eq!(
+            memory.search(0x1500),
+            Some((memory.slots[0].addr, &memory.slots[0].backend))
+        );
+        assert_eq!(memory.search(0x4000), None);
+
+        let mut iter = memory.iter();
+        assert_eq!(
+            iter.next(),
+            Some((memory.slots[0].addr, &memory.slots[0].backend))
+        );
+        assert_eq!(
+            iter.next_back(),
+            Some((memory.slots[2].addr, &memory.slots[2].backend))
+        );
+        assert_eq!(
+            iter.next(),
+            Some((memory.slots[1].addr, &memory.slots[1].backend))
+        );
+        assert_eq!(iter.next(), None);
+
+        assert_matches!(memory.remove(0x1000), Ok(Backend { size: 0x1000 }));
+        assert_matches!(memory.remove(0x2001), Err(Error::NotMapped(0x2001)));
+    }
+
+    #[test]
+    fn test_slot_at_top_of_address_space() {
+        let mut memory = Addressable::<Backend>::new();
+        let top = usize::MAX - 0xfff;
+        assert_matches!(memory.add(top, Backend { size: 0x1000 }), Ok(_));
+        // Starts below the existing slot and runs into it.
+        assert_matches!(
+            memory.add(usize::MAX - 0x1fff, Backend { size: 0x2000 }),
+            Err(Error::Overlap { .. })
+        );
+        // Starts inside the existing slot.
+        assert_matches!(
+            memory.add(usize::MAX - 0x7ff, Backend { size: 0x800 }),
+            Err(Error::Overlap { .. })
+        );
+        assert_eq!(memory.slots.len(), 1);
+        assert_eq!(
+            memory.search(usize::MAX),
+            Some((top, &memory.slots[0].backend))
+        );
+    }
+}
diff --git a/alioth/src/mem/io.rs b/alioth/src/mem/io.rs
new file mode 100644
index 0000000..51196e2
--- /dev/null
+++ b/alioth/src/mem/io.rs
@@ -0,0 +1,56 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::sync::{Arc, RwLock};
+
+use super::mmio::{Mmio, MmioRange};
+use super::Result;
+
+/// Port I/O bus: dispatches port accesses to the devices registered on it.
+#[derive(Debug)]
+pub struct IoBus {
+    inner: RwLock<MmioRange>,
+}
+
+// NOTE(review): the trait-object bounds were lost when this text was
+// extracted; `Send + Sync` is a reconstruction and must match `MmioRegion`
+// in mmio.rs - confirm.
+pub type IoDev = Arc<dyn Mmio + Send + Sync>;
+
+impl Default for IoBus {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl IoBus {
+    /// Creates an empty bus covering ports `0x0000..=0xffff`.
+    pub fn new() -> IoBus {
+        Self {
+            // `with_size` takes the number of addresses, so the full port
+            // space needs `u16::MAX + 1`; passing `u16::MAX` would leave
+            // port 0xffff unusable.
+            inner: RwLock::new(MmioRange::with_size(u16::MAX as usize + 1)),
+        }
+    }
+
+    /// Registers `dev` at `port` and notifies it through `Mmio::mapped`.
+    pub(super) fn add(&self, port: u16, dev: IoDev) -> Result<()> {
+        let mut inner = self.inner.write()?;
+        let dev = inner.add(port as usize, dev)?;
+        dev.mapped(port as usize)?;
+        Ok(())
+    }
+
+    /// Reads `size` bytes from `port`; the device's 64-bit result is
+    /// truncated to 32 bits.
+    pub fn read(&self, port: u16, size: u8) -> Result<u32> {
+        let inner = self.inner.read()?;
+        inner.read(port as usize, size).map(|v| v as u32)
+    }
+
+    /// Writes `val` to `port`. Only the read lock is taken: devices are
+    /// called through `&self`.
+    pub fn write(&self, port: u16, size: u8, val: u32) -> Result<()> {
+        let inner = self.inner.read()?;
+        inner.write(port as usize, size, val as u64)
+    }
+}
diff --git a/alioth/src/mem/mmio.rs b/alioth/src/mem/mmio.rs
new file mode 100644
index 0000000..945e441
--- /dev/null
+++ b/alioth/src/mem/mmio.rs
@@ -0,0 +1,174 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use std::any::type_name;
+use std::fmt::Debug;
+use std::sync::{Arc, RwLock};
+
+use super::addressable::{Addressable, SlotBackend};
+use super::{Error, Result};
+
+/// A device that is accessed through reads and writes at offsets within its
+/// own address range.
+pub trait Mmio: Debug {
+    /// Reads `size` bytes at `offset`, relative to the device's start.
+    fn read(&self, offset: usize, size: u8) -> Result<u64>;
+    /// Writes the `size`-byte value `val` at `offset`.
+    fn write(&self, offset: usize, size: u8, val: u64) -> Result<()>;
+    /// Called after the device has been placed at `addr` on a bus.
+    fn mapped(&self, addr: usize) -> Result<()> {
+        log::trace!("{:#x} -> {}", addr, type_name::<Self>());
+        Ok(())
+    }
+    /// Called after the device has been removed from a bus.
+    fn unmapped(&self) -> Result<()> {
+        log::trace!("{} unmapped", type_name::<Self>());
+        Ok(())
+    }
+    /// Number of addresses the device occupies.
+    fn size(&self) -> usize;
+}
+
+// NOTE(review): the trait-object bounds were lost when this text was
+// extracted; `Send + Sync` is a reconstruction - confirm.
+pub type MmioRegion = Arc<dyn Mmio + Send + Sync>;
+
+impl SlotBackend for MmioRegion {
+    fn size(&self) -> usize {
+        Mmio::size(self.as_ref())
+    }
+}
+
+/// A window of addresses `0..=limit` holding non-overlapping devices.
+#[derive(Debug)]
+pub struct MmioRange {
+    limit: usize,
+    inner: Addressable<MmioRegion>,
+}
+
+impl MmioRange {
+    /// Creates an empty range of `size` addresses.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `size` is 0.
+    pub fn with_size(size: usize) -> Self {
+        assert_ne!(size, 0);
+        MmioRange {
+            limit: size - 1,
+            inner: Addressable::new(),
+        }
+    }
+
+    // Range covering the whole address space; see the comment in `size`.
+    fn new() -> Self {
+        MmioRange {
+            limit: usize::MAX,
+            inner: Addressable::new(),
+        }
+    }
+
+    pub fn is_empty(&self) -> bool {
+        self.inner.is_empty()
+    }
+
+    /// Places `dev` at `offset`; fails with [`Error::OutOfRange`] when the
+    /// device's last address would lie beyond the range limit.
+    pub fn add(&mut self, offset: usize, dev: MmioRegion) -> Result<&mut MmioRegion> {
+        let in_range = (dev.size() - 1)
+            .checked_add(offset)
+            .map(|max| max <= self.limit);
+        match in_range {
+            Some(true) => self.inner.add(offset, dev),
+            Some(false) | None => Err(Error::OutOfRange {
+                addr: offset,
+                size: dev.size(),
+            }),
+        }
+    }
+
+    /// Removes the device that starts exactly at `addr`.
+    pub fn remove(&mut self, addr: usize) -> Result<MmioRegion> {
+        self.inner.remove(addr)
+    }
+
+    /// Forwards a read to the device covering `addr`; addresses without a
+    /// device read as 0.
+    pub fn read(&self, addr: usize, size: u8) -> Result<u64> {
+        match self.inner.search(addr) {
+            Some((start, dev)) => dev.read(addr - start, size),
+            None => Ok(0),
+        }
+    }
+
+    /// Forwards a write to the device covering `addr`; writes to addresses
+    /// without a device are ignored.
+    pub fn write(&self, addr: usize, size: u8, val: u64) -> Result<()> {
+        match self.inner.search(addr) {
+            Some((start, dev)) => dev.write(addr - start, size, val),
+            None => Ok(()),
+        }
+    }
+}
+
+impl Mmio for MmioRange {
+    fn size(&self) -> usize {
+        // Overflow happens when limit = usize::MAX, which is only possible when
+        // it was constructed through MmioRange::new(). MmioRange::new() is private
+        // and only MmioBus uses it.
+        self.limit.wrapping_add(1)
+    }
+
+    fn read(&self, offset: usize, size: u8) -> Result<u64> {
+        self.read(offset, size)
+    }
+
+    fn write(&self, offset: usize, size: u8, val: u64) -> Result<()> {
+        self.write(offset, size, val)
+    }
+
+    // Propagates the notification to every contained device with its
+    // absolute address.
+    fn mapped(&self, addr: usize) -> Result<()> {
+        for (offset, range) in self.inner.iter() {
+            range.mapped(addr + offset)?;
+        }
+        Ok(())
+    }
+
+    fn unmapped(&self) -> Result<()> {
+        for (_, range) in self.inner.iter() {
+            range.unmapped()?;
+        }
+        Ok(())
+    }
+}
+
+/// MMIO bus spanning the whole address space.
+#[derive(Debug)]
+pub struct MmioBus {
+    inner: RwLock<MmioRange>,
+}
+
+impl Default for MmioBus {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl MmioBus {
+    pub fn new() -> MmioBus {
+        Self {
+            inner: RwLock::new(MmioRange::new()),
+        }
+    }
+
+    /// Registers `dev` at `addr` and notifies it through `Mmio::mapped`.
+    pub(super) fn add(&self, addr: usize, dev: MmioRegion) -> Result<()> {
+        let mut inner = self.inner.write()?;
+        let dev = inner.add(addr, dev)?;
+        dev.mapped(addr)?;
+        Ok(())
+    }
+
+    /// Removes the device starting at `addr`, notifies it through
+    /// `Mmio::unmapped` and returns it.
+    pub(super) fn remove(&self, addr: usize) -> Result<MmioRegion> {
+        let mut inner = self.inner.write()?;
+        let dev = inner.remove(addr)?;
+        dev.unmapped()?;
+        Ok(dev)
+    }
+
+    pub fn read(&self, addr: usize, size: u8) -> Result<u64> {
+        let inner = self.inner.read()?;
+        inner.read(addr, size)
+    }
+
+    pub fn write(&self, addr: usize, size: u8, val: u64) -> Result<()> {
+        let inner = self.inner.read()?;
+        inner.write(addr, size, val)
+    }
+}
diff --git a/alioth/src/mem/ram.rs b/alioth/src/mem/ram.rs
new file mode 100644
index 0000000..17972ab
--- /dev/null
+++ b/alioth/src/mem/ram.rs
@@ -0,0 +1,670 @@
+// Copyright 2024 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::type_name; +use std::cell::UnsafeCell; +use std::fmt::Debug; +use std::fs::File; +use std::io::{IoSlice, IoSliceMut, Read, Write}; +use std::mem::size_of; +use std::ops::Deref; +use std::os::fd::AsRawFd; +use std::ptr::{null_mut, NonNull}; +use std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::{Arc, RwLock, RwLockReadGuard}; + +use libc::{ + c_void, mmap, msync, munmap, MAP_ANONYMOUS, MAP_FAILED, MAP_PRIVATE, MS_ASYNC, PROT_EXEC, + PROT_READ, PROT_WRITE, +}; +use zerocopy::{AsBytes, FromBytes}; + +use crate::ffi; +use crate::hv::{MemMapOption, VmMemory}; + +use super::addressable::{Addressable, SlotBackend}; +use super::{Error, Result}; + +const UNASSIGNED_SLOT_ID: u32 = u32::MAX; + +#[derive(Debug)] +struct UserMemInner { + addr: NonNull, + len: usize, + map_callback: RwLock>>, + slot_id: AtomicU32, +} + +unsafe impl Send for UserMemInner {} +unsafe impl Sync for UserMemInner {} + +impl Drop for UserMemInner { + fn drop(&mut self) { + let ret = unsafe { munmap(self.addr.as_ptr(), self.len) }; + if ret != 0 { + log::error!("munmap({:p}, {:x}) = {:x}", self.addr, self.len, ret); + } else { + log::info!("munmap({:p}, {:x}) = {:x}, done", self.addr, self.len, ret); + } + } +} + +pub trait MmapCallback: Debug { + fn mapped(&self, addr: usize) -> Result<(), Error> { + log::trace!("{:#x} -> {}", addr, type_name::()); + Ok(()) + } + fn unmapped(&self) -> Result<(), Error> { + log::trace!("{} unmapped", type_name::()); + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub struct UserMem { + addr: usize, + size: usize, + inner: Arc, +} + 
+impl SlotBackend for UserMem { + fn size(&self) -> usize { + self.size + } +} + +impl UserMem { + pub fn addr(&self) -> usize { + self.addr + } + + pub fn size(&self) -> usize { + self.size + } + + pub fn sync(&self) -> Result<()> { + ffi!(unsafe { msync(self.addr as *mut _, self.size, MS_ASYNC) })?; + Ok(()) + } + + pub fn add_map_callback( + &self, + f: Box, + ) -> Result<(), Error> { + let mut callbacks = self.inner.map_callback.write()?; + callbacks.push(f); + Ok(()) + } + + fn mapped_to_guest(&self, gpa: usize) -> Result<(), Error> { + let callbacks = self.inner.map_callback.read()?; + for callback in callbacks.iter() { + callback.mapped(gpa)?; + } + Ok(()) + } + + fn unmapped_from_guest(&self) -> Result<(), Error> { + let callbacks = self.inner.map_callback.read()?; + for callback in callbacks.iter() { + callback.unmapped()?; + } + Ok(()) + } + + fn new_raw(addr: *mut c_void, len: usize) -> Self { + let addr = NonNull::new(addr).expect("address from mmap() should not be null"); + UserMem { + addr: addr.as_ptr() as usize, + size: len, + inner: Arc::new(UserMemInner { + addr, + len, + map_callback: RwLock::new(Vec::new()), + slot_id: AtomicU32::new(UNASSIGNED_SLOT_ID), + }), + } + } + + pub fn new_file(file: File) -> Result { + let mut prot = PROT_READ | PROT_EXEC; + let meta = file.metadata().map_err(Error::Mmap)?; + let is_readonly = meta.permissions().readonly(); + if !is_readonly { + prot |= PROT_WRITE; + } + let size = meta.len() as usize; + let addr = unsafe { mmap(null_mut(), size, prot, MAP_PRIVATE, file.as_raw_fd(), 0) }; + match addr { + MAP_FAILED => Err(Error::Mmap(std::io::Error::last_os_error())), + addr => Ok(Self::new_raw(addr, size)), + } + } + + pub fn new_anon(size: usize) -> Result { + let prot = PROT_WRITE | PROT_READ | PROT_EXEC; + let addr = unsafe { mmap(null_mut(), size, prot, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0) }; + match addr { + MAP_FAILED => Err(Error::Mmap(std::io::Error::last_os_error())), + addr => Ok(Self::new_raw(addr, size)), 
+ } + } + + /// Given offset and len, return the host virtual address and len; + /// len might be truncated. + fn get_valid_range(&self, offset: usize, len: usize) -> Result<(usize, usize)> { + let end = offset.wrapping_add(len).wrapping_sub(1); + if offset >= self.size || end < offset { + return Err(Error::OutOfRange { + addr: offset, + size: len, + }); + } + let valid_len = std::cmp::min(self.size - offset, len); + Ok((self.addr + offset, valid_len)) + } + + pub fn read(&self, offset: usize) -> Result + where + T: FromBytes, + { + let s = self.get_partial_slice(offset, size_of::())?; + match FromBytes::read_from(s) { + None => Err(Error::OutOfRange { + addr: offset, + size: size_of::(), + }), + Some(v) => Ok(v), + } + } + + pub fn write(&self, offset: usize, val: &T) -> Result<(), Error> + where + T: AsBytes, + { + let s = self.get_partial_slice_mut(offset, size_of::())?; + match AsBytes::write_to(val, s) { + None => Err(Error::OutOfRange { + addr: offset, + size: size_of::(), + }), + Some(()) => Ok(()), + } + } + + /// Given offset and len, return a slice, len might be truncated. + fn get_partial_slice(&self, offset: usize, len: usize) -> Result<&[u8], Error> { + let (addr, len) = self.get_valid_range(offset, len)?; + Ok(unsafe { std::slice::from_raw_parts(addr as *const u8, len) }) + } + + /// Given offset and len, return a mutable slice, len might be truncated. 
+ fn get_partial_slice_mut(&self, offset: usize, len: usize) -> Result<&mut [u8], Error> { + let (addr, len) = self.get_valid_range(offset, len)?; + Ok(unsafe { std::slice::from_raw_parts_mut(addr as *mut u8, len) }) + } +} + +#[derive(Debug)] +pub struct RamBus { + inner: RwLock>, + vm_memory: Box, + next_slot_id: AtomicU32, + max_mem_slots: u32, +} + +pub struct RamLayoutGuard<'a> { + inner: RwLockReadGuard<'a, Addressable>, +} + +impl Deref for RamLayoutGuard<'_> { + type Target = Addressable; + + fn deref(&self) -> &Addressable { + &self.inner + } +} + +struct Iter<'a> { + inner: &'a Addressable, + gpa: usize, + remain: usize, +} + +impl<'a> Iterator for Iter<'a> { + type Item = Result<&'a [u8]>; + fn next(&mut self) -> Option { + if self.remain == 0 { + return None; + } + let r = self.inner.get_partial_slice(self.gpa, self.remain); + if let Ok(s) = r { + self.gpa += s.len(); + self.remain -= s.len(); + } + Some(r) + } +} + +struct IterMut<'a> { + inner: &'a Addressable, + gpa: usize, + remain: usize, +} + +impl<'a> Iterator for IterMut<'a> { + type Item = Result<&'a mut [u8]>; + fn next(&mut self) -> Option { + if self.remain == 0 { + return None; + } + let r = self.inner.get_partial_slice_mut(self.gpa, self.remain); + if let Ok(ref s) = r { + self.gpa += s.len(); + self.remain -= s.len(); + } + Some(r) + } +} + +impl Addressable { + fn slice_iter(&self, gpa: usize, len: usize) -> Iter { + Iter { + inner: self, + gpa, + remain: len, + } + } + + fn slice_iter_mut(&self, gpa: usize, len: usize) -> IterMut { + IterMut { + inner: self, + gpa, + remain: len, + } + } + + fn get_partial_slice(&self, gpa: usize, len: usize) -> Result<&[u8]> { + let Some((start, user_mem)) = self.search(gpa) else { + return Err(Error::NotMapped(gpa)); + }; + user_mem.get_partial_slice(gpa - start, len) + } + + fn get_partial_slice_mut(&self, gpa: usize, len: usize) -> Result<&mut [u8]> { + let Some((start, user_mem)) = self.search(gpa) else { + return Err(Error::NotMapped(gpa)); + }; + 
user_mem.get_partial_slice_mut(gpa - start, len) + } + + pub fn get_slice(&self, gpa: usize, len: usize) -> Result<&[UnsafeCell], Error> { + let total_len = len * size_of::(); + let host_ref = self.get_partial_slice(gpa, total_len)?; + let ptr = host_ref.as_ptr() as *const UnsafeCell; + if host_ref.len() != total_len { + Err(Error::NotContinuous) + } else if !ptr.is_aligned() { + Err(Error::NotAligned) + } else { + Ok(unsafe { &*core::ptr::slice_from_raw_parts(ptr, len) }) + } + } + + pub fn get_ref(&self, gpa: usize) -> Result<&UnsafeCell, Error> { + let host_ref = self.get_partial_slice(gpa, size_of::())?; + let ptr = host_ref.as_ptr() as *const UnsafeCell; + if host_ref.len() != size_of::() { + Err(Error::NotContinuous) + } else if !ptr.is_aligned() { + Err(Error::NotAligned) + } else { + Ok(unsafe { &*ptr }) + } + } + + pub fn read(&self, gpa: usize) -> Result + where + T: FromBytes + AsBytes, + { + let mut val = T::new_zeroed(); + let buf = val.as_bytes_mut(); + let host_ref = self.get_partial_slice(gpa, size_of::())?; + if host_ref.len() == buf.len() { + buf.copy_from_slice(host_ref); + Ok(val) + } else { + let mut cur = 0; + for r in self.slice_iter(gpa, size_of::()) { + let s = r?; + let s_len = s.len(); + buf[cur..(cur + s_len)].copy_from_slice(s); + cur += s_len; + } + Ok(val) + } + } + + pub fn write(&self, gpa: usize, val: &T) -> Result<(), Error> + where + T: AsBytes, + { + let buf = val.as_bytes(); + let host_ref = self.get_partial_slice_mut(gpa, size_of::())?; + if host_ref.len() == buf.len() { + host_ref.copy_from_slice(buf); + Ok(()) + } else { + let mut cur = 0; + for r in self.slice_iter_mut(gpa, size_of::()) { + let s = r?; + let s_len = s.len(); + s.copy_from_slice(&buf[cur..(cur + s_len)]); + cur += s_len; + } + Ok(()) + } + } + + pub fn translate_iov<'a>(&'a self, iov: &[(usize, usize)]) -> Result>> { + let mut slices = vec![]; + for (gpa, len) in iov { + for r in self.slice_iter(*gpa, *len) { + slices.push(IoSlice::new(r?)); + } + } + 
Ok(slices) + } + + pub fn translate_iov_mut<'a>(&'a self, iov: &[(usize, usize)]) -> Result>> { + let mut slices = vec![]; + for (gpa, len) in iov { + for r in self.slice_iter_mut(*gpa, *len) { + slices.push(IoSliceMut::new(r?)); + } + } + Ok(slices) + } +} + +impl Drop for RamBus { + fn drop(&mut self) { + if let Err(e) = self.clear() { + log::info!("dropping RamBus: {:x?}", e) + } + } +} + +impl RamBus { + pub fn lock_layout(&self) -> Result, Error> { + let guard = RamLayoutGuard { + inner: self.inner.read()?, + }; + Ok(guard) + } + + pub fn new(vm_memory: M) -> Self { + let max_mem_slots = match vm_memory.max_mem_slots() { + Ok(val) => val, + Err(e) => { + log::error!( + "quering hypervisor for maximum supported memory slots, got error {e:?}" + ); + log::error!( + "assuming the maximum assuported memory slots is {:#x}", + u16::MAX + ); + u16::MAX as u32 + } + }; + Self { + inner: RwLock::new(Addressable::default()), + vm_memory: Box::new(vm_memory), + next_slot_id: AtomicU32::new(0), + max_mem_slots, + } + } + + fn map_to_vm(&self, user_mem: &UserMem, addr: usize) -> Result<(), Error> { + let mem_options = MemMapOption { + read: true, + write: true, + exec: true, + log_dirty: false, + }; + let slot_id = user_mem.inner.slot_id.load(Ordering::Acquire); + self.vm_memory + .mem_map(slot_id, addr, user_mem.size(), user_mem.addr(), mem_options)?; + log::trace!( + "user memory {} mapped: {:#x} -> {:#x}", + slot_id, + user_mem.addr, + addr + ); + Ok(()) + } + + fn unmap_from_vm(&self, user_mem: &UserMem, addr: usize) -> Result<(), Error> { + let slot_id = user_mem.inner.slot_id.load(Ordering::Acquire); + self.vm_memory.unmap(slot_id, addr, user_mem.size())?; + log::trace!( + "user memory {} unmapped: {:#x} -> {:#x}", + slot_id, + user_mem.addr, + addr + ); + Ok(()) + } + + pub(crate) fn add(&self, gpa: usize, user_mem: UserMem) -> Result<(), Error> { + let mut inner = self.inner.write()?; + let mem = inner.add(gpa, user_mem)?; + let mut slot_id = 
mem.inner.slot_id.load(Ordering::Acquire); + if slot_id == UNASSIGNED_SLOT_ID { + slot_id = self.next_slot_id.fetch_add(1, Ordering::AcqRel) % self.max_mem_slots; + mem.inner.slot_id.store(slot_id, Ordering::Release); + } + self.map_to_vm(mem, gpa)?; + mem.mapped_to_guest(gpa)?; + Ok(()) + } + + fn clear(&self) -> Result<()> { + let mut innter = self.inner.write()?; + for (gpa, user_mem) in innter.drain(..) { + self.unmap_from_vm(&user_mem, gpa)?; + user_mem.unmapped_from_guest()?; + } + Ok(()) + } + + pub(super) fn remove(&self, gpa: usize) -> Result { + let mut inner = self.inner.write()?; + let mem = inner.remove(gpa)?; + self.unmap_from_vm(&mem, gpa)?; + mem.unmapped_from_guest()?; + Ok(mem) + } + + pub fn read(&self, gpa: usize) -> Result + where + T: FromBytes + AsBytes, + { + let inner = self.inner.read()?; + inner.read(gpa) + } + + pub fn write(&self, gpa: usize, val: &T) -> Result<(), Error> + where + T: AsBytes, + { + let inner = self.inner.read()?; + inner.write(gpa, val) + } + + pub fn read_range(&self, gpa: usize, len: usize, dst: &mut impl Write) -> Result<()> { + let inner = self.inner.read()?; + for r in inner.slice_iter(gpa, len) { + dst.write_all(r?)?; + } + Ok(()) + } + + pub fn write_range(&self, gpa: usize, len: usize, mut src: impl Read) -> Result<()> { + let inner = self.inner.read()?; + for r in inner.slice_iter_mut(gpa, len) { + src.read_exact(r?)?; + } + Ok(()) + } + + pub fn read_vectored(&self, bufs: &[(usize, usize)], callback: F) -> Result + where + F: FnOnce(&[IoSlice<'_>]) -> T, + { + let inner = self.inner.read()?; + let mut iov = vec![]; + for (gpa, len) in bufs { + for r in inner.slice_iter(*gpa, *len) { + iov.push(IoSlice::new(r?)); + } + } + Ok(callback(&iov)) + } + + pub fn write_vectored(&self, bufs: &[(usize, usize)], callback: F) -> Result + where + F: FnOnce(&mut [IoSliceMut<'_>]) -> T, + { + let inner = self.inner.read()?; + let mut iov = vec![]; + for (gpa, len) in bufs { + for r in inner.slice_iter_mut(*gpa, *len) { + 
iov.push(IoSliceMut::new(r?)); + } + } + Ok(callback(&mut iov)) + } +} + +#[cfg(test)] +mod test { + use std::assert_matches::assert_matches; + use std::io::{Read, Write}; + use std::mem::size_of; + use std::ptr::null_mut; + use std::sync::atomic::{AtomicBool, Ordering}; + use std::sync::Arc; + + use libc::{mmap, munmap, MAP_ANONYMOUS, MAP_FAILED, MAP_PRIVATE, PROT_READ, PROT_WRITE}; + use zerocopy::{AsBytes, FromBytes, FromZeroes}; + + use crate::hv::test::FakeVmMemory; + + use super::{MmapCallback, RamBus, Result, UserMem}; + + #[derive(Debug, AsBytes, FromBytes, FromZeroes, PartialEq, Eq)] + #[repr(C)] + struct MyStruct { + data: [u32; 8], + } + + const PAGE_SIZE: usize = 1 << 12; + + #[test] + fn test_ram_bus_read() { + let bus = RamBus::new(FakeVmMemory); + let prot = PROT_READ | PROT_WRITE; + let size = 3 * PAGE_SIZE; + let addr = unsafe { mmap(null_mut(), size, prot, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0) }; + assert_ne!(addr, MAP_FAILED); + let munmap_ret = unsafe { munmap(addr.add(PAGE_SIZE), PAGE_SIZE) }; + assert_ne!(munmap_ret, -1); + let mem1 = UserMem::new_raw(addr, PAGE_SIZE); + let mem2_addr = unsafe { addr.add(2 * PAGE_SIZE) }; + let mem2 = UserMem::new_raw(mem2_addr, PAGE_SIZE); + + #[derive(Debug)] + struct Callback { + mapped: Arc, + } + impl MmapCallback for Callback { + fn mapped(&self, _addr: usize) -> Result<()> { + self.mapped.store(true, Ordering::Release); + Ok(()) + } + + fn unmapped(&self) -> Result<()> { + self.mapped.store(false, Ordering::Release); + Ok(()) + } + } + let mem1_mapped = Arc::new(AtomicBool::new(false)); + let mem1_callback = Callback { + mapped: mem1_mapped.clone(), + }; + mem1.add_map_callback(Box::new(mem1_callback)).unwrap(); + bus.add(0x0, mem1).unwrap(); + assert!(mem1_mapped.load(Ordering::Acquire)); + bus.add(PAGE_SIZE, mem2).unwrap(); + + let data = MyStruct { + data: [1, 2, 3, 4, 5, 6, 7, 8], + }; + let data_size = size_of::(); + for gpa in (PAGE_SIZE - data_size)..=PAGE_SIZE { + bus.write(gpa, &data).unwrap(); + 
let r: MyStruct = bus.read(gpa).unwrap(); + assert_eq!(r, data) + } + let memory_end = PAGE_SIZE * 2; + for gpa in (memory_end - data_size - 10)..=(memory_end - data_size) { + bus.write(gpa, &data).unwrap(); + let r: MyStruct = bus.read(gpa).unwrap(); + assert_eq!(r, data) + } + for gpa in (memory_end - data_size + 1)..memory_end { + assert_matches!(bus.write(gpa, &data), Err(_)); + assert_matches!(bus.read::(gpa), Err(_)); + } + + let data: Vec = (0..64).collect(); + for gpa in (PAGE_SIZE - 64)..=PAGE_SIZE { + bus.write_range(gpa, 64, &*data).unwrap(); + let mut buf = Vec::new(); + bus.read_range(gpa, 64, &mut buf).unwrap(); + assert_eq!(data, buf) + } + + let guest_iov = [(0, 16), (PAGE_SIZE - 16, 32), (2 * PAGE_SIZE - 16, 16)]; + let write_ret = bus.write_vectored(&guest_iov, |iov| { + assert_eq!(iov.len(), 4); + (&*data).read_vectored(iov) + }); + assert_matches!(write_ret, Ok(Ok(64))); + let mut buf_read = Vec::new(); + let read_ret = bus.read_vectored(&guest_iov, |iov| { + assert_eq!(iov.len(), 4); + buf_read.write_vectored(iov) + }); + assert_matches!(read_ret, Ok(Ok(64))); + + let locked_bus = bus.lock_layout().unwrap(); + let bufs = locked_bus.translate_iov(&guest_iov).unwrap(); + println!("{:?}", bufs); + drop(locked_bus); + bus.remove(0x0).unwrap(); + assert!(!mem1_mapped.load(Ordering::Acquire)); + } +} diff --git a/alioth/src/utils.rs b/alioth/src/utils.rs new file mode 100644 index 0000000..a81eda5 --- /dev/null +++ b/alioth/src/utils.rs @@ -0,0 +1,78 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +pub mod ioctls; + +#[macro_export] +macro_rules! align_up { + ($num:expr, $align:expr) => {{ + debug_assert_eq!(($align as u64).count_ones(), 1); + let mask = $align - 1; + ($num.wrapping_add(mask)) & !mask + }}; +} + +#[macro_export] +macro_rules! ffi { + ($f:expr) => {{ + let ret = $f; + if ret <= -1 { + Err(::std::io::Error::last_os_error()) + } else { + Ok(ret) + } + }}; + ($f:expr, $failure:ident) => {{ + let ret = $f; + if ret == $failure { + Err(::std::io::Error::last_os_error()) + } else { + Ok(ret) + } + }}; +} + +#[macro_export] +macro_rules! unsafe_impl_zerocopy { + ($ty:ty, $($name:ident), +) => { + $( + unsafe impl ::zerocopy::$name for $ty { + fn only_derive_is_allowed_to_implement_this_trait() + where + Self: Sized, + { + } + } + )+ + }; +} + +#[cfg(test)] +mod test { + #[test] + fn test_align_up() { + assert_eq!(align_up!(0u64, 4), 0); + assert_eq!(align_up!(1u64, 4), 4); + assert_eq!(align_up!(3u64, 4), 4); + + assert_eq!(align_up!(u64::MAX, 1), u64::MAX); + assert_eq!(align_up!(u64::MAX, 4), 0); + } + + #[test] + #[should_panic] + fn test_align_up_panic() { + let _ = align_up!(1u64, 3); + } +} diff --git a/alioth/src/utils/ioctls.rs b/alioth/src/utils/ioctls.rs new file mode 100644 index 0000000..7c0b87c --- /dev/null +++ b/alioth/src/utils/ioctls.rs @@ -0,0 +1,184 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +use std::mem::size_of; + +use libc::c_ulong; + +const IOC_NONN: c_ulong = 0; +const IOC_WRITE: c_ulong = 1; +const IOC_READ: c_ulong = 2; + +const IOC_NRSHIFT: usize = 0; +const IOC_TYPESHIFT: usize = 8; +const IOC_SIZESHIFT: usize = 16; +const IOC_DIRSHIFT: usize = 30; + +const fn ioctl_ioc(dir: c_ulong, type_: u8, nr: u8, size: c_ulong) -> c_ulong { + (dir << IOC_DIRSHIFT) + | (size << IOC_SIZESHIFT) + | ((type_ as c_ulong) << IOC_TYPESHIFT) + | ((nr as c_ulong) << IOC_NRSHIFT) +} + +pub const fn ioctl_io(type_: u8, nr: u8) -> c_ulong { + ioctl_ioc(IOC_NONN, type_, nr, 0) +} + +pub const fn ioctl_ior(type_: u8, nr: u8) -> c_ulong { + ioctl_ioc(IOC_READ, type_, nr, size_of::() as c_ulong) +} + +pub const fn ioctl_iow(type_: u8, nr: u8) -> c_ulong { + ioctl_ioc(IOC_WRITE, type_, nr, size_of::() as c_ulong) +} + +pub const fn ioctl_iowr(type_: u8, nr: u8) -> c_ulong { + ioctl_ioc(IOC_WRITE | IOC_READ, type_, nr, size_of::() as c_ulong) +} + +#[macro_export] +macro_rules! ioctl_none { + ($name:ident, $type_:expr, $nr:expr, $val:expr) => { + pub unsafe fn $name(fd: &F) -> ::std::io::Result { + $crate::ffi!(::libc::ioctl( + fd.as_raw_fd(), + $crate::utils::ioctls::ioctl_io($type_, $nr), + $val as ::libc::c_ulong, + )) + } + }; +} + +#[macro_export] +macro_rules! ioctl_write_val { + ($name:ident, $code:expr) => { + pub unsafe fn $name( + fd: &F, + val: ::libc::c_ulong, + ) -> ::std::io::Result { + $crate::ffi!(::libc::ioctl(fd.as_raw_fd(), $code, val)) + } + }; + ($name:ident, $code:expr, $ty:ty) => { + pub unsafe fn $name( + fd: &F, + val: $ty, + ) -> ::std::io::Result { + $crate::ffi!(::libc::ioctl(fd.as_raw_fd(), $code, val)) + } + }; +} + +#[macro_export] +macro_rules! 
ioctl_write_ptr { + ($name:ident, $code:expr, $ty:ty) => { + pub unsafe fn $name( + fd: &F, + val: &$ty, + ) -> ::std::io::Result { + $crate::ffi!(::libc::ioctl(fd.as_raw_fd(), $code, val as *const $ty)) + } + }; + + ($name:ident, $type_:expr, $nr:expr, $ty:ty) => { + pub unsafe fn $name( + fd: &F, + val: &$ty, + ) -> ::std::io::Result { + $crate::ffi!(::libc::ioctl( + fd.as_raw_fd(), + $crate::utils::ioctls::ioctl_iow::<$ty>($type_, $nr), + val as *const $ty, + )) + } + }; +} + +#[macro_export] +macro_rules! ioctl_write_buf { + ($name:ident, $type_:expr, $nr:expr, $ty:ident) => { + pub unsafe fn $name( + fd: &F, + val: &$ty, + ) -> ::std::io::Result { + $crate::ffi!(::libc::ioctl( + fd.as_raw_fd(), + $crate::utils::ioctls::ioctl_iow::<$ty<0>>($type_, $nr), + val as *const $ty, + )) + } + }; +} + +#[macro_export] +macro_rules! ioctl_writeread { + ($name:ident, $type_:expr, $nr:expr, $ty:ty) => { + pub unsafe fn $name( + fd: &F, + val: &mut $ty, + ) -> ::std::io::Result { + $crate::ffi!(::libc::ioctl( + fd.as_raw_fd(), + $crate::utils::ioctls::ioctl_iowr::<$ty>($type_, $nr), + val as *mut $ty, + )) + } + }; +} + +#[macro_export] +macro_rules! ioctl_writeread_buf { + ($name:ident, $type_:expr, $nr:expr, $ty:ident) => { + pub unsafe fn $name( + fd: &F, + val: &mut $ty, + ) -> ::std::io::Result { + $crate::ffi!(::libc::ioctl( + fd.as_raw_fd(), + $crate::utils::ioctls::ioctl_iowr::<$ty<0>>($type_, $nr), + val as *mut $ty, + )) + } + }; +} + +#[macro_export] +macro_rules! 
ioctl_read { + ($name:ident, $type_:expr, $nr:expr, $ty:ty) => { + pub unsafe fn $name(fd: &F) -> ::std::io::Result<$ty> { + let mut val = ::core::mem::MaybeUninit::<$ty>::uninit(); + $crate::ffi!(::libc::ioctl( + fd.as_raw_fd(), + $crate::utils::ioctls::ioctl_ior::<$ty>($type_, $nr), + val.as_mut_ptr(), + ))?; + ::std::io::Result::Ok(val.assume_init()) + } + }; +} + +#[cfg(test)] +mod test { + use crate::utils::ioctls::{ioctl_io, ioctl_ior, ioctl_iow, ioctl_iowr}; + + #[test] + fn test_codes() { + const KVMIO: u8 = 0xAE; + assert_eq!(ioctl_io(KVMIO, 0x01), 0xae01); + assert_eq!(ioctl_ior::<[u8; 320]>(KVMIO, 0xcc), 0x8140aecc); + assert_eq!(ioctl_iow::<[u8; 320]>(KVMIO, 0xcd), 0x4140aecd); + assert_eq!(ioctl_iowr::<[u8; 8]>(KVMIO, 0x05), 0xc008ae05); + } +} diff --git a/alioth/src/vm.rs b/alioth/src/vm.rs new file mode 100644 index 0000000..9401172 --- /dev/null +++ b/alioth/src/vm.rs @@ -0,0 +1,270 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#[cfg(target_arch = "x86_64")] +mod x86_64; + +use std::fmt::Debug; +use std::path::PathBuf; +use std::sync::atomic::{AtomicU8, Ordering}; +use std::sync::{Arc, Barrier, PoisonError}; +use std::thread::{self, JoinHandle}; + +use mio::{Events, Poll, Token, Waker}; +use thiserror::Error; + +use crate::device::serial::Serial; +use crate::hv::{self, Vcpu, Vm, VmEntry, VmExit}; +use crate::loader::{self, linux, InitState}; +use crate::mem; +use crate::mem::Memory; + +#[cfg(target_arch = "x86_64")] +use x86_64::ArchBoard; + +#[derive(Debug, Error)] +pub enum Error { + #[error("hypervisor: {0}")] + Hv( + #[from] + #[backtrace] + hv::Error, + ), + + #[error("memory: {0}")] + Memory( + #[from] + #[backtrace] + mem::Error, + ), + + #[error("host io: {0}")] + HostIo( + #[from] + #[backtrace] + std::io::Error, + ), + + #[error("loader: {0}")] + Loader( + #[from] + #[backtrace] + loader::Error, + ), + + #[error("rwlock poisoned")] + RwLockPoisoned, + + #[error("ACPI bytes exceed EBDA area")] + AcpiTooLong, + + #[error("cannot handle {0:#x?}")] + VmExit(String), +} + +type Result = std::result::Result; + +impl From> for Error { + fn from(_: PoisonError) -> Self { + Error::RwLockPoisoned + } +} + +#[derive(Debug)] +pub struct Payload { + pub executable: PathBuf, + pub exec_type: ExecType, + pub initramfs: Option, + pub cmd_line: Option, +} + +pub struct BoardConfig { + pub mem_size: usize, + pub num_cpu: u32, +} + +#[derive(Debug)] +pub enum ExecType { + Linux, +} + +pub struct Machine +where + H: crate::hv::Hypervisor, +{ + vcpu_threads: Vec>>, + board: Arc>, + // board_config: BoardConfig, + payload: Option, + poll: Poll, +} + +const STATE_CREATED: u8 = 0; +const STATE_RUNNING: u8 = 1; +const STATE_SHUTDOWN: u8 = 2; + +struct Board +where + V: crate::hv::Vm, +{ + vm: V, + memory: Memory, + arch: ArchBoard, + config: BoardConfig, + waker: Waker, + state: AtomicU8, +} + +impl Board +where + V: crate::hv::Vm, +{ + fn vcpu_loop(&self, vcpu: &mut ::Vcpu, id: u32) -> Result<(), 
Error> { + let mut vm_entry = VmEntry::None; + loop { + // TODO is there any race here? + if self.state.load(Ordering::Acquire) == STATE_SHUTDOWN { + vm_entry = VmEntry::Shutdown; + } + let vm_exit = vcpu.run(vm_entry)?; + vm_entry = match vm_exit { + VmExit::Io { port, write, size } => self.memory.handle_io(port, write, size)?, + VmExit::Mmio { addr, write, size } => self.memory.handle_mmio(addr, write, size)?, + VmExit::Shutdown => { + log::info!("vcpu {id} requested shutdown"); + break Ok(()); + } + VmExit::Interrupted => VmEntry::None, + VmExit::Unknown(msg) => break Err(Error::VmExit(msg)), + }; + } + } + + fn run_vcpu_inner( + &self, + id: u32, + init_state: &InitState, + barrier: &Barrier, + ) -> Result<(), Error> { + let mut vcpu = self.init_vcpu(id, init_state)?; + barrier.wait(); + self.vcpu_loop(&mut vcpu, id) + } + + fn run_vcpu(&self, id: u32, init_state: &InitState, barrier: &Barrier) -> Result<(), Error> { + let ret = self.run_vcpu_inner(id, init_state, barrier); + self.state.store(STATE_SHUTDOWN, Ordering::Release); + self.waker.wake()?; + ret + } +} + +impl Machine +where + H: crate::hv::Hypervisor + 'static, +{ + pub fn new(hv: H, config: BoardConfig) -> Result { + let mut vm = hv.create_vm()?; + let vm_mmemory = vm.create_vm_memory()?; + let memory = Memory::new(vm_mmemory); + let arch = ArchBoard::new(&hv)?; + + let poll = Poll::new()?; + let waker = Waker::new(poll.registry(), Token(0))?; + let board = Board { + vm, + memory, + arch, + config, + waker, + state: AtomicU8::new(STATE_CREATED), + }; + + let machine = Machine { + board: Arc::new(board), + payload: None, + vcpu_threads: Vec::new(), + poll, + }; + Ok(machine) + } + + #[cfg(target_arch = "x86_64")] + pub fn add_com1(&self) -> Result<(), Error> { + let com1_intx_sender = self.board.vm.create_intx_sender(4)?; + let com1 = Serial::new(0x3f8, com1_intx_sender)?; + self.board.memory.add_io_dev(Some(0x3f8), Arc::new(com1))?; + Ok(()) + } + + pub fn add_payload(&mut self, payload: Payload) { 
+ self.payload = Some(payload) + } + + fn load_payload(&self) -> Result { + let Some(payload) = &self.payload else { + return Ok(InitState::default()); + }; + let mem_regions = self.board.memory.to_mem_regions()?; + let init_state = match payload.exec_type { + ExecType::Linux => linux::load( + self.board.memory.ram_bus(), + &mem_regions, + &payload.executable, + payload.cmd_line.as_deref(), + payload.initramfs.as_ref(), + )?, + }; + Ok(init_state) + } + + pub fn boot(&mut self) -> Result<(), Error> { + self.create_ram()?; + let init_state = Arc::new(self.load_payload()?); + self.create_firmware_data(&init_state)?; + self.board.state.store(STATE_RUNNING, Ordering::Release); + let barrier = Arc::new(Barrier::new(self.board.config.num_cpu as usize)); + for vcpu_id in 0..self.board.config.num_cpu { + let init_state = init_state.clone(); + let barrier = barrier.clone(); + let board = self.board.clone(); + let handle = thread::Builder::new() + .name(format!("vcpu_{}", vcpu_id)) + .spawn(move || board.run_vcpu(vcpu_id, &init_state, &barrier))?; + self.vcpu_threads.push(handle); + } + Ok(()) + } + + pub fn wait(&mut self) -> Vec> { + let mut events = Events::with_capacity(8); + if let Err(e) = self.poll.poll(&mut events, None) { + return vec![Err(e.into())]; + } + self.vcpu_threads + .drain(..) + .enumerate() + .map(|(id, handle)| { + ::stop_vcpu(id as u32, &handle)?; + match handle.join() { + Err(e) => { + log::error!("cannot join vcpu {}: {:?}", id, e); + Ok(()) + } + Ok(r) => r, + } + }) + .collect() + } +} diff --git a/alioth/src/vm/x86_64.rs b/alioth/src/vm/x86_64.rs new file mode 100644 index 0000000..0d06da2 --- /dev/null +++ b/alioth/src/vm/x86_64.rs @@ -0,0 +1,113 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::acpi::create_acpi_tables; +use crate::arch::layout::{ + BIOS_DATA_END, EBDA_END, EBDA_START, MEM_64_START, RAM_32_END, RAM_32_SIZE, +}; +use crate::hv::arch::Cpuid; +use crate::hv::{Hypervisor, Vcpu, Vm}; +use crate::loader::InitState; +use crate::mem::ram::UserMem; +use crate::mem::{AddrOpt, MemRegionType}; +use crate::vm::{Board, Error, Machine, Result}; + +pub struct ArchBoard { + cpuids: Vec, +} + +impl ArchBoard { + pub fn new(hv: &H) -> Result { + let mut cpuids = hv.get_supported_cpuids()?; + for cpuid in &mut cpuids { + if cpuid.func == 0x1 { + cpuid.ecx |= (1 << 24) | (1 << 31); + } + } + Ok(Self { cpuids }) + } +} + +impl Board +where + V: Vm, +{ + pub fn init_vcpu(&self, id: u32, init_state: &InitState) -> Result<::Vcpu, Error> { + let mut vcpu = self.vm.create_vcpu(id)?; + if id == 0 { + vcpu.set_regs(&init_state.regs)?; + vcpu.set_sregs(&init_state.sregs, &init_state.seg_regs, &init_state.dt_regs)?; + } + let mut cpuids = self.arch.cpuids.clone(); + for cpuid in &mut cpuids { + if cpuid.func == 0x1 { + cpuid.ebx &= 0x00ff_ffff; + cpuid.ebx |= id << 24; + } else if cpuid.func == 0xb { + cpuid.edx = id; + } + } + vcpu.set_cpuids(cpuids)?; + Ok(vcpu) + } +} + +impl Machine +where + H: Hypervisor, +{ + pub fn create_firmware_data(&self, _init_state: &InitState) -> Result<()> { + let acpi_bytes = create_acpi_tables(EBDA_START, self.board.config.num_cpu); + if acpi_bytes.len() > EBDA_END - EBDA_START { + return Err(Error::AcpiTooLong); + } + let ram = self.board.memory.ram_bus(); + ram.write_range(EBDA_START, acpi_bytes.len(), 
&*acpi_bytes)?; + Ok(()) + } + + pub fn create_ram(&self) -> Result<()> { + let config = &self.board.config; + let memory = &self.board.memory; + if config.mem_size > RAM_32_SIZE { + memory.add_ram( + AddrOpt::Fixed(0), + UserMem::new_anon(RAM_32_SIZE)?, + &[ + (BIOS_DATA_END, MemRegionType::Reserved), + (EBDA_START - BIOS_DATA_END, MemRegionType::Ram), + (EBDA_END - EBDA_START, MemRegionType::Acpi), + (RAM_32_END - EBDA_END, MemRegionType::Ram), + ], + )?; + memory.add_ram( + AddrOpt::Fixed(MEM_64_START), + UserMem::new_anon(config.mem_size - RAM_32_END)?, + &[(config.mem_size - RAM_32_END, MemRegionType::Ram)], + )?; + } else { + memory.add_ram( + AddrOpt::Fixed(0), + UserMem::new_anon(config.mem_size)?, + &[ + (BIOS_DATA_END, MemRegionType::Reserved), + (EBDA_START - BIOS_DATA_END, MemRegionType::Ram), + (EBDA_END - EBDA_START, MemRegionType::Acpi), + (config.mem_size - EBDA_END, MemRegionType::Ram), + ], + )?; + } + Ok(()) + } +} diff --git a/docs/contributing.md b/docs/contributing.md new file mode 100644 index 0000000..bc23aae --- /dev/null +++ b/docs/contributing.md @@ -0,0 +1,33 @@ +# How to contribute + +We'd love to accept your patches and contributions to this project. + +## Before you begin + +### Sign our Contributor License Agreement + +Contributions to this project must be accompanied by a +[Contributor License Agreement](https://cla.developers.google.com/about) (CLA). +You (or your employer) retain the copyright to your contribution; this simply +gives us permission to use and redistribute your contributions as part of the +project. + +If you or your current employer have already signed the Google CLA (even if it +was for a different project), you probably don't need to do it again. + +Visit to see your current agreements or to +sign a new one. + +### Review our community guidelines + +This project follows +[Google's Open Source Community Guidelines](https://opensource.google/conduct/). 
+ +## Contribution process + +### Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. \ No newline at end of file diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..271800c --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly" \ No newline at end of file diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..24cac62 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,2 @@ +imports_granularity = "Module" +