From ef83f2beeb5cbbed6f6711aa703bcf78ffd2c16a Mon Sep 17 00:00:00 2001 From: Waleed Khan Date: Sat, 8 Jul 2023 02:23:32 -0700 Subject: [PATCH] feat(fsmonitor): Watchman filesystem monitor implementation --- Cargo.lock | 219 +++++++++++++++++++++++++++++- Cargo.toml | 1 + lib/Cargo.toml | 14 +- lib/src/fsmonitor.rs | 215 +++++++++++++++++++++++++++++ lib/src/lib.rs | 1 + lib/src/protos/working_copy.proto | 8 ++ lib/src/protos/working_copy.rs | 19 +++ lib/src/settings.rs | 9 ++ lib/src/working_copy.rs | 121 +++++++++++++++-- lib/tests/test_working_copy.rs | 99 +++++++++++++- lib/testutils/src/lib.rs | 45 +++++- src/cli_util.rs | 1 + src/commands/mod.rs | 1 + src/merge_tools.rs | 3 + 14 files changed, 738 insertions(+), 18 deletions(-) create mode 100644 lib/src/fsmonitor.rs diff --git a/Cargo.lock b/Cargo.lock index eb07d21d1..2f0636f5b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -200,11 +200,24 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "bytes" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "206fdffcfa2df7cbe15601ef46c813fce0965eb3286db6b56c583b814b51c81c" +dependencies = [ + "byteorder", + "iovec", +] + [[package]] name = "bytes" version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +dependencies = [ + "serde", +] [[package]] name = "camino" @@ -639,6 +652,102 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" + +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bca583b7e26f571124fe5b7561d49cb2868d79116cfa0eefce955557c6fee8c" + +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.23", +] + +[[package]] +name = "futures-sink" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f43be4fe21a13b9781a69afa4985b0f6ee0e1afab2c6f454a8cf30e2b2237b6e" + +[[package]] +name = "futures-task" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" + +[[package]] +name = "futures-util" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" +dependencies = [ + "futures 0.1.31", + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "gen-protos" version = "0.1.0" @@ -806,6 +915,15 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "iovec" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e" +dependencies = [ + "libc", +] + [[package]] name = "is-terminal" version = "0.4.7" @@ -910,7 +1028,7 @@ dependencies = [ "backoff", "blake2", "byteorder", - "bytes", + "bytes 1.4.0", "chrono", "config", "criterion", @@ -936,8 +1054,10 @@ dependencies = [ "test-case", "testutils", "thiserror", + "tokio", "tracing", "version_check", + "watchman_client", "whoami", "zstd", ] @@ -1278,6 +1398,12 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "pkg-config" version = "0.3.25" @@ -1394,7 +1520,7 @@ version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" dependencies = [ - "bytes", + "bytes 1.4.0", "prost-derive", ] @@ -1404,7 +1530,7 @@ version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ - "bytes", + "bytes 1.4.0", "heck", "itertools 0.10.5", "lazy_static", @@ -1671,6 +1797,19 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bser" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b929ea725591083cbca8b8ea178ed6efc918eccd40b784e199ce88967104199" +dependencies = [ + "anyhow", + "byteorder", + "bytes 0.4.12", + "serde", + "thiserror", +] + [[package]] name = "serde_derive" version = "1.0.167" @@ -1779,6 +1918,16 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043" +[[package]] +name = "socket2" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64a4a911eed85daf18834cfaa86a79b7d266ff93ff5ba14005426219480ed662" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "strsim" version = "0.10.0" @@ -1951,6 +2100,52 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" +[[package]] +name = "tokio" +version = "1.28.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94d7b1cfd2aa4011f2de74c2c4c63665e27a71006b0a192dcd2710272e73dfa2" +dependencies = [ + "autocfg", + "bytes 1.4.0", + "libc", + "mio", + "num_cpus", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.23", +] + +[[package]] +name = "tokio-util" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36943ee01a6d67977dd3f84a5a1d2efeb4ada3a1ae771cadfaa535d9d9fc6507" +dependencies = [ + "bytes 1.4.0", + "futures-core", + "futures-io", + "futures-sink", + "log", + "pin-project-lite", + "slab", + "tokio", +] + [[package]] name = "toml" version = "0.5.9" @@ -2189,6 +2384,24 @@ version = "0.2.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6598dd0bd3c7d51095ff6531a5b23e02acdc81804e30d8f07afb77b7215a140a" +[[package]] +name = "watchman_client" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "839fea2d85719bb69089290d7970bba2131f544448db8f990ea75813c30775ca" +dependencies = [ + "anyhow", + "bytes 1.4.0", + "futures 0.3.28", + "maplit", + "serde", + "serde_bser", + "thiserror", + "tokio", + "tokio-util", + "winapi", +] + [[package]] name = "web-sys" version = "0.3.59" diff --git a/Cargo.toml b/Cargo.toml index d38ff8243..f6e4103c2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,3 +77,4 @@ testutils = { path = "lib/testutils" } default = [] bench = ["criterion"] vendored-openssl = ["git2/vendored-openssl", "jujutsu-lib/vendored-openssl"] +watchman = ["jujutsu-lib/watchman"] diff --git a/lib/Cargo.toml b/lib/Cargo.toml index 5ff3004c8..d9dfa050b 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -25,7 +25,10 @@ backoff = "0.4.0" blake2 = "0.10.6" byteorder = "1.4.3" bytes = "1.4.0" -chrono = { version = "0.4.26", default-features = false, features = ["std", "clock"] } +chrono = { version = "0.4.26", default-features = false, features = [ + "std", + "clock", +] } config = { version = "0.13.3", default-features = false, features = ["toml"] } digest = "0.10.7" git2 = "0.17.2" @@ -40,11 +43,17 @@ rand = "0.8.5" rand_chacha = "0.3.1" regex = "1.9.0" serde_json = "1.0.100" -smallvec = { version = "1.11.0", features = ["const_generics", "const_new", "union"] } +smallvec = { version = "1.11.0", features = [ + "const_generics", + "const_new", + "union", +] } strsim = "0.10.0" tempfile = "3.6.0" thiserror = "1.0.43" +tokio = { version = "1.18.2", optional = true } tracing = "0.1.37" +watchman_client = { version = "0.8.0", optional = true } whoami = "1.4.1" zstd = "0.12.3" @@ -62,3 +71,4 @@ testutils = { path = "testutils" } [features] default = [] vendored-openssl = ["git2/vendored-openssl"] +watchman = ["dep:tokio", "dep:watchman_client"] diff --git a/lib/src/fsmonitor.rs b/lib/src/fsmonitor.rs new file mode 100644 index 000000000..665cea1a8 --- /dev/null +++ b/lib/src/fsmonitor.rs @@ -0,0 +1,215 @@ +// Copyright 2023 The Jujutsu Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Interfaces with a filesystem monitor tool to efficiently query for +//! filesystem updates, without having to crawl the entire working copy. This is +//! particularly useful for large working copies, or for working copies for +//! which it's expensive to materialize files, such those backed by a network or +//! virtualized filesystem. + +#![warn(missing_docs)] + +use std::path::PathBuf; +use std::str::FromStr; + +/// The recognized kinds of filesystem monitors. +pub enum FsmonitorKind { + /// The Watchman filesystem monitor (https://facebook.github.io/watchman/). + Watchman, + + /// Only used in tests. + Test { + /// The set of changed files to pretend that the filesystem monitor is + /// reporting. + changed_files: Vec, + }, +} + +impl FromStr for FsmonitorKind { + type Err = config::ConfigError; + + fn from_str(s: &str) -> Result { + match s { + "watchman" => Ok(Self::Watchman), + "test" => Err(config::ConfigError::Message( + "cannot use test fsmonitor in real repository".to_string(), + )), + other => Err(config::ConfigError::Message(format!( + "unknown fsmonitor kind: {}", + other + ))), + } + } +} + +/// Filesystem monitor integration using Watchman +/// (https://facebook.github.io/watchman/). Requires `watchman` to already be +/// installed on the system. +#[cfg(feature = "watchman")] +pub mod watchman { + use std::path::{Path, PathBuf}; + + use itertools::Itertools; + use thiserror::Error; + use tracing::info; + use watchman_client::prelude::{ + Clock as InnerClock, ClockSpec, NameOnly, QueryRequestCommon, QueryResult, + }; + + /// Represents an instance in time from the perspective of the filesystem + /// monitor. + /// + /// This can be used to perform incremental queries. When making a query, + /// the result will include an associated "clock" representing the time + /// that the query was made. By passing the same clock into a future + /// query, we inform the filesystem monitor that we only wish to get + /// changed files since the previous point in time. + #[derive(Clone, Debug)] + pub struct Clock(InnerClock); + + impl From for Clock { + fn from(clock: crate::protos::working_copy::WatchmanClock) -> Self { + use crate::protos::working_copy::watchman_clock::WatchmanClock; + let watchman_clock = clock.watchman_clock.unwrap(); + let clock = match watchman_clock { + WatchmanClock::StringClock(string_clock) => { + InnerClock::Spec(ClockSpec::StringClock(string_clock)) + } + WatchmanClock::UnixTimestamp(unix_timestamp) => { + InnerClock::Spec(ClockSpec::UnixTimestamp(unix_timestamp)) + } + }; + Self(clock) + } + } + + impl From for crate::protos::working_copy::WatchmanClock { + fn from(clock: Clock) -> Self { + use crate::protos::working_copy::{watchman_clock, WatchmanClock}; + let Clock(clock) = clock; + let watchman_clock = match clock { + InnerClock::Spec(ClockSpec::StringClock(string_clock)) => { + watchman_clock::WatchmanClock::StringClock(string_clock) + } + InnerClock::Spec(ClockSpec::UnixTimestamp(unix_timestamp)) => { + watchman_clock::WatchmanClock::UnixTimestamp(unix_timestamp) + } + InnerClock::ScmAware(_) => { + unimplemented!("SCM-aware Watchman clocks not supported") + } + }; + WatchmanClock { + watchman_clock: Some(watchman_clock), + } + } + } + + #[allow(missing_docs)] + #[derive(Debug, Error)] + pub enum Error { + #[error("Could not connect to Watchman: {0}")] + WatchmanConnectError(watchman_client::Error), + + #[error("Could not canonicalize working copy root path: {0}")] + CanonicalizeRootError(std::io::Error), + + #[error("Watchman failed to resolve the working copy root path: {0}")] + ResolveRootError(watchman_client::Error), + + #[error("Failed to query Watchman: {0}")] + WatchmanQueryError(watchman_client::Error), + } + + /// Handle to the underlying Watchman instance. + pub struct Fsmonitor { + client: watchman_client::Client, + resolved_root: watchman_client::ResolvedRoot, + } + + impl Fsmonitor { + /// Initialize the Watchman filesystem monitor. If it's not already + /// running, this will start it and have it crawl the working + /// copy to build up its in-memory representation of the + /// filesystem, which may take some time. + pub async fn init(working_copy_path: &Path) -> Result { + info!("Initializing Watchman filesystem monitor..."); + let connector = watchman_client::Connector::new(); + let client = connector + .connect() + .await + .map_err(Error::WatchmanConnectError)?; + let working_copy_root = watchman_client::CanonicalPath::canonicalize(working_copy_path) + .map_err(Error::CanonicalizeRootError)?; + let resolved_root = client + .resolve_root(working_copy_root) + .await + .map_err(Error::ResolveRootError)?; + Ok(Fsmonitor { + client, + resolved_root, + }) + } + + /// Query for changed files since the previous point in time. + /// + /// The returned list of paths is absolute. If it is `None`, then the + /// caller must crawl the entire working copy themselves. + pub async fn query_changed_files( + &self, + previous_clock: Option, + ) -> Result<(Clock, Option>), Error> { + info!("Querying Watchman for changed files..."); + let QueryResult { + version: _, + is_fresh_instance, + files, + clock, + state_enter: _, + state_leave: _, + state_metadata: _, + saved_state_info: _, + debug: _, + }: QueryResult = self + .client + .query( + &self.resolved_root, + QueryRequestCommon { + since: previous_clock.map(|Clock(clock)| clock), + ..Default::default() + }, + ) + .await + .map_err(Error::WatchmanQueryError)?; + + let clock = Clock(clock); + if is_fresh_instance { + // The Watchman documentation states that if it was a fresh + // instance, we need to delete any tree entries that didn't appear + // in the returned list of changed files. For now, the caller will + // handle this by manually crawling the working copy again. + Ok((clock, None)) + } else { + let paths = files + .unwrap_or_default() + .into_iter() + .map(|file_info| { + let NameOnly { name } = file_info; + self.resolved_root.path().join(name.into_inner()) + }) + .collect_vec(); + Ok((clock, Some(paths))) + } + } + } +} diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 70984f38c..da1526e37 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -29,6 +29,7 @@ pub mod default_submodule_store; pub mod diff; pub mod file_util; pub mod files; +pub mod fsmonitor; pub mod git; pub mod git_backend; pub mod gitignore; diff --git a/lib/src/protos/working_copy.proto b/lib/src/protos/working_copy.proto index e2fe5e7f2..1f2b27be0 100644 --- a/lib/src/protos/working_copy.proto +++ b/lib/src/protos/working_copy.proto @@ -40,6 +40,14 @@ message TreeState { bytes tree_id = 1; map file_states = 2; SparsePatterns sparse_patterns = 3; + WatchmanClock watchman_clock = 4; +} + +message WatchmanClock { + oneof watchman_clock { + string string_clock = 1; + int64 unix_timestamp = 2; + } } message Checkout { diff --git a/lib/src/protos/working_copy.rs b/lib/src/protos/working_copy.rs index 7d770a99c..8bfcda93f 100644 --- a/lib/src/protos/working_copy.rs +++ b/lib/src/protos/working_copy.rs @@ -30,6 +30,25 @@ pub struct TreeState { >, #[prost(message, optional, tag = "3")] pub sparse_patterns: ::core::option::Option, + #[prost(message, optional, tag = "4")] + pub watchman_clock: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct WatchmanClock { + #[prost(oneof = "watchman_clock::WatchmanClock", tags = "1, 2")] + pub watchman_clock: ::core::option::Option, +} +/// Nested message and enum types in `WatchmanClock`. +pub mod watchman_clock { + #[allow(clippy::derive_partial_eq_without_eq)] + #[derive(Clone, PartialEq, ::prost::Oneof)] + pub enum WatchmanClock { + #[prost(string, tag = "1")] + StringClock(::prost::alloc::string::String), + #[prost(int64, tag = "2")] + UnixTimestamp(i64), + } } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/lib/src/settings.rs b/lib/src/settings.rs index 02658093c..e10b5a753 100644 --- a/lib/src/settings.rs +++ b/lib/src/settings.rs @@ -20,6 +20,7 @@ use rand::prelude::*; use rand_chacha::ChaCha20Rng; use crate::backend::{ChangeId, ObjectId, Signature, Timestamp}; +use crate::fsmonitor::FsmonitorKind; #[derive(Debug, Clone)] pub struct UserSettings { @@ -109,6 +110,14 @@ impl UserSettings { .unwrap_or_else(|_| Self::user_email_placeholder().to_string()) } + pub fn fsmonitor_kind(&self) -> Result, config::ConfigError> { + match self.config.get_string("core.fsmonitor") { + Ok(fsmonitor_kind) => Ok(Some(fsmonitor_kind.parse()?)), + Err(config::ConfigError::NotFound(_)) => Ok(None), + Err(err) => Err(err), + } + } + pub fn user_email_placeholder() -> &'static str { "(no email configured)" } diff --git a/lib/src/working_copy.rs b/lib/src/working_copy.rs index 607a2c9c6..f8f6d14b0 100644 --- a/lib/src/working_copy.rs +++ b/lib/src/working_copy.rs @@ -26,6 +26,7 @@ use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::UNIX_EPOCH; +use itertools::Itertools; use once_cell::unsync::OnceCell; use prost::Message; use tempfile::NamedTempFile; @@ -34,11 +35,16 @@ use thiserror::Error; use crate::backend::{ BackendError, ConflictId, FileId, MillisSinceEpoch, ObjectId, SymlinkId, TreeId, TreeValue, }; +#[cfg(feature = "watchman")] +use crate::fsmonitor::watchman; +use crate::fsmonitor::FsmonitorKind; use crate::gitignore::GitIgnoreFile; use crate::lock::FileLock; -use crate::matchers::{DifferenceMatcher, Matcher, PrefixMatcher}; +use crate::matchers::{ + DifferenceMatcher, EverythingMatcher, IntersectionMatcher, Matcher, PrefixMatcher, +}; use crate::op_store::{OperationId, WorkspaceId}; -use crate::repo_path::{RepoPath, RepoPathComponent, RepoPathJoin}; +use crate::repo_path::{FsPathParseError, RepoPath, RepoPathComponent, RepoPathJoin}; use crate::store::Store; use crate::tree::{Diff, Tree}; use crate::tree_builder::TreeBuilder; @@ -122,6 +128,11 @@ pub struct TreeState { // Currently only path prefixes sparse_patterns: Vec, own_mtime: MillisSinceEpoch, + + /// The most recent clock value returned by Watchman. Will only be set if + /// the repo is configured to use the Watchman filesystem monitor and + /// Watchman has been queried at least once. + watchman_clock: Option, } fn file_state_from_proto(proto: crate::protos::working_copy::FileState) -> FileState { @@ -270,6 +281,10 @@ pub struct CheckoutStats { #[derive(Debug, Error)] pub enum SnapshotError { + #[error("Failed to open file {path}: {err:?}")] + FileOpenError { path: PathBuf, err: std::io::Error }, + #[error("Failed to query the filesystem monitor: {0}")] + FsmonitorError(String), #[error("{message}: {err}")] IoError { message: String, @@ -326,18 +341,25 @@ fn suppress_file_exists_error(orig_err: CheckoutError) -> Result<(), CheckoutErr pub struct SnapshotOptions<'a> { pub base_ignores: Arc, + pub fsmonitor_kind: Option, pub progress: Option<&'a SnapshotProgress<'a>>, } -impl<'a> SnapshotOptions<'a> { +impl SnapshotOptions<'_> { pub fn empty_for_test() -> Self { SnapshotOptions { base_ignores: GitIgnoreFile::empty(), + fsmonitor_kind: None, progress: None, } } } +struct FsmonitorMatcher { + matcher: Option>, + watchman_clock: Option, +} + #[derive(Debug, Error)] pub enum ResetError { // The current working-copy commit was deleted, maybe by an overly aggressive GC that happened @@ -385,6 +407,7 @@ impl TreeState { file_states: BTreeMap::new(), sparse_patterns: vec![RepoPath::root()], own_mtime: MillisSinceEpoch(0), + watchman_clock: None, } } @@ -418,6 +441,7 @@ impl TreeState { self.tree_id = TreeId::new(proto.tree_id.clone()); self.file_states = file_states_from_proto(&proto); self.sparse_patterns = sparse_patterns_from_proto(&proto); + self.watchman_clock = proto.watchman_clock; } fn save(&mut self) { @@ -438,6 +462,7 @@ impl TreeState { .push(path.to_internal_file_string()); } proto.sparse_patterns = Some(sparse_patterns); + proto.watchman_clock = self.watchman_clock.clone(); let mut temp_file = NamedTempFile::new_in(&self.state_path).unwrap(); temp_file @@ -487,11 +512,22 @@ impl TreeState { Ok(self.store.write_symlink(path, str_target)?) } + #[cfg(feature = "watchman")] + #[tokio::main] + async fn query_watchman( + &self, + ) -> Result<(watchman::Clock, Option>), watchman::Error> { + let fsmonitor = watchman::Fsmonitor::init(&self.working_copy_path).await?; + let previous_clock = self.watchman_clock.clone().map(watchman::Clock::from); + fsmonitor.query_changed_files(previous_clock).await + } + /// Look for changes to the working copy. If there are any changes, create - /// a new tree from it. + /// a new tree from it and return it, and also update the dirstate on disk. pub fn snapshot(&mut self, options: SnapshotOptions) -> Result { let SnapshotOptions { base_ignores, + fsmonitor_kind, progress, } = options; @@ -506,6 +542,19 @@ impl TreeState { }) .collect(); + let fsmonitor_clock_needs_save = fsmonitor_kind.is_some(); + let FsmonitorMatcher { + matcher: fsmonitor_matcher, + watchman_clock, + } = self.make_fsmonitor_matcher(fsmonitor_kind, &mut deleted_files)?; + + let matcher = IntersectionMatcher::new( + sparse_matcher.as_ref(), + match fsmonitor_matcher.as_ref() { + None => &EverythingMatcher, + Some(fsmonitor_matcher) => fsmonitor_matcher.as_ref(), + }, + ); struct WorkItem { dir: RepoPath, disk_dir: PathBuf, @@ -522,7 +571,7 @@ impl TreeState { git_ignore, }) = work.pop() { - if sparse_matcher.visit(&dir).is_nothing() { + if matcher.visit(&dir).is_nothing() { continue; } let git_ignore = git_ignore @@ -554,6 +603,7 @@ impl TreeState { { continue; } + work.push(WorkItem { dir: sub_path, disk_dir: entry.path(), @@ -561,7 +611,7 @@ impl TreeState { }); } else { deleted_files.remove(&sub_path); - if sparse_matcher.matches(&sub_path) { + if matcher.matches(&sub_path) { if let Some(progress) = progress { progress(&sub_path); } @@ -581,9 +631,64 @@ impl TreeState { self.file_states.remove(file); tree_builder.remove(file.clone()); } - let changed = tree_builder.has_overrides(); + let has_changes = tree_builder.has_overrides(); self.tree_id = tree_builder.write_tree(); - Ok(changed) + self.watchman_clock = watchman_clock; + Ok(has_changes || fsmonitor_clock_needs_save) + } + + fn make_fsmonitor_matcher( + &mut self, + fsmonitor_kind: Option, + deleted_files: &mut HashSet, + ) -> Result { + let (watchman_clock, changed_files) = match fsmonitor_kind { + None => (None, None), + Some(FsmonitorKind::Test { changed_files }) => (None, Some(changed_files)), + #[cfg(feature = "watchman")] + Some(FsmonitorKind::Watchman) => match self.query_watchman() { + Ok((watchman_clock, changed_files)) => (Some(watchman_clock.into()), changed_files), + Err(err) => { + tracing::warn!(?err, "Failed to query filesystem monitor"); + (None, None) + } + }, + #[cfg(not(feature = "watchman"))] + Some(FsmonitorKind::Watchman) => { + return Err(SnapshotError::FsmonitorError( + "Cannot query Watchman because jj was not compiled with the `watchman` \ + feature (consider disabling `core.fsmonitor`)" + .to_string(), + )); + } + }; + let matcher: Option> = match changed_files { + None => None, + Some(changed_files) => { + let repo_paths = changed_files + .into_iter() + .filter_map(|path| { + match RepoPath::parse_fs_path( + &self.working_copy_path, + &self.working_copy_path, + path, + ) { + Ok(repo_path) => Some(repo_path), + Err(FsPathParseError::InputNotInRepo(_)) => None, + } + }) + .collect_vec(); + + let repo_path_set: HashSet<_> = repo_paths.iter().collect(); + deleted_files.retain(|path| repo_path_set.contains(path)); + + Some(Box::new(PrefixMatcher::new(&repo_paths))) + } + }; + Ok(FsmonitorMatcher { + matcher, + watchman_clock, + }) } fn has_files_under(&self, dir: &RepoPath) -> bool { diff --git a/lib/tests/test_working_copy.rs b/lib/tests/test_working_copy.rs index 5a64b499d..1039e28b5 100644 --- a/lib/tests/test_working_copy.rs +++ b/lib/tests/test_working_copy.rs @@ -23,6 +23,7 @@ use std::sync::Arc; use itertools::Itertools; use jujutsu_lib::backend::{TreeId, TreeValue}; use jujutsu_lib::conflicts::Conflict; +use jujutsu_lib::fsmonitor::FsmonitorKind; #[cfg(unix)] use jujutsu_lib::op_store::OperationId; use jujutsu_lib::op_store::WorkspaceId; @@ -30,7 +31,7 @@ use jujutsu_lib::repo::{ReadonlyRepo, Repo}; use jujutsu_lib::repo_path::{RepoPath, RepoPathComponent, RepoPathJoin}; use jujutsu_lib::settings::UserSettings; use jujutsu_lib::tree_builder::TreeBuilder; -use jujutsu_lib::working_copy::{SnapshotOptions, WorkingCopy}; +use jujutsu_lib::working_copy::{LockedWorkingCopy, SnapshotOptions, WorkingCopy}; use test_case::test_case; use testutils::{write_random_commit, TestWorkspace}; @@ -821,3 +822,99 @@ fn test_existing_directory_symlink(use_git: bool) { // Therefore, "../escaped" shouldn't be created. assert!(!workspace_root.parent().unwrap().join("escaped").exists()); } + +#[test] +fn test_fsmonitor() { + let settings = testutils::user_settings(); + let mut test_workspace = TestWorkspace::init(&settings, true); + let repo = &test_workspace.repo; + let workspace_root = test_workspace.workspace.workspace_root().clone(); + + let wc = test_workspace.workspace.working_copy_mut(); + assert_eq!(wc.sparse_patterns(), vec![RepoPath::root()]); + + let foo_path = RepoPath::from_internal_string("foo"); + let bar_path = RepoPath::from_internal_string("bar"); + let nested_path = RepoPath::from_internal_string("path/to/nested"); + testutils::write_working_copy_file(&workspace_root, &foo_path, "foo\n"); + testutils::write_working_copy_file(&workspace_root, &bar_path, "bar\n"); + testutils::write_working_copy_file(&workspace_root, &nested_path, "nested\n"); + + let ignored_path = RepoPath::from_internal_string("path/to/ignored"); + let gitignore_path = RepoPath::from_internal_string("path/.gitignore"); + testutils::write_working_copy_file(&workspace_root, &ignored_path, "ignored\n"); + testutils::write_working_copy_file(&workspace_root, &gitignore_path, "to/ignored\n"); + + let snapshot = |locked_wc: &mut LockedWorkingCopy, paths: &[&RepoPath]| { + let fs_paths = paths + .iter() + .map(|p| p.to_fs_path(&workspace_root)) + .collect(); + locked_wc + .snapshot(SnapshotOptions { + fsmonitor_kind: Some(FsmonitorKind::Test { + changed_files: fs_paths, + }), + ..SnapshotOptions::empty_for_test() + }) + .unwrap() + }; + + { + let mut locked_wc = wc.start_mutation(); + let tree_id = snapshot(&mut locked_wc, &[]); + assert_eq!(tree_id, *repo.store().empty_tree_id()); + locked_wc.discard(); + } + + { + let mut locked_wc = wc.start_mutation(); + let tree_id = snapshot(&mut locked_wc, &[&foo_path]); + insta::assert_snapshot!(testutils::dump_tree(repo.store(), &tree_id), @r###" + tree 205f6b799e7d5c2524468ca006a0131aa57ecce7 + file "foo" (257cc5642cb1a054f08cc83f2d943e56fd3ebe99): "foo\n" + "###); + locked_wc.discard(); + } + + { + let mut locked_wc = wc.start_mutation(); + let tree_id = snapshot( + &mut locked_wc, + &[&foo_path, &bar_path, &nested_path, &ignored_path], + ); + insta::assert_snapshot!(testutils::dump_tree(repo.store(), &tree_id), @r###" + tree ab5a0465cc71725a723f28b685844a5bc0f5b599 + file "bar" (5716ca5987cbf97d6bb54920bea6adde242d87e6): "bar\n" + file "foo" (257cc5642cb1a054f08cc83f2d943e56fd3ebe99): "foo\n" + file "path/to/nested" (79c53955ef856f16f2107446bc721c8879a1bd2e): "nested\n" + "###); + locked_wc.finish(repo.op_id().clone()); + } + + { + testutils::write_working_copy_file(&workspace_root, &foo_path, "updated foo\n"); + testutils::write_working_copy_file(&workspace_root, &bar_path, "updated bar\n"); + let mut locked_wc = wc.start_mutation(); + let tree_id = snapshot(&mut locked_wc, &[&foo_path]); + insta::assert_snapshot!(testutils::dump_tree(repo.store(), &tree_id), @r###" + tree 2f57ab8f48ae62e3137079f2add9878dfa1d1bcc + file "bar" (5716ca5987cbf97d6bb54920bea6adde242d87e6): "bar\n" + file "foo" (9d053d7c8a18a286dce9b99a59bb058be173b463): "updated foo\n" + file "path/to/nested" (79c53955ef856f16f2107446bc721c8879a1bd2e): "nested\n" + "###); + locked_wc.discard(); + } + + { + std::fs::remove_file(foo_path.to_fs_path(&workspace_root)).unwrap(); + let mut locked_wc = wc.start_mutation(); + let tree_id = snapshot(&mut locked_wc, &[&foo_path]); + insta::assert_snapshot!(testutils::dump_tree(repo.store(), &tree_id), @r###" + tree 34b83765131477e1a7d72160079daec12c6144e3 + file "bar" (5716ca5987cbf97d6bb54920bea6adde242d87e6): "bar\n" + file "path/to/nested" (79c53955ef856f16f2107446bc721c8879a1bd2e): "nested\n" + "###); + locked_wc.finish(repo.op_id().clone()); + } +} diff --git a/lib/testutils/src/lib.rs b/lib/testutils/src/lib.rs index aecde60e6..3441a83c6 100644 --- a/lib/testutils/src/lib.rs +++ b/lib/testutils/src/lib.rs @@ -12,14 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::fs; -use std::fs::OpenOptions; +use std::fs::{self, OpenOptions}; use std::io::{Read, Write}; use std::path::{Path, PathBuf}; use std::sync::{Arc, Once}; use itertools::Itertools; -use jujutsu_lib::backend::{Backend, BackendInitError, FileId, TreeId, TreeValue}; +use jujutsu_lib::backend::{Backend, BackendInitError, FileId, ObjectId, TreeId, TreeValue}; use jujutsu_lib::commit::Commit; use jujutsu_lib::commit_builder::CommitBuilder; use jujutsu_lib::git_backend::GitBackend; @@ -240,16 +239,54 @@ pub fn create_random_commit<'repo>( .set_description(format!("random commit {number}")) } +pub fn dump_tree(store: &Arc, tree_id: &TreeId) -> String { + use std::fmt::Write; + let mut buf = String::new(); + writeln!(&mut buf, "tree {}", tree_id.hex()).unwrap(); + let tree = store.get_tree(&RepoPath::root(), tree_id).unwrap(); + for (path, value) in tree.entries() { + match value { + TreeValue::File { id, executable: _ } => { + let file_buf = read_file(store, &path, &id); + let file_contents = String::from_utf8_lossy(&file_buf); + writeln!( + &mut buf, + " file {path:?} ({}): {file_contents:?}", + id.hex() + ) + .unwrap(); + } + TreeValue::Symlink(id) => { + writeln!(&mut buf, " symlink {path:?} ({})", id.hex()).unwrap(); + } + TreeValue::Conflict(id) => { + writeln!(&mut buf, " conflict {path:?} ({})", id.hex()).unwrap(); + } + TreeValue::GitSubmodule(id) => { + writeln!(&mut buf, " submodule {path:?} ({})", id.hex()).unwrap(); + } + entry => { + unimplemented!("dumping tree entry {entry:?}"); + } + } + } + buf +} + pub fn write_random_commit(mut_repo: &mut MutableRepo, settings: &UserSettings) -> Commit { create_random_commit(mut_repo, settings).write().unwrap() } pub fn write_working_copy_file(workspace_root: &Path, path: &RepoPath, contents: &str) { + let path = path.to_fs_path(workspace_root); + if let Some(parent) = path.parent() { + fs::create_dir_all(parent).unwrap(); + } let mut file = OpenOptions::new() .write(true) .create(true) .truncate(true) - .open(path.to_fs_path(workspace_root)) + .open(path) .unwrap(); file.write_all(contents.as_bytes()).unwrap(); } diff --git a/src/cli_util.rs b/src/cli_util.rs index 9b4175856..27e07a644 100644 --- a/src/cli_util.rs +++ b/src/cli_util.rs @@ -1093,6 +1093,7 @@ See https://github.com/martinvonz/jj/blob/main/docs/working-copy.md#stale-workin let progress = crate::progress::snapshot_progress(ui); let new_tree_id = locked_wc.snapshot(SnapshotOptions { base_ignores, + fsmonitor_kind: self.settings.fsmonitor_kind()?, progress: progress.as_ref().map(|x| x as _), })?; drop(progress); diff --git a/src/commands/mod.rs b/src/commands/mod.rs index 23986e7c2..bd624ce81 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -1311,6 +1311,7 @@ fn cmd_untrack( // untracked because they're not ignored. let wc_tree_id = locked_working_copy.snapshot(SnapshotOptions { base_ignores, + fsmonitor_kind: command.settings().fsmonitor_kind()?, progress: None, })?; if wc_tree_id != new_tree_id { diff --git a/src/merge_tools.rs b/src/merge_tools.rs index b50136b0b..997253c85 100644 --- a/src/merge_tools.rs +++ b/src/merge_tools.rs @@ -77,6 +77,8 @@ pub enum DiffEditError { CheckoutError(#[from] CheckoutError), #[error("Failed to snapshot changes: {0:?}")] SnapshotError(#[from] SnapshotError), + #[error(transparent)] + ConfigError(#[from] config::ConfigError), } #[derive(Debug, Error)] @@ -368,6 +370,7 @@ pub fn edit_diff( right_tree_state.snapshot(SnapshotOptions { base_ignores, + fsmonitor_kind: settings.fsmonitor_kind()?, progress: None, })?; Ok(right_tree_state.current_tree_id().clone())