Retrieve git statuses in one batch when scanning dirs

This commit is contained in:
Max Brunsfeld 2023-07-20 13:22:36 -07:00
parent 4557adf693
commit 4bd415f2b6
3 changed files with 152 additions and 178 deletions

View file

@ -25,24 +25,13 @@ pub struct Branch {
#[async_trait::async_trait]
pub trait GitRepository: Send {
fn reload_index(&self);
fn load_index_text(&self, relative_file_path: &Path) -> Option<String>;
fn branch_name(&self) -> Option<String>;
fn statuses(&self) -> Option<TreeMap<RepoPath, GitFileStatus>>;
fn statuses(&self) -> TreeMap<RepoPath, GitFileStatus>;
fn status(&self, path: &RepoPath) -> Result<Option<GitFileStatus>>;
fn branches(&self) -> Result<Vec<Branch>> {
Ok(vec![])
}
fn change_branch(&self, _: &str) -> Result<()> {
Ok(())
}
fn create_branch(&self, _: &str) -> Result<()> {
Ok(())
}
fn branches(&self) -> Result<Vec<Branch>>;
fn change_branch(&self, _: &str) -> Result<()>;
fn create_branch(&self, _: &str) -> Result<()>;
}
impl std::fmt::Debug for dyn GitRepository {
@ -89,24 +78,22 @@ impl GitRepository for LibGitRepository {
Some(branch.to_string())
}
fn statuses(&self) -> Option<TreeMap<RepoPath, GitFileStatus>> {
let statuses = self.statuses(None).log_err()?;
fn statuses(&self) -> TreeMap<RepoPath, GitFileStatus> {
let mut map = TreeMap::default();
if let Some(statuses) = self.statuses(None).log_err() {
for status in statuses
.iter()
.filter(|status| !status.status().contains(git2::Status::IGNORED))
{
let path = RepoPath(PathBuf::from(OsStr::from_bytes(status.path_bytes())));
let Some(status) = read_status(status.status()) else {
continue
};
for status in statuses
.iter()
.filter(|status| !status.status().contains(git2::Status::IGNORED))
{
let path = RepoPath(PathBuf::from(OsStr::from_bytes(status.path_bytes())));
let Some(status) = read_status(status.status()) else {
continue
};
map.insert(path, status)
map.insert(path, status)
}
}
Some(map)
map
}
fn status(&self, path: &RepoPath) -> Result<Option<GitFileStatus>> {
@ -213,19 +200,35 @@ impl GitRepository for FakeGitRepository {
state.branch_name.clone()
}
fn statuses(&self) -> Option<TreeMap<RepoPath, GitFileStatus>> {
let state = self.state.lock();
fn statuses(&self) -> TreeMap<RepoPath, GitFileStatus> {
let mut map = TreeMap::default();
let state = self.state.lock();
for (repo_path, status) in state.worktree_statuses.iter() {
map.insert(repo_path.to_owned(), status.to_owned());
}
Some(map)
map
}
fn status(&self, path: &RepoPath) -> Result<Option<GitFileStatus>> {
let state = self.state.lock();
Ok(state.worktree_statuses.get(path).cloned())
}
fn branches(&self) -> Result<Vec<Branch>> {
Ok(vec![])
}
fn change_branch(&self, name: &str) -> Result<()> {
let mut state = self.state.lock();
state.branch_name = Some(name.to_owned());
Ok(())
}
fn create_branch(&self, name: &str) -> Result<()> {
let mut state = self.state.lock();
state.branch_name = Some(name.to_owned());
Ok(())
}
}
fn check_path_to_repo_path_errors(relative_file_path: &Path) -> Result<()> {

View file

@ -2015,37 +2015,6 @@ impl LocalSnapshot {
entry
}
#[must_use = "Changed paths must be used for diffing later"]
fn scan_statuses(
&mut self,
repo_ptr: &dyn GitRepository,
work_directory: &RepositoryWorkDirectory,
) -> Vec<Arc<Path>> {
let mut changes = vec![];
let mut edits = vec![];
let statuses = repo_ptr.statuses();
for mut entry in self
.descendent_entries(false, false, &work_directory.0)
.cloned()
{
let Ok(repo_path) = entry.path.strip_prefix(&work_directory.0) else {
continue;
};
let repo_path = RepoPath(repo_path.to_path_buf());
let git_file_status = statuses.as_ref().and_then(|s| s.get(&repo_path).copied());
if entry.git_status != git_file_status {
entry.git_status = git_file_status;
changes.push(entry.path.clone());
edits.push(Edit::Insert(entry));
}
}
self.entries_by_path.edit(edits, &());
changes
}
fn ancestor_inodes_for_path(&self, path: &Path) -> TreeSet<u64> {
let mut inodes = TreeSet::default();
for ancestor in path.ancestors().skip(1) {
@ -2189,6 +2158,30 @@ impl BackgroundScannerState {
.any(|p| entry.path.starts_with(p))
}
fn enqueue_scan_dir(&self, abs_path: Arc<Path>, entry: &Entry, scan_job_tx: &Sender<ScanJob>) {
let path = entry.path.clone();
let ignore_stack = self.snapshot.ignore_stack_for_abs_path(&abs_path, true);
let mut ancestor_inodes = self.snapshot.ancestor_inodes_for_path(&path);
let containing_repository = self
.snapshot
.local_repo_for_path(&path)
.map(|(path, repo)| (path, repo.repo_ptr.lock().statuses()));
if !ancestor_inodes.contains(&entry.inode) {
ancestor_inodes.insert(entry.inode);
scan_job_tx
.try_send(ScanJob {
abs_path,
path,
ignore_stack,
scan_queue: scan_job_tx.clone(),
ancestor_inodes,
is_external: entry.is_external,
containing_repository,
})
.unwrap();
}
}
fn reuse_entry_id(&mut self, entry: &mut Entry) {
if let Some(removed_entry_id) = self.removed_entry_ids.remove(&entry.inode) {
entry.id = removed_entry_id;
@ -2201,7 +2194,7 @@ impl BackgroundScannerState {
self.reuse_entry_id(&mut entry);
let entry = self.snapshot.insert_entry(entry, fs);
if entry.path.file_name() == Some(&DOT_GIT) {
self.build_repository(entry.path.clone(), fs);
self.build_git_repository(entry.path.clone(), fs);
}
#[cfg(test)]
@ -2215,7 +2208,6 @@ impl BackgroundScannerState {
parent_path: &Arc<Path>,
entries: impl IntoIterator<Item = Entry>,
ignore: Option<Arc<Gitignore>>,
fs: &dyn Fs,
) {
let mut parent_entry = if let Some(parent_entry) = self
.snapshot
@ -2244,16 +2236,12 @@ impl BackgroundScannerState {
.insert(abs_parent_path, (ignore, false));
}
self.scanned_dirs.insert(parent_entry.id);
let parent_entry_id = parent_entry.id;
self.scanned_dirs.insert(parent_entry_id);
let mut entries_by_path_edits = vec![Edit::Insert(parent_entry)];
let mut entries_by_id_edits = Vec::new();
let mut dotgit_path = None;
for entry in entries {
if entry.path.file_name() == Some(&DOT_GIT) {
dotgit_path = Some(entry.path.clone());
}
entries_by_id_edits.push(Edit::Insert(PathEntry {
id: entry.id,
path: entry.path.clone(),
@ -2268,9 +2256,6 @@ impl BackgroundScannerState {
.edit(entries_by_path_edits, &());
self.snapshot.entries_by_id.edit(entries_by_id_edits, &());
if let Some(dotgit_path) = dotgit_path {
self.build_repository(dotgit_path, fs);
}
if let Err(ix) = self.changed_paths.binary_search(parent_path) {
self.changed_paths.insert(ix, parent_path.clone());
}
@ -2346,7 +2331,7 @@ impl BackgroundScannerState {
});
match repository {
None => {
self.build_repository(dot_git_dir.into(), fs);
self.build_git_repository(dot_git_dir.into(), fs);
}
Some((entry_id, repository)) => {
if repository.git_dir_scan_id == scan_id {
@ -2370,13 +2355,8 @@ impl BackgroundScannerState {
.repository_entries
.update(&work_dir, |entry| entry.branch = branch.map(Into::into));
let changed_paths = self.snapshot.scan_statuses(&*repository, &work_dir);
util::extend_sorted(
&mut self.changed_paths,
changed_paths,
usize::MAX,
Ord::cmp,
)
let statuses = repository.statuses();
self.update_git_statuses(&work_dir, &statuses);
}
}
}
@ -2397,7 +2377,11 @@ impl BackgroundScannerState {
snapshot.repository_entries = repository_entries;
}
fn build_repository(&mut self, dot_git_path: Arc<Path>, fs: &dyn Fs) -> Option<()> {
fn build_git_repository(
&mut self,
dot_git_path: Arc<Path>,
fs: &dyn Fs,
) -> Option<(RepositoryWorkDirectory, TreeMap<RepoPath, GitFileStatus>)> {
log::info!("build git repository {:?}", dot_git_path);
let work_dir_path: Arc<Path> = dot_git_path.parent().unwrap().into();
@ -2429,9 +2413,8 @@ impl BackgroundScannerState {
},
);
let changed_paths = self
.snapshot
.scan_statuses(repo_lock.deref(), &work_directory);
let statuses = repo_lock.statuses();
self.update_git_statuses(&work_directory, &statuses);
drop(repo_lock);
self.snapshot.git_repositories.insert(
@ -2443,8 +2426,36 @@ impl BackgroundScannerState {
},
);
util::extend_sorted(&mut self.changed_paths, changed_paths, usize::MAX, Ord::cmp);
Some(())
Some((work_directory, statuses))
}
fn update_git_statuses(
&mut self,
work_directory: &RepositoryWorkDirectory,
statuses: &TreeMap<RepoPath, GitFileStatus>,
) {
let mut changes = vec![];
let mut edits = vec![];
for mut entry in self
.snapshot
.descendent_entries(false, false, &work_directory.0)
.cloned()
{
let Ok(repo_path) = entry.path.strip_prefix(&work_directory.0) else {
continue;
};
let repo_path = RepoPath(repo_path.to_path_buf());
let git_file_status = statuses.get(&repo_path).copied();
if entry.git_status != git_file_status {
entry.git_status = git_file_status;
changes.push(entry.path.clone());
edits.push(Edit::Insert(entry));
}
}
self.snapshot.entries_by_path.edit(edits, &());
util::extend_sorted(&mut self.changed_paths, changes, usize::MAX, Ord::cmp);
}
}
@ -3031,16 +3042,8 @@ impl BackgroundScanner {
) {
use futures::FutureExt as _;
let (root_abs_path, root_inode) = {
let snapshot = &self.state.lock().snapshot;
(
snapshot.abs_path.clone(),
snapshot.root_entry().map(|e| e.inode),
)
};
// Populate ignores above the root.
let ignore_stack;
let root_abs_path = self.state.lock().snapshot.abs_path.clone();
for ancestor in root_abs_path.ancestors().skip(1) {
if let Ok(ignore) = build_gitignore(&ancestor.join(&*GITIGNORE), self.fs.as_ref()).await
{
@ -3051,31 +3054,24 @@ impl BackgroundScanner {
.insert(ancestor.into(), (ignore.into(), false));
}
}
let (scan_job_tx, scan_job_rx) = channel::unbounded();
{
let mut state = self.state.lock();
state.snapshot.scan_id += 1;
ignore_stack = state
.snapshot
.ignore_stack_for_abs_path(&root_abs_path, true);
if ignore_stack.is_all() {
if let Some(mut root_entry) = state.snapshot.root_entry().cloned() {
if let Some(mut root_entry) = state.snapshot.root_entry().cloned() {
let ignore_stack = state
.snapshot
.ignore_stack_for_abs_path(&root_abs_path, true);
if ignore_stack.is_all() {
root_entry.is_ignored = true;
state.insert_entry(root_entry, self.fs.as_ref());
state.insert_entry(root_entry.clone(), self.fs.as_ref());
}
state.enqueue_scan_dir(root_abs_path, &root_entry, &scan_job_tx);
}
};
// Perform an initial scan of the directory.
let (scan_job_tx, scan_job_rx) = channel::unbounded();
smol::block_on(scan_job_tx.send(ScanJob {
abs_path: root_abs_path,
path: Arc::from(Path::new("")),
ignore_stack,
ancestor_inodes: TreeSet::from_ordered_entries(root_inode),
is_external: false,
scan_queue: scan_job_tx.clone(),
}))
.unwrap();
drop(scan_job_tx);
self.scan_dirs(true, scan_job_rx).await;
{
@ -3263,20 +3259,7 @@ impl BackgroundScanner {
if let Some(entry) = state.snapshot.entry_for_path(ancestor) {
if entry.kind == EntryKind::UnloadedDir {
let abs_path = root_path.join(ancestor);
let ignore_stack =
state.snapshot.ignore_stack_for_abs_path(&abs_path, true);
let ancestor_inodes =
state.snapshot.ancestor_inodes_for_path(&ancestor);
scan_job_tx
.try_send(ScanJob {
abs_path: abs_path.into(),
path: ancestor.into(),
ignore_stack,
scan_queue: scan_job_tx.clone(),
ancestor_inodes,
is_external: entry.is_external,
})
.unwrap();
state.enqueue_scan_dir(abs_path.into(), entry, &scan_job_tx);
state.paths_to_scan.insert(path.clone());
break;
}
@ -3391,18 +3374,16 @@ impl BackgroundScanner {
let mut ignore_stack = job.ignore_stack.clone();
let mut new_ignore = None;
let (root_abs_path, root_char_bag, next_entry_id, repository) = {
let (root_abs_path, root_char_bag, next_entry_id) = {
let snapshot = &self.state.lock().snapshot;
(
snapshot.abs_path().clone(),
snapshot.root_char_bag,
self.next_entry_id.clone(),
snapshot
.local_repo_for_path(&job.path)
.map(|(work_dir, repo)| (work_dir, repo.clone())),
)
};
let mut dotgit_path = None;
let mut root_canonical_path = None;
let mut new_entries: Vec<Entry> = Vec::new();
let mut new_jobs: Vec<Option<ScanJob>> = Vec::new();
@ -3465,6 +3446,10 @@ impl BackgroundScanner {
}
}
}
// If we find a .git, we'll need to load the repository.
else if child_name == *DOT_GIT {
dotgit_path = Some(child_path.clone());
}
let mut child_entry = Entry::new(
child_path.clone(),
@ -3525,22 +3510,17 @@ impl BackgroundScanner {
},
ancestor_inodes,
scan_queue: job.scan_queue.clone(),
containing_repository: job.containing_repository.clone(),
}));
} else {
new_jobs.push(None);
}
} else {
child_entry.is_ignored = ignore_stack.is_abs_path_ignored(&child_abs_path, false);
if !child_entry.is_ignored {
if let Some((repo_path, repo)) = &repository {
if let Ok(path) = child_path.strip_prefix(&repo_path.0) {
child_entry.git_status = repo
.repo_ptr
.lock()
.status(&RepoPath(path.into()))
.log_err()
.flatten();
}
if let Some((repository_dir, statuses)) = &job.containing_repository {
if let Ok(repo_path) = child_entry.path.strip_prefix(&repository_dir.0) {
child_entry.git_status = statuses.get(&RepoPath(repo_path.into())).copied();
}
}
}
@ -3549,27 +3529,39 @@ impl BackgroundScanner {
}
let mut state = self.state.lock();
let mut new_jobs = new_jobs.into_iter();
// Identify any subdirectories that should not be scanned.
let mut job_ix = 0;
for entry in &mut new_entries {
state.reuse_entry_id(entry);
if entry.is_dir() {
let new_job = new_jobs.next().expect("missing scan job for entry");
if state.should_scan_directory(&entry) {
if let Some(new_job) = new_job {
job.scan_queue
.try_send(new_job)
.expect("channel is unbounded");
}
job_ix += 1;
} else {
log::debug!("defer scanning directory {:?}", entry.path);
entry.kind = EntryKind::UnloadedDir;
new_jobs.remove(job_ix);
}
}
}
assert!(new_jobs.next().is_none());
state.populate_dir(&job.path, new_entries, new_ignore, self.fs.as_ref());
state.populate_dir(&job.path, new_entries, new_ignore);
let repository =
dotgit_path.and_then(|path| state.build_git_repository(path, self.fs.as_ref()));
for new_job in new_jobs {
if let Some(mut new_job) = new_job {
if let Some(containing_repository) = &repository {
new_job.containing_repository = Some(containing_repository.clone());
}
job.scan_queue
.try_send(new_job)
.expect("channel is unbounded");
}
}
Ok(())
}
@ -3652,20 +3644,7 @@ impl BackgroundScanner {
if let (Some(scan_queue_tx), true) = (&scan_queue_tx, fs_entry.is_dir()) {
if state.should_scan_directory(&fs_entry) {
let mut ancestor_inodes =
state.snapshot.ancestor_inodes_for_path(&path);
if !ancestor_inodes.contains(&metadata.inode) {
ancestor_inodes.insert(metadata.inode);
smol::block_on(scan_queue_tx.send(ScanJob {
abs_path,
path: path.clone(),
ignore_stack,
ancestor_inodes,
is_external: fs_entry.is_external,
scan_queue: scan_queue_tx.clone(),
}))
.unwrap();
}
state.enqueue_scan_dir(abs_path, &fs_entry, scan_queue_tx);
} else {
fs_entry.kind = EntryKind::UnloadedDir;
}
@ -3822,18 +3801,7 @@ impl BackgroundScanner {
if was_ignored && !entry.is_ignored && entry.kind.is_unloaded() {
let state = self.state.lock();
if state.should_scan_directory(&entry) {
job.scan_queue
.try_send(ScanJob {
abs_path: abs_path.clone(),
path: entry.path.clone(),
ignore_stack: child_ignore_stack.clone(),
scan_queue: job.scan_queue.clone(),
ancestor_inodes: state
.snapshot
.ancestor_inodes_for_path(&entry.path),
is_external: false,
})
.unwrap();
state.enqueue_scan_dir(abs_path.clone(), &entry, &job.scan_queue);
}
}
@ -4022,6 +3990,7 @@ struct ScanJob {
scan_queue: Sender<ScanJob>,
ancestor_inodes: TreeSet<u64>,
is_external: bool,
containing_repository: Option<(RepositoryWorkDirectory, TreeMap<RepoPath, GitFileStatus>)>,
}
struct UpdateIgnoreStatusJob {

View file

@ -1 +1,3 @@
#!/bin/bash
ZED_ADMIN_API_TOKEN=secret ZED_SERVER_URL=http://localhost:3000 cargo run $@