index: add all_heads_for_gc() that iterates heads of all indexed commits

GitBackend::gc() will recreate no-gc refs for the indexed heads. We could
collect all historical heads by traversing the operation log, but that isn't
enough because there may be predecessor links to hidden commits, and "git gc"
isn't aware of predecessors.
This commit is contained in:
Yuya Nishihara 2024-01-09 20:37:46 +09:00
parent afa72ff496
commit c7be4d019c
5 changed files with 68 additions and 9 deletions

View file

@ -30,7 +30,7 @@ use super::revset_engine;
use crate::backend::{ChangeId, CommitId};
use crate::hex_util;
use crate::id_prefix::{IdIndex, IdIndexSource, IdIndexSourceEntry};
use crate::index::{ChangeIdIndex, Index};
use crate::index::{AllHeadsForGcUnsupported, ChangeIdIndex, Index};
use crate::object_id::{HexPrefix, ObjectId, PrefixResolution};
use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError};
use crate::store::Store;
@ -118,7 +118,6 @@ impl<'a> CompositeIndex<'a> {
let num_commits = self.num_commits();
let mut num_merges = 0;
let mut max_generation_number = 0;
let mut is_head = vec![true; num_commits as usize];
let mut change_ids = HashSet::new();
for pos in 0..num_commits {
let entry = self.entry_by_pos(IndexPosition(pos));
@ -126,12 +125,9 @@ impl<'a> CompositeIndex<'a> {
if entry.num_parents() > 1 {
num_merges += 1;
}
for parent_pos in entry.parent_positions() {
is_head[parent_pos.0 as usize] = false;
}
change_ids.insert(entry.change_id());
}
let num_heads = u32::try_from(is_head.iter().filter(|is_head| **is_head).count()).unwrap();
let num_heads = u32::try_from(self.all_heads_pos().count()).unwrap();
let mut levels = self
.ancestor_index_segments()
@ -262,6 +258,28 @@ impl<'a> CompositeIndex<'a> {
rev_walk
}
/// Yields the commit id of the entry at each position produced by
/// `all_heads_pos()`, in the same order.
pub(super) fn all_heads(self) -> impl Iterator<Item = CommitId> + 'a {
    let head_positions = self.all_heads_pos();
    head_positions.map(move |head_pos| {
        let entry = self.entry_by_pos(head_pos);
        entry.commit_id()
    })
}
pub(super) fn all_heads_pos(&self) -> impl Iterator<Item = IndexPosition> {
// TODO: can be optimized to use bit vec and leading/trailing_ones()
let num_commits = self.num_commits();
let mut not_head: Vec<bool> = vec![false; num_commits as usize];
for pos in 0..num_commits {
let entry = self.entry_by_pos(IndexPosition(pos));
for IndexPosition(parent_pos) in entry.parent_positions() {
not_head[parent_pos as usize] = true;
}
}
not_head
.into_iter()
.enumerate()
.filter(|&(_, b)| !b)
.map(|(i, _)| IndexPosition(u32::try_from(i).unwrap()))
}
pub fn heads_pos(
&self,
mut candidate_positions: BTreeSet<IndexPosition>,
@ -364,6 +382,12 @@ impl Index for CompositeIndex<'_> {
.collect()
}
fn all_heads_for_gc(
    &self,
) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
    // This implementation always succeeds: it boxes the iterator returned
    // by `all_heads()`.
    let head_ids = self.all_heads();
    Ok(Box::new(head_ids))
}
fn heads(&self, candidate_ids: &mut dyn Iterator<Item = &CommitId>) -> Vec<CommitId> {
let candidate_positions: BTreeSet<_> = candidate_ids
.map(|id| self.commit_id_to_pos(id).unwrap())

View file

@ -1143,7 +1143,12 @@ mod tests {
// Merge commit and other commit
assert_eq!(
index.heads(&mut [id_5.clone(), id_3.clone()].iter()),
vec![id_3, id_5]
vec![id_3.clone(), id_5.clone()]
);
assert_eq!(
index.all_heads_for_gc().unwrap().collect_vec(),
vec![id_3.clone(), id_5.clone()]
);
}
}

View file

@ -35,7 +35,7 @@ use super::readonly::{DefaultReadonlyIndex, ReadonlyIndexSegment};
use crate::backend::{ChangeId, CommitId};
use crate::commit::Commit;
use crate::file_util::persist_content_addressed_temp_file;
use crate::index::{ChangeIdIndex, Index, MutableIndex, ReadonlyIndex};
use crate::index::{AllHeadsForGcUnsupported, ChangeIdIndex, Index, MutableIndex, ReadonlyIndex};
use crate::object_id::{HexPrefix, ObjectId, PrefixResolution};
use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError};
use crate::store::Store;
@ -434,6 +434,12 @@ impl Index for DefaultMutableIndex {
self.as_composite().common_ancestors(set1, set2)
}
fn all_heads_for_gc(
    &self,
) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
    // Delegates to the composite view of this index; always returns `Ok`.
    let composite = self.as_composite();
    let head_ids = composite.all_heads();
    Ok(Box::new(head_ids))
}
fn heads(&self, candidates: &mut dyn Iterator<Item = &CommitId>) -> Vec<CommitId> {
self.as_composite().heads(candidates)
}

View file

@ -30,7 +30,7 @@ use super::composite::{AsCompositeIndex, ChangeIdIndexImpl, CompositeIndex, Inde
use super::entry::{IndexPosition, LocalPosition, SmallIndexPositionsVec};
use super::mutable::DefaultMutableIndex;
use crate::backend::{ChangeId, CommitId};
use crate::index::{ChangeIdIndex, Index, MutableIndex, ReadonlyIndex};
use crate::index::{AllHeadsForGcUnsupported, ChangeIdIndex, Index, MutableIndex, ReadonlyIndex};
use crate::object_id::{HexPrefix, ObjectId, PrefixResolution};
use crate::revset::{ResolvedExpression, Revset, RevsetEvaluationError};
use crate::store::Store;
@ -501,6 +501,12 @@ impl Index for DefaultReadonlyIndex {
self.as_composite().common_ancestors(set1, set2)
}
fn all_heads_for_gc(
    &self,
) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported> {
    // Delegates to the composite view of this index; always returns `Ok`.
    let composite = self.as_composite();
    let head_ids = composite.all_heads();
    Ok(Box::new(head_ids))
}
fn heads(&self, candidates: &mut dyn Iterator<Item = &CommitId>) -> Vec<CommitId> {
self.as_composite().heads(candidates)
}

View file

@ -38,6 +38,12 @@ pub struct IndexReadError(pub Box<dyn std::error::Error + Send + Sync>);
#[error(transparent)]
pub struct IndexWriteError(pub Box<dyn std::error::Error + Send + Sync>);
/// Error returned by `Index::all_heads_for_gc()` when the index backend does
/// not support collecting the heads of all indexed commits.
#[derive(Debug, Error)]
#[error("Cannot collect all heads by index of this type")]
pub struct AllHeadsForGcUnsupported;
pub trait IndexStore: Send + Sync + Debug {
fn as_any(&self) -> &dyn Any;
@ -67,6 +73,18 @@ pub trait Index: Send + Sync {
fn common_ancestors(&self, set1: &[CommitId], set2: &[CommitId]) -> Vec<CommitId>;
/// Heads among all indexed commits at the associated operation.
///
/// Provided that the index contains all the historical heads and their
/// ancestors/predecessors reachable from the associated operation, the
/// heads returned by this function are the ones that should be preserved
/// on garbage collection.
///
/// The iteration order is unspecified.
///
/// # Errors
///
/// Returns `AllHeadsForGcUnsupported` if the index backend does not
/// support this operation.
fn all_heads_for_gc(
&self,
) -> Result<Box<dyn Iterator<Item = CommitId> + '_>, AllHeadsForGcUnsupported>;
fn heads(&self, candidates: &mut dyn Iterator<Item = &CommitId>) -> Vec<CommitId>;
/// Parents before children