graphlog: use IndexPosition until transitive edges get eliminated

This partially reverts 4c8f484278 "graphlog: key by commit id (not index
position)." As Martin pointed out, it made "log -r 'tags()' -T.." in git
repo super slow. Apparently, both clone() and hash map insertion/lookup costs
increased by that change. Since we don't need CommitId inside the graph
iterator, we can simply replace it with IndexPosition, and resolve it to
CommitId later.
This commit is contained in:
Yuya Nishihara 2023-07-24 00:31:00 +09:00
parent 2bbaa4352a
commit 817713c921
3 changed files with 68 additions and 39 deletions

View file

@ -78,7 +78,7 @@ impl<'index> RevsetImpl<'index> {
}
pub fn iter_graph_impl(&self) -> RevsetGraphIterator<'_, 'index> {
RevsetGraphIterator::new(self.inner.iter())
RevsetGraphIterator::new(self.index, self.inner.iter())
}
}
@ -104,7 +104,7 @@ impl<'index> Revset<'index> for RevsetImpl<'index> {
}
fn iter_graph(&self) -> Box<dyn Iterator<Item = (CommitId, Vec<RevsetGraphEdge>)> + '_> {
Box::new(RevsetGraphIterator::new(self.inner.iter()))
Box::new(self.iter_graph_impl())
}
fn change_id_index(&self) -> Box<dyn ChangeIdIndex + 'index> {

View file

@ -14,13 +14,46 @@
#![allow(missing_docs)]
use std::cmp::min;
use std::cmp::{min, Reverse};
use std::collections::{BTreeMap, HashSet};
use crate::backend::CommitId;
use crate::default_index_store::{IndexEntry, IndexPosition};
use crate::default_index_store::{CompositeIndex, IndexEntry, IndexPosition};
use crate::revset::{RevsetGraphEdge, RevsetGraphEdgeType};
/// Like `RevsetGraphEdge`, but stores `IndexPosition` instead.
///
/// This can be cheaply allocated and hashed compared to `CommitId`-based type.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
struct IndexGraphEdge {
target: IndexPosition,
edge_type: RevsetGraphEdgeType,
}
impl IndexGraphEdge {
fn missing(target: IndexPosition) -> Self {
let edge_type = RevsetGraphEdgeType::Missing;
IndexGraphEdge { target, edge_type }
}
fn direct(target: IndexPosition) -> Self {
let edge_type = RevsetGraphEdgeType::Direct;
IndexGraphEdge { target, edge_type }
}
fn indirect(target: IndexPosition) -> Self {
let edge_type = RevsetGraphEdgeType::Indirect;
IndexGraphEdge { target, edge_type }
}
fn to_revset_edge(&self, index: CompositeIndex<'_>) -> RevsetGraphEdge {
RevsetGraphEdge {
target: index.entry_by_pos(self.target).commit_id(),
edge_type: self.edge_type,
}
}
}
/// Given an iterator over some set of revisions, yields the same revisions with
/// associated edge types.
///
@ -88,6 +121,7 @@ use crate::revset::{RevsetGraphEdge, RevsetGraphEdgeType};
/// could lead to "D", but that would require extra book-keeping to remember for
/// later that the edges from "f" and "H" are only partially computed.
pub struct RevsetGraphIterator<'revset, 'index> {
index: CompositeIndex<'index>,
input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
/// Commits in the input set we had to take out of the iterator while
/// walking external edges. Does not necessarily include the commit
@ -98,15 +132,17 @@ pub struct RevsetGraphIterator<'revset, 'index> {
min_position: IndexPosition,
/// Edges for commits not in the input set.
// TODO: Remove unneeded entries here as we go (that's why it's an ordered map)?
edges: BTreeMap<IndexPosition, HashSet<(IndexPosition, RevsetGraphEdge)>>,
edges: BTreeMap<IndexPosition, HashSet<IndexGraphEdge>>,
skip_transitive_edges: bool,
}
impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
pub fn new(
index: CompositeIndex<'index>,
input_set_iter: Box<dyn Iterator<Item = IndexEntry<'index>> + 'revset>,
) -> RevsetGraphIterator<'revset, 'index> {
RevsetGraphIterator {
index,
input_set_iter,
look_ahead: Default::default(),
min_position: IndexPosition::MAX,
@ -130,24 +166,23 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
fn edges_from_internal_commit(
&mut self,
index_entry: &IndexEntry<'index>,
) -> HashSet<(IndexPosition, RevsetGraphEdge)> {
) -> HashSet<IndexGraphEdge> {
if let Some(edges) = self.edges.get(&index_entry.position()) {
return edges.clone();
}
let mut edges = HashSet::new();
for parent in index_entry.parents() {
let parent_position = parent.position();
let parent_commit_id = parent.commit_id();
self.consume_to(parent_position);
if self.look_ahead.contains_key(&parent_position) {
edges.insert((parent_position, RevsetGraphEdge::direct(parent_commit_id)));
edges.insert(IndexGraphEdge::direct(parent_position));
} else {
let parent_edges = self.edges_from_external_commit(parent);
if parent_edges
.iter()
.all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing)
.all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing)
{
edges.insert((parent_position, RevsetGraphEdge::missing(parent_commit_id)));
edges.insert(IndexGraphEdge::missing(parent_position));
} else {
edges.extend(parent_edges);
}
@ -160,7 +195,7 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
fn edges_from_external_commit(
&mut self,
index_entry: IndexEntry<'index>,
) -> HashSet<(IndexPosition, RevsetGraphEdge)> {
) -> HashSet<IndexGraphEdge> {
let position = index_entry.position();
let mut stack = vec![index_entry];
while let Some(entry) = stack.last() {
@ -172,28 +207,19 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
self.consume_to(parent_position);
if self.look_ahead.contains_key(&parent_position) {
// We have found a path back into the input set
edges.insert((
parent_position,
RevsetGraphEdge::indirect(parent.commit_id()),
));
edges.insert(IndexGraphEdge::indirect(parent_position));
} else if let Some(parent_edges) = self.edges.get(&parent_position) {
if parent_edges
.iter()
.all(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Missing)
.all(|edge| edge.edge_type == RevsetGraphEdgeType::Missing)
{
edges.insert((
parent_position,
RevsetGraphEdge::missing(parent.commit_id()),
));
edges.insert(IndexGraphEdge::missing(parent_position));
} else {
edges.extend(parent_edges.iter().cloned());
}
} else if parent_position < self.min_position {
// The parent is not in the input set
edges.insert((
parent_position,
RevsetGraphEdge::missing(parent.commit_id()),
));
edges.insert(IndexGraphEdge::missing(parent_position));
} else {
// The parent is not in the input set but it's somewhere in the range
// where we have commits in the input set, so continue searching.
@ -211,11 +237,11 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
fn remove_transitive_edges(
&mut self,
edges: HashSet<(IndexPosition, RevsetGraphEdge)>,
) -> HashSet<(IndexPosition, RevsetGraphEdge)> {
edges: HashSet<IndexGraphEdge>,
) -> HashSet<IndexGraphEdge> {
if !edges
.iter()
.any(|(_, edge)| edge.edge_type == RevsetGraphEdgeType::Indirect)
.any(|edge| edge.edge_type == RevsetGraphEdgeType::Indirect)
{
return edges;
}
@ -223,29 +249,29 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
let mut initial_targets = HashSet::new();
let mut work = vec![];
// To start with, add the edges one step after the input edges.
for (target, edge) in &edges {
initial_targets.insert(target);
for edge in &edges {
initial_targets.insert(edge.target);
if edge.edge_type != RevsetGraphEdgeType::Missing {
let entry = self.look_ahead.get(target).unwrap().clone();
let entry = self.look_ahead.get(&edge.target).unwrap().clone();
min_generation = min(min_generation, entry.generation_number());
work.extend(self.edges_from_internal_commit(&entry));
}
}
// Find commits reachable transitively and add them to the `unwanted` set.
let mut unwanted = HashSet::new();
while let Some((target, edge)) = work.pop() {
if edge.edge_type == RevsetGraphEdgeType::Missing || target < self.min_position {
while let Some(edge) = work.pop() {
if edge.edge_type == RevsetGraphEdgeType::Missing || edge.target < self.min_position {
continue;
}
if !unwanted.insert(target) {
if !unwanted.insert(edge.target) {
// Already visited
continue;
}
if initial_targets.contains(&target) {
if initial_targets.contains(&edge.target) {
// Already visited
continue;
}
let entry = self.look_ahead.get(&target).unwrap().clone();
let entry = self.look_ahead.get(&edge.target).unwrap().clone();
if entry.generation_number() < min_generation {
continue;
}
@ -254,7 +280,7 @@ impl<'revset, 'index> RevsetGraphIterator<'revset, 'index> {
edges
.into_iter()
.filter(|(target, _)| !unwanted.contains(target))
.filter(|edge| !unwanted.contains(&edge.target))
.collect()
}
@ -281,8 +307,11 @@ impl<'revset, 'index> Iterator for RevsetGraphIterator<'revset, 'index> {
edges = self.remove_transitive_edges(edges);
}
let mut edges: Vec<_> = edges.into_iter().collect();
edges.sort_by(|(target_pos1, _), (target_pos2, _)| target_pos2.cmp(target_pos1));
let edges = edges.into_iter().map(|(_, edge)| edge).collect();
edges.sort_unstable_by_key(|edge| Reverse(edge.target));
let edges = edges
.iter()
.map(|edge| edge.to_revset_edge(self.index))
.collect();
Some((index_entry.commit_id(), edges))
}
}

View file

@ -2185,7 +2185,7 @@ impl RevsetGraphEdge {
}
}
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
pub enum RevsetGraphEdgeType {
Missing,
Direct,