// Copyright 2020 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use std::collections::HashSet; use std::iter::Iterator; use crate::commit::Commit; use crate::store::CommitId; use std::hash::Hash; pub struct AncestorsIter { bfs_iter: BfsIter<'static, 'static, Commit, CommitId, Vec>, } impl Iterator for AncestorsIter { type Item = Commit; fn next(&mut self) -> Option { self.bfs_iter.next() } } pub fn walk_ancestors(start: II) -> AncestorsIter where II: IntoIterator, { let bfs_iter = bfs( start, Box::new(|commit| commit.id().clone()), Box::new(|commit| commit.parents()), ); AncestorsIter { bfs_iter } } pub struct BfsIter<'id_fn, 'neighbors_fn, T, ID, NI> { id_fn: Box ID + 'id_fn>, neighbors_fn: Box NI + 'neighbors_fn>, work: Vec, visited: HashSet, } impl Iterator for BfsIter<'_, '_, T, ID, NI> where ID: Hash + Eq, NI: IntoIterator, { type Item = T; fn next(&mut self) -> Option { while !self.work.is_empty() { let c = self.work.pop().unwrap(); let id = (self.id_fn)(&c); if self.visited.contains(&id) { continue; } for p in (self.neighbors_fn)(&c) { self.work.push(p); } self.visited.insert(id); return Some(c); } None } } pub fn bfs<'id_fn, 'neighbors_fn, T, ID, II, NI>( start: II, id_fn: Box ID + 'id_fn>, neighbors_fn: Box NI + 'neighbors_fn>, ) -> BfsIter<'id_fn, 'neighbors_fn, T, ID, NI> where ID: Hash + Eq, II: IntoIterator, NI: IntoIterator, { BfsIter { id_fn, neighbors_fn, work: start.into_iter().collect(), visited: Default::default(), } } pub struct TopoIter<'id_fn, 'neighbors_fn, T, ID, NI> { id_fn: Box ID + 'id_fn>, neighbors_fn: Box NI + 'neighbors_fn>, work: Vec, visited: HashSet, } impl Iterator for TopoIter<'_, '_, T, ID, NI> where ID: Hash + Eq, NI: IntoIterator, { type Item = T; fn next(&mut self) -> Option { while !self.work.is_empty() { let c = self.work.pop().unwrap(); let id = (self.id_fn)(&c); if self.visited.contains(&id) { continue; } for p in (self.neighbors_fn)(&c) { self.work.push(p); } self.visited.insert(id); return Some(c); } None } } /// Returns neighbors before the node itself. pub fn topo_order_reverse( start: II, id_fn: Box ID>, mut neighbors_fn: Box NI>, ) -> Vec where T: Hash + Eq + Clone, ID: Hash + Eq + Clone, II: IntoIterator, NI: IntoIterator, { let mut visiting = HashSet::new(); let mut emitted = HashSet::new(); let mut result = vec![]; let mut start_nodes: Vec<_> = start.into_iter().collect(); start_nodes.reverse(); for start_node in start_nodes { let mut stack = vec![(start_node, false)]; while !stack.is_empty() { let (node, neighbors_visited) = stack.pop().unwrap(); let id = id_fn(&node); if emitted.contains(&id) { continue; } if !neighbors_visited { assert!(visiting.insert(id.clone()), "graph has cycle"); let neighbors = neighbors_fn(&node); stack.push((node, true)); for neighbor in neighbors { stack.push((neighbor, false)); } } else { visiting.remove(&id); emitted.insert(id); result.push(node); } } } result.reverse(); result } pub fn leaves( start: II, neighbors_fn: &mut impl FnMut(&T) -> NI, id_fn: &impl Fn(&T) -> ID, ) -> HashSet where T: Hash + Eq + Clone, ID: Hash + Eq, II: IntoIterator, NI: IntoIterator, { let mut visited = HashSet::new(); let mut work: Vec = start.into_iter().collect(); let mut leaves: HashSet = work.iter().cloned().collect(); let mut non_leaves = HashSet::new(); while !work.is_empty() { // TODO: make this not waste so much memory on the sets let mut new_work = vec![]; for c in work { let id: ID = id_fn(&c); if visited.contains(&id) { continue; } for p in neighbors_fn(&c) { non_leaves.insert(c.clone()); new_work.push(p); } visited.insert(id); leaves.insert(c); } work = new_work; } leaves.difference(&non_leaves).cloned().collect() } /// Find nodes in the start set that are not reachable from other nodes in the /// start set. pub fn unreachable( start: II, neighbors_fn: &impl Fn(&T) -> NI, id_fn: &impl Fn(&T) -> ID, ) -> HashSet where T: Hash + Eq + Clone, ID: Hash + Eq, II: IntoIterator, NI: IntoIterator, { let start: Vec = start.into_iter().collect(); let mut reachable: HashSet = start.iter().cloned().collect(); for _node in bfs( start.into_iter(), Box::new(id_fn), Box::new(|node| { let neighbors: Vec = neighbors_fn(node).into_iter().collect(); for neighbor in &neighbors { reachable.remove(&neighbor); } neighbors }), ) {} reachable } pub fn common_ancestor<'a, I1, I2>(set1: I1, set2: I2) -> Commit where I1: IntoIterator, I2: IntoIterator, { let set1: Vec = set1.into_iter().cloned().collect(); let set2: Vec = set2.into_iter().cloned().collect(); closest_common_node(set1, set2, &|commit| commit.parents(), &|commit| { commit.id().clone() }) .unwrap() } pub fn closest_common_node( set1: II1, set2: II2, neighbors_fn: &impl Fn(&T) -> NI, id_fn: &impl Fn(&T) -> ID, ) -> Option where T: Hash + Eq + Clone, ID: Hash + Eq, II1: IntoIterator, II2: IntoIterator, NI: IntoIterator, { let mut visited1 = HashSet::new(); let mut visited2 = HashSet::new(); let mut work1: Vec = set1.into_iter().collect(); let mut work2: Vec = set2.into_iter().collect(); while !work1.is_empty() || !work2.is_empty() { let mut new_work1 = vec![]; for node in work1 { let id: ID = id_fn(&node); if visited2.contains(&id) { return Some(node); } if visited1.insert(id) { for neighbor in neighbors_fn(&node) { new_work1.push(neighbor); } } } work1 = new_work1; let mut new_work2 = vec![]; for node in work2 { let id: ID = id_fn(&node); if visited1.contains(&id) { return Some(node); } if visited2.insert(id) { for neighbor in neighbors_fn(&node) { new_work2.push(neighbor); } } } work2 = new_work2; } None } #[cfg(test)] mod tests { use super::*; use std::collections::HashMap; #[test] fn topo_order_reverse_linear() { // This graph: // o C // o B // o A let mut neighbors = HashMap::new(); neighbors.insert('A', vec![]); neighbors.insert('B', vec!['A']); neighbors.insert('C', vec!['B']); let common = topo_order_reverse( vec!['C'], Box::new(|node| *node), Box::new(move |node| neighbors[node].clone()), ); assert_eq!(common, vec!['C', 'B', 'A']); } #[test] fn topo_order_reverse_merge() { // This graph: // o F // |\ // o | E // | o D // | o C // | o B // |/ // o A let mut neighbors = HashMap::new(); neighbors.insert('A', vec![]); neighbors.insert('B', vec!['A']); neighbors.insert('C', vec!['B']); neighbors.insert('D', vec!['C']); neighbors.insert('E', vec!['A']); neighbors.insert('F', vec!['E', 'D']); let common = topo_order_reverse( vec!['F'], Box::new(|node| *node), Box::new(move |node| neighbors[node].clone()), ); assert_eq!(common, vec!['F', 'E', 'D', 'C', 'B', 'A']); } #[test] fn topo_order_reverse_multiple_heads() { // This graph: // o F // |\ // o | E // | o D // | | o C // | | | // | | o B // | |/ // |/ // o A let mut neighbors = HashMap::new(); neighbors.insert('A', vec![]); neighbors.insert('B', vec!['A']); neighbors.insert('C', vec!['B']); neighbors.insert('D', vec!['A']); neighbors.insert('E', vec!['A']); neighbors.insert('F', vec!['E', 'D']); let common = topo_order_reverse( vec!['F', 'C'], Box::new(|node| *node), Box::new(move |node| neighbors[node].clone()), ); assert_eq!(common, vec!['F', 'E', 'D', 'C', 'B', 'A']); } #[test] fn closest_common_node_tricky() { // Test this case where A is the shortest distance away, but we still want the // result to be B because A is an ancestor of B. In other words, we want // to minimize the longest distance. // // E H // |\ /| // | D G | // | C F | // \ \ / / // \ B / // \|/ // A let mut neighbors = HashMap::new(); neighbors.insert('A', vec![]); neighbors.insert('B', vec!['A']); neighbors.insert('C', vec!['B']); neighbors.insert('D', vec!['C']); neighbors.insert('E', vec!['A', 'D']); neighbors.insert('F', vec!['B']); neighbors.insert('G', vec!['F']); neighbors.insert('H', vec!['A', 'G']); let common = closest_common_node( vec!['E'], vec!['H'], &|node| neighbors[node].clone(), &|node| *node, ); // TODO: fix the implementation to return B assert_eq!(common, Some('A')); } #[test] fn unreachable_mixed() { // Test the uppercase letters are in the start set // // D F // |/| // C e // |/ // b // | // A let mut neighbors = HashMap::new(); neighbors.insert('A', vec![]); neighbors.insert('b', vec!['A']); neighbors.insert('C', vec!['b']); neighbors.insert('D', vec!['C']); neighbors.insert('e', vec!['b']); neighbors.insert('F', vec!['C', 'e']); let expected: HashSet = vec!['D', 'F'].into_iter().collect(); let actual = unreachable( vec!['A', 'C', 'D', 'F'], &|node| neighbors[node].clone(), &|node| *node, ); assert_eq!(actual, expected); // Check with a different order in the start set let actual = unreachable( vec!['F', 'D', 'C', 'A'], &|node| neighbors[node].clone(), &|node| *node, ); assert_eq!(actual, expected); } }