feat: dag find path

This commit is contained in:
Zixuan Chen 2022-08-03 21:10:20 +08:00
parent 2d2ca76201
commit ed145367e0
10 changed files with 364 additions and 150 deletions

View file

@ -97,7 +97,7 @@ impl Mergable<ChangeMergeCfg> for Change {
}
if other.deps.is_empty()
|| (other.deps.len() == 1 && self.id.is_connected_id(&other.deps[0], self.len() as u32))
|| (other.deps.len() == 1 && self.id.is_connected_id(&other.deps[0], self.len()))
{
return false;
}
@ -111,7 +111,7 @@ impl Mergable<ChangeMergeCfg> for Change {
}
self.id.client_id == other.id.client_id
&& self.id.counter + self.len() as u32 == other.id.counter
&& self.id.counter + self.len() as Counter == other.id.counter
&& self.lamport + self.len() as Lamport == other.lamport
}
}

View file

@ -1,4 +1,4 @@
use crate::{ContentType, InsertContent, ID};
use crate::{id::Counter, ContentType, InsertContent, ID};
use rle::{HasLength, Mergable, Sliceable};
#[derive(Debug, Clone)]
@ -12,9 +12,9 @@ pub struct TextContent {
impl Mergable for TextContent {
fn is_mergable(&self, other: &Self, _: &()) -> bool {
other.id.client_id == self.id.client_id
&& self.id.counter + self.len() as u32 == other.id.counter
&& self.id.counter + self.len() as Counter == other.id.counter
&& self.id.client_id == other.origin_left.client_id
&& self.id.counter + self.len() as u32 - 1 == other.origin_left.counter
&& self.id.counter + self.len() as Counter - 1 == other.origin_left.counter
&& self.origin_right == other.origin_right
}
@ -36,12 +36,12 @@ impl Sliceable for TextContent {
TextContent {
origin_left: ID {
client_id: self.id.client_id,
counter: self.id.counter + from as u32 - 1,
counter: self.id.counter + from as Counter - 1,
},
origin_right: self.origin_right,
id: ID {
client_id: self.id.client_id,
counter: self.id.counter + from as u32,
counter: self.id.counter + from as Counter,
},
text: self.text[from..to].to_owned(),
}

View file

@ -45,7 +45,21 @@ pub trait DagNode {
#[inline]
fn get_lamport_from_counter(&self, c: Counter) -> Lamport {
self.lamport_start() + c - self.dag_id_start().counter
self.lamport_start() + c as Lamport - self.dag_id_start().counter as Lamport
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Path {
retreat: Vec<IdSpan>,
forward: Vec<IdSpan>,
}
#[allow(clippy::ptr_arg)]
fn reverse_path(path: &mut Vec<IdSpan>) {
path.reverse();
for span in path.iter_mut() {
span.counter.reverse();
}
}
@ -59,6 +73,7 @@ pub(crate) trait Dag {
//
// TODO: Maybe use Result return type
// TODO: Maybe we only need one heap?
// TODO: benchmark
// how to test better?
// - converge through other nodes
@ -66,135 +81,226 @@ pub(crate) trait Dag {
/// only returns a single root.
/// but the least common ancestor may be more than one root.
/// But that is a rare case.
///
#[inline]
fn find_common_ancestor(&self, a_id: ID, b_id: ID) -> Option<ID> {
if a_id.client_id == b_id.client_id {
if a_id.counter <= b_id.counter {
Some(a_id)
} else {
Some(b_id)
}
} else {
#[derive(Debug, PartialEq, Eq)]
struct OrdId<'a> {
id: ID,
lamport: Lamport,
deps: &'a [ID],
}
find_common_ancestor(
&|id| self.get(id).map(|x| x as &dyn DagNode),
a_id,
b_id,
|_, _, _| {},
)
}
impl<'a> PartialOrd for OrdId<'a> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.lamport.cmp(&other.lamport))
}
}
#[inline]
fn find_path(&self, from: ID, to: ID) -> Option<Path> {
let mut ans: Option<Path> = None;
impl<'a> Ord for OrdId<'a> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.lamport.cmp(&other.lamport)
}
}
let mut _a_vv: HashMap<ClientID, Counter, _> = FxHashMap::default();
let mut _b_vv: HashMap<ClientID, Counter, _> = FxHashMap::default();
let mut _a_heap: BinaryHeap<OrdId> = BinaryHeap::new();
let mut _b_heap: BinaryHeap<OrdId> = BinaryHeap::new();
let mut _a_visited: FxHashSet<ID> = FxHashSet::default();
let mut _b_visited: FxHashSet<ID> = FxHashSet::default();
{
let a = self.get(a_id).unwrap();
let b = self.get(b_id).unwrap();
_a_heap.push(OrdId {
id: a_id,
lamport: a.get_lamport_from_counter(a_id.counter),
deps: a.deps(),
});
_b_heap.push(OrdId {
id: b_id,
lamport: b.get_lamport_from_counter(b_id.counter),
deps: b.deps(),
});
_a_vv.insert(a_id.client_id, a_id.counter + 1);
_b_vv.insert(b_id.client_id, b_id.counter + 1);
}
while !_a_heap.is_empty() || !_b_heap.is_empty() {
let (a_heap, b_heap, a_vv, b_vv, a_visited, b_visited, _swapped) = if _a_heap
.is_empty()
|| (_a_heap.peek().map(|x| x.lamport).unwrap_or(0)
< _b_heap.peek().map(|x| x.lamport).unwrap_or(0))
{
// swap
(
&mut _b_heap,
&mut _a_heap,
&mut _b_vv,
&mut _a_vv,
&mut _b_visited,
&mut _a_visited,
true,
)
} else {
(
&mut _a_heap,
&mut _b_heap,
&mut _a_vv,
&mut _b_vv,
&mut _a_visited,
&mut _b_visited,
false,
)
};
while !a_heap.is_empty()
&& a_heap.peek().map(|x| x.lamport).unwrap_or(0)
>= b_heap.peek().map(|x| x.lamport).unwrap_or(0)
{
let a = a_heap.pop().unwrap();
let id = a.id;
if let Some(counter_end) = b_vv.get(&id.client_id) {
if id.counter < *counter_end {
return Some(id);
}
}
// if swapped {
// println!("A");
// } else {
// println!("B");
// }
// dbg!(&a);
#[cfg(debug_assertions)]
{
if let Some(v) = a_vv.get(&a.id.client_id) {
assert!(*v > a.id.counter)
}
}
for dep_id in a.deps {
if a_visited.contains(dep_id) {
continue;
}
let dep = self.get(*dep_id).unwrap();
a_heap.push(OrdId {
id: *dep_id,
lamport: dep.get_lamport_from_counter(dep_id.counter),
deps: dep.deps(),
#[inline]
fn get_rev_path(target: ID, from: ID, to_from_map: &FxHashMap<ID, ID>) -> Vec<IdSpan> {
let mut last_visited: Option<ID> = None;
let mut a_rev_path = vec![];
let mut node_id = target;
node_id = *to_from_map.get(&node_id).unwrap();
loop {
if let Some(last_id) = last_visited {
if last_id.client_id == node_id.client_id {
debug_assert!(last_id.counter < node_id.counter);
a_rev_path.push(IdSpan {
client_id: last_id.client_id,
counter: CounterSpan::new(last_id.counter, node_id.counter + 1),
});
a_visited.insert(*dep_id);
if let Some(v) = a_vv.get_mut(&dep_id.client_id) {
if *v < dep_id.counter + 1 {
*v = dep_id.counter + 1;
}
} else {
a_vv.insert(dep_id.client_id, dep_id.counter + 1);
last_visited = None;
} else {
a_rev_path.push(IdSpan {
client_id: last_id.client_id,
counter: CounterSpan::new(last_id.counter, last_id.counter + 1),
});
last_visited = Some(node_id);
}
} else {
last_visited = Some(node_id);
}
if node_id == from {
break;
}
node_id = *to_from_map.get(&node_id).unwrap();
}
if let Some(last_id) = last_visited {
a_rev_path.push(IdSpan {
client_id: last_id.client_id,
counter: CounterSpan::new(last_id.counter, last_id.counter + 1),
});
}
a_rev_path
}
find_common_ancestor(
&|id| self.get(id).map(|x| x as &dyn DagNode),
from,
to,
|ancestor, a_path, b_path| {
let mut a_path = get_rev_path(ancestor, from, a_path);
let b_path = get_rev_path(ancestor, to, b_path);
reverse_path(&mut a_path);
ans = Some(Path {
retreat: a_path,
forward: b_path,
});
},
);
ans
}
}
fn find_common_ancestor<'a, F, G>(get: &'a F, a_id: ID, b_id: ID, mut on_found: G) -> Option<ID>
where
F: Fn(ID) -> Option<&'a dyn DagNode>,
G: FnMut(ID, &FxHashMap<ID, ID>, &FxHashMap<ID, ID>),
{
if a_id.client_id == b_id.client_id {
if a_id.counter <= b_id.counter {
Some(a_id)
} else {
Some(b_id)
}
} else {
#[derive(Debug, PartialEq, Eq)]
struct OrdId<'a> {
id: ID,
lamport: Lamport,
deps: &'a [ID],
}
impl<'a> PartialOrd for OrdId<'a> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.lamport.cmp(&other.lamport))
}
}
impl<'a> Ord for OrdId<'a> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.lamport.cmp(&other.lamport)
}
}
let mut _a_vv: HashMap<ClientID, Counter, _> = FxHashMap::default();
let mut _b_vv: HashMap<ClientID, Counter, _> = FxHashMap::default();
// Invariant: every op id inserted to the a_heap is a key in a_path map, except for a_id
let mut _a_heap: BinaryHeap<OrdId> = BinaryHeap::new();
// Likewise
let mut _b_heap: BinaryHeap<OrdId> = BinaryHeap::new();
// FxHashMap<To, From> is used to track the deps path of each node
let mut _a_path: FxHashMap<ID, ID> = FxHashMap::default();
let mut _b_path: FxHashMap<ID, ID> = FxHashMap::default();
{
let a = get(a_id).unwrap();
let b = get(b_id).unwrap();
_a_heap.push(OrdId {
id: a_id,
lamport: a.get_lamport_from_counter(a_id.counter),
deps: a.deps(),
});
_b_heap.push(OrdId {
id: b_id,
lamport: b.get_lamport_from_counter(b_id.counter),
deps: b.deps(),
});
_a_vv.insert(a_id.client_id, a_id.counter + 1);
_b_vv.insert(b_id.client_id, b_id.counter + 1);
}
while !_a_heap.is_empty() || !_b_heap.is_empty() {
let (a_heap, b_heap, a_vv, b_vv, a_path, b_path, _swapped) = if _a_heap.is_empty()
|| (_a_heap.peek().map(|x| x.lamport).unwrap_or(0)
< _b_heap.peek().map(|x| x.lamport).unwrap_or(0))
{
// swap
(
&mut _b_heap,
&mut _a_heap,
&mut _b_vv,
&mut _a_vv,
&mut _b_path,
&mut _a_path,
true,
)
} else {
(
&mut _a_heap,
&mut _b_heap,
&mut _a_vv,
&mut _b_vv,
&mut _a_path,
&mut _b_path,
false,
)
};
while !a_heap.is_empty()
&& a_heap.peek().map(|x| x.lamport).unwrap_or(0)
>= b_heap.peek().map(|x| x.lamport).unwrap_or(0)
{
let a = a_heap.pop().unwrap();
let id = a.id;
if let Some(counter_end) = b_vv.get(&id.client_id) {
if id.counter < *counter_end {
b_path
.entry(id)
.or_insert_with(|| ID::new(id.client_id, counter_end - 1));
on_found(id, &_a_path, &_b_path);
return Some(id);
}
}
// if swapped {
// println!("A");
// } else {
// println!("B");
// }
// dbg!(&a);
#[cfg(debug_assertions)]
{
if let Some(v) = a_vv.get(&a.id.client_id) {
assert!(*v > a.id.counter)
}
}
for dep_id in a.deps {
if a_path.contains_key(dep_id) {
continue;
}
let dep = get(*dep_id).unwrap();
a_heap.push(OrdId {
id: *dep_id,
lamport: dep.get_lamport_from_counter(dep_id.counter),
deps: dep.deps(),
});
a_path.insert(*dep_id, a.id);
if dep.dag_id_start() != *dep_id {
a_path.insert(dep.dag_id_start(), *dep_id);
}
if let Some(v) = a_vv.get_mut(&dep_id.client_id) {
if *v < dep_id.counter + 1 {
*v = dep_id.counter + 1;
}
} else {
a_vv.insert(dep_id.client_id, dep_id.counter + 1);
}
}
}
None
}
None
}
}

View file

@ -62,6 +62,7 @@ impl Dag for TestDag {
id.counter >= node.id.counter && id.counter < node.id.counter + node.len as Counter
})
}
fn frontier(&self) -> &[ID] {
&self.frontier
}
@ -89,11 +90,19 @@ impl TestDag {
}
}
fn get_last_node(&mut self) -> &mut TestNode {
self.nodes
.get_mut(&self.client_id)
.unwrap()
.last_mut()
.unwrap()
}
fn push(&mut self, len: usize) {
let client_id = self.client_id;
let counter = self.version_vec.entry(client_id).or_insert(0);
let id = ID::new(client_id, *counter);
*counter += len as u32;
*counter += len as Counter;
let deps = std::mem::replace(&mut self.frontier, vec![id]);
self.nodes
.entry(client_id)
@ -147,7 +156,7 @@ impl TestDag {
}
update_frontier(
&mut self.frontier,
node.id.inc((node.len() - 1) as u32),
node.id.inc((node.len() - 1) as Counter),
&node.deps,
);
self.nodes
@ -155,7 +164,7 @@ impl TestDag {
.or_insert(vec![])
.push(node.clone());
self.version_vec
.insert(client_id, node.id.counter + node.len as u32);
.insert(client_id, node.id.counter + node.len as Counter);
self.next_lamport = self.next_lamport.max(node.lamport + node.len as u32);
false
}
@ -207,6 +216,63 @@ struct Interaction {
len: usize,
}
mod find_path {
use super::*;
#[test]
fn no_path() {
let mut a = TestDag::new(0);
let mut b = TestDag::new(1);
a.push(1);
b.push(1);
a.merge(&b);
let actual = a.find_path(ID::new(0, 0), ID::new(1, 0));
assert_eq!(actual, None);
}
#[test]
fn one_path() {
let mut a = TestDag::new(0);
let mut b = TestDag::new(1);
// 0 - 0
a.push(1);
b.merge(&a);
// 1 - 0
b.push(1);
// 0 - 1
a.push(1);
a.merge(&b);
let actual = a.find_path(ID::new(0, 1), ID::new(1, 0));
assert_eq!(
actual,
Some(Path {
retreat: vec![IdSpan::new(0, CounterSpan::new(1, 0))],
forward: vec![IdSpan::new(1, CounterSpan::new(0, 1))],
})
);
}
#[test]
fn middle() {
let mut a = TestDag::new(0);
let mut b = TestDag::new(1);
a.push(4);
b.push(1);
b.push(1);
let node = b.get_last_node();
node.deps.push(ID::new(0, 2));
b.merge(&a);
let actual = b.find_path(ID::new(0, 3), ID::new(1, 1));
assert_eq!(
actual,
Some(Path {
retreat: vec![IdSpan::new(0, CounterSpan::new(3, 2))],
forward: vec![IdSpan::new(1, CounterSpan::new(1, 2))],
})
);
}
}
mod find_common_ancestors {
use super::*;

View file

@ -1,21 +1,21 @@
use serde::Serialize;
pub type ClientID = u64;
pub type Counter = u32;
pub type Counter = i32;
#[derive(PartialEq, Eq, Hash, Clone, Debug, Copy, PartialOrd, Ord, Serialize)]
pub struct ID {
pub client_id: u64,
pub counter: u32,
pub counter: Counter,
}
pub const ROOT_ID: ID = ID {
client_id: u64::MAX,
counter: u32::MAX,
counter: i32::MAX,
};
impl ID {
pub fn new(client_id: u64, counter: u32) -> Self {
pub fn new(client_id: u64, counter: Counter) -> Self {
ID { client_id, counter }
}
@ -28,11 +28,11 @@ impl ID {
}
#[inline]
pub(crate) fn is_connected_id(&self, other: &Self, self_len: u32) -> bool {
self.client_id == other.client_id && self.counter + self_len == other.counter
pub(crate) fn is_connected_id(&self, other: &Self, self_len: usize) -> bool {
self.client_id == other.client_id && self.counter + self_len as Counter == other.counter
}
pub fn inc(&self, inc: u32) -> Self {
pub fn inc(&self, inc: i32) -> Self {
ID {
client_id: self.client_id,
counter: self.counter + inc,

View file

@ -1,7 +1,7 @@
//! # Loro
//!
//!
#![allow(dead_code, unused_imports, clippy::explicit_auto_deref)]
#![allow(dead_code, unused_imports)]
pub mod change;
pub mod configure;

View file

@ -61,7 +61,7 @@ impl<'a> Iterator for OpIter<'a> {
self.heap.push(Reverse(OpProxy::new(
change,
op,
Some(start..(op.len() as u32)),
Some(start..(op.len() as Counter)),
)));
}
}

View file

@ -1,6 +1,6 @@
use crate::{
container::ContainerID,
id::ID,
id::{Counter, ID},
span::{CounterSpan, IdSpan},
};
use rle::{HasLength, Mergable, RleVec, Sliceable};
@ -78,7 +78,7 @@ impl Op {
impl Mergable for Op {
fn is_mergable(&self, other: &Self, cfg: &()) -> bool {
self.id.is_connected_id(&other.id, self.len() as u32)
self.id.is_connected_id(&other.id, self.len())
&& self.content.is_mergable(&other.content, cfg)
&& self.container == other.container
}
@ -124,7 +124,7 @@ impl Sliceable for Op {
Op {
id: ID {
client_id: self.id.client_id,
counter: (self.id.counter + from as u32),
counter: (self.id.counter + from as Counter),
},
content,
container: self.container.clone(),

View file

@ -2,14 +2,16 @@ use std::ops::Range;
use rle::{HasLength, Sliceable};
use crate::{container::ContainerID, Change, Lamport, Op, OpContent, OpType, Timestamp, ID};
use crate::{
container::ContainerID, id::Counter, Change, Lamport, Op, OpContent, OpType, Timestamp, ID,
};
/// OpProxy represents a slice of an Op
pub struct OpProxy<'a> {
change: &'a Change,
op: &'a Op,
/// slice range of the op, op[slice_range]
slice_range: Range<u32>,
slice_range: Range<Counter>,
}
impl PartialEq for OpProxy<'_> {
@ -43,20 +45,21 @@ impl Ord for OpProxy<'_> {
}
impl<'a> OpProxy<'a> {
pub fn new(change: &'a Change, op: &'a Op, range: Option<Range<u32>>) -> Self {
pub fn new(change: &'a Change, op: &'a Op, range: Option<Range<Counter>>) -> Self {
OpProxy {
change,
op,
slice_range: if let Some(range) = range {
range
} else {
0..op.len() as u32
0..op.len() as Counter
},
}
}
pub fn lamport(&self) -> Lamport {
self.change.lamport + self.op.id.counter - self.change.id.counter + self.slice_range.start
self.change.lamport + self.op.id.counter as Lamport - self.change.id.counter as Lamport
+ self.slice_range.start as Lamport
}
pub fn id(&self) -> ID {
@ -74,7 +77,7 @@ impl<'a> OpProxy<'a> {
self.op
}
pub fn slice_range(&self) -> &Range<u32> {
pub fn slice_range(&self) -> &Range<Counter> {
&self.slice_range
}

View file

@ -15,6 +15,28 @@ impl CounterSpan {
CounterSpan { from, to }
}
#[inline]
pub fn from_inclusive(from: Counter, to: Counter) -> Self {
if from <= to {
CounterSpan { from, to: to + 1 }
} else {
CounterSpan { from, to: to - 1 }
}
}
#[inline]
pub fn reverse(&mut self) {
if self.from == self.to {
return;
}
if self.from < self.to {
(self.from, self.to) = (self.to - 1, self.from - 1);
} else {
(self.from, self.to) = (self.to + 1, self.from + 1);
}
}
#[inline]
pub fn min(&self) -> Counter {
if self.from < self.to {
@ -94,13 +116,30 @@ impl Mergable for CounterSpan {
}
}
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct IdSpan {
pub client_id: ClientID,
pub counter: CounterSpan,
}
impl std::fmt::Debug for IdSpan {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(
format!(
"IdSpan{{{:?}, {} ~ {}}}",
self.client_id, self.counter.from, self.counter.to
)
.as_str(),
)
}
}
impl IdSpan {
#[inline]
pub fn new(client_id: ClientID, counter: CounterSpan) -> Self {
IdSpan { client_id, counter }
}
#[inline]
pub fn min(&self) -> Counter {
self.counter.min()