fix: avoid import issue on rich text doc

by not splitting the doc at a version that separates the StyleStart and StyleEnd ops
This commit is contained in:
Zixuan Chen 2024-09-05 17:48:59 +08:00
parent 6e8048bc28
commit c04da30572
No known key found for this signature in database
3 changed files with 110 additions and 49 deletions

View file

@ -822,7 +822,8 @@ pub(crate) struct RichtextDiffCalculator {
enum RichtextCalcMode {
Crdt {
tracker: Box<RichtextTracker>,
styles: Vec<StyleOp>,
/// (op, end_pos)
styles: Vec<(StyleOp, usize)>,
start_vv: VersionVector,
},
Linear {
@ -1011,14 +1012,17 @@ impl DiffCalculatorTrait for RichtextDiffCalculator {
} => {
debug_assert!(start < end, "start: {}, end: {}", start, end);
let style_id = styles.len();
styles.push(StyleOp {
lamport: op.lamport(),
peer: op.peer,
cnt: op.id_start().counter,
key: key.clone(),
value: value.clone(),
info: *info,
});
styles.push((
StyleOp {
lamport: op.lamport(),
peer: op.peer,
cnt: op.id_start().counter,
key: key.clone(),
value: value.clone(),
info: *info,
},
*end as usize,
));
tracker.insert(
op.id_full(),
*start as usize,
@ -1027,42 +1031,61 @@ impl DiffCalculatorTrait for RichtextDiffCalculator {
}
crate::container::list::list_op::InnerListOp::StyleEnd => {
let id = op.id();
// PERF: this can be sped up by caching the last style op
let start_op = oplog.get_op_that_includes(op.id().inc(-1)).unwrap();
let InnerListOp::StyleStart {
start: _,
end,
key,
value,
info,
} = start_op.content.as_list().unwrap()
else {
unreachable!()
};
let style_id = match styles.last() {
Some(last)
if last.peer == id.peer && last.cnt == id.counter - 1 =>
{
styles.len() - 1
}
_ => {
styles.push(StyleOp {
if let Some(pos) = styles.iter().rev().position(|(op, _pos)| {
op.peer == id.peer && op.cnt == id.counter - 1
}) {
let style_id = styles.len() - pos - 1;
let (_start_op, end_pos) = &styles[style_id];
tracker.insert(
op.id_full(),
// need to shift 1 because we insert the start style anchor before this pos
*end_pos + 1,
RichtextChunk::new_style_anchor(
style_id as u32,
AnchorType::End,
),
);
} else {
let Some(start_op) = oplog.get_op_that_includes(op.id().inc(-1))
else {
// Checking out a richtext doc that was exported at a GC version which splits
// the start style op and the end style op apart. Won't fix for now;
// it's such a rare case...
unimplemented!("Unhandled checkout case")
};
let InnerListOp::StyleStart {
start: _,
end,
key,
value,
info,
} = start_op.content.as_list().unwrap()
else {
unreachable!()
};
styles.push((
StyleOp {
lamport: op.lamport() - 1,
peer: id.peer,
cnt: id.counter - 1,
key: key.clone(),
value: value.clone(),
info: *info,
});
styles.len() - 1
}
};
tracker.insert(
op.id_full(),
// need to shift 1 because we insert the start style anchor before this pos
*end as usize + 1,
RichtextChunk::new_style_anchor(style_id as u32, AnchorType::End),
);
},
*end as usize,
));
let style_id = styles.len() - 1;
tracker.insert(
op.id_full(),
// need to shift 1 because we insert the start style anchor before this pos
*end as usize + 1,
RichtextChunk::new_style_anchor(
style_id as u32,
AnchorType::End,
),
);
}
}
},
_ => unreachable!(),
@ -1116,7 +1139,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator {
RichtextChunkValue::StyleAnchor { id, anchor_type } => {
delta.push_insert(
RichtextStateChunk::Style {
style: Arc::new(styles[id as usize].clone()),
style: Arc::new(styles[id as usize].0.clone()),
anchor_type,
},
(),

View file

@ -2,9 +2,10 @@ use rle::HasLength;
use std::collections::BTreeSet;
use loro_common::LoroResult;
use tracing::{debug, trace};
use tracing::debug;
use crate::{
container::list::list_op::InnerListOp,
dag::DagUtils,
encoding::fast_snapshot::{Snapshot, _encode_snapshot},
state::container_store::FRONTIERS_KEY,
@ -25,10 +26,8 @@ pub(crate) fn export_gc_snapshot<W: std::io::Write>(
) -> LoroResult<Frontiers> {
assert!(!doc.is_detached());
let oplog = doc.oplog().lock().unwrap();
trace!("start_from: {:?}", &start_from);
let start_from = calc_actual_start(&oplog, start_from);
let start_from = calc_gc_doc_start(&oplog, start_from);
let mut start_vv = oplog.dag().frontiers_to_vv(&start_from).unwrap();
trace!("start_from: {:?}", &start_from);
for id in start_from.iter() {
// we need to include the ops in start_from, this can make things easier
start_vv.insert(id.peer, id.counter);
@ -75,13 +74,28 @@ pub(crate) fn export_gc_snapshot<W: std::io::Write>(
Ok(start_from)
}
/// The real start version should be the lca of the given one and the latest frontiers
fn calc_actual_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers {
/// Calculates optimal starting version for the trimmed doc
///
/// It should be the LCA of the user-given version and the latest version.
/// Otherwise, users cannot replay the history from the initial version till the latest version.
fn calc_gc_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers {
// start is the real start frontiers
let (start, _) = oplog
let (mut start, _) = oplog
.dag()
.find_common_ancestor(frontiers, oplog.frontiers());
for id in start.iter_mut() {
if let Some(op) = oplog.get_op_that_includes(*id) {
if let crate::op::InnerContent::List(InnerListOp::StyleStart { .. }) = &op.content {
// StyleStart and StyleEnd operations must be kept together in the GC snapshot.
// Splitting them could lead to a weird document state that cannot be
// properly encoded. To ensure they stay together, we advance the frontier by
// one step to include both operations.
let cur_f = oplog.frontiers();
oplog.dag.find_common_ancestor(&start, cur_f).0
// > Id.counter + 1 is guaranteed to be the StyleEnd Op
id.counter += 1;
}
}
}
start
}

View file

@ -147,3 +147,27 @@ fn export_snapshot_on_a_trimmed_doc() -> anyhow::Result<()> {
assert_eq!(new_doc.get_deep_value(), doc.get_deep_value());
Ok(())
}
#[test]
/// Exports a GC snapshot whose requested start version is the first of the two
/// ops produced by `mark`, imports it into a fresh doc, and checks that both a
/// checkout right after the mark and a checkout to the latest version yield the
/// expected text.
fn test_richtext_gc() -> anyhow::Result<()> {
    let doc = LoroDoc::new();
    doc.set_peer_id(1)?;
    let text = doc.get_text("text");

    // Three single-character inserts at the front (op counters 0, 1, 2).
    for piece in ["1", "2", "3"] {
        text.insert(0, piece)?;
    }
    // The mark occupies op counters 3 and 4.
    text.mark(0..2, "bold", "value")?;
    doc.commit();
    // Op counters 5, 6, 7.
    text.insert(3, "456")?;

    // Ask for a snapshot that starts at counter 3, i.e. the first op of the mark.
    let gc_start = Frontiers::from(ID::new(1, 3));
    let snapshot = doc.export(loro::ExportMode::GcSnapshot(&gc_start));

    let restored = LoroDoc::new();
    restored.import(&snapshot)?;

    // Version right after the mark: only the first three inserts are visible.
    let after_mark = Frontiers::from(ID::new(1, 4));
    restored.checkout(&after_mark)?;
    assert_eq!(restored.get_text("text").to_string(), "321");

    // Latest version: the trailing "456" insert is visible as well.
    restored.checkout_to_latest();
    assert_eq!(restored.get_text("text").to_string(), "321456");
    Ok(())
}