fix: avoid import issue on rich text doc

by avoiding splitting the doc at a version that separates the StyleStart and StyleEnd ops
2025-01-22 12:57:20 +00:00 · 2024-09-05 17:48:59 +08:00 · 2024-09-05 17:48:59 +08:00 · c04da30572
commit c04da30572
parent 6e8048bc28
3 changed files with 110 additions and 49 deletions
--- a/crates/loro-internal/src/diff_calc.rs
+++ b/crates/loro-internal/src/diff_calc.rs
@ -822,7 +822,8 @@ pub(crate) struct RichtextDiffCalculator {
 enum RichtextCalcMode {
    Crdt {
        tracker: Box<RichtextTracker>,
-        styles: Vec<StyleOp>,
+        /// (op, end_pos)
+        styles: Vec<(StyleOp, usize)>,
        start_vv: VersionVector,
    },
    Linear {
@ -1011,14 +1012,17 @@ impl DiffCalculatorTrait for RichtextDiffCalculator {
                        } => {
                            debug_assert!(start < end, "start: {}, end: {}", start, end);
                            let style_id = styles.len();
-                            styles.push(StyleOp {
-                                lamport: op.lamport(),
-                                peer: op.peer,
-                                cnt: op.id_start().counter,
-                                key: key.clone(),
-                                value: value.clone(),
-                                info: *info,
-                            });
+                            styles.push((
+                                StyleOp {
+                                    lamport: op.lamport(),
+                                    peer: op.peer,
+                                    cnt: op.id_start().counter,
+                                    key: key.clone(),
+                                    value: value.clone(),
+                                    info: *info,
+                                },
+                                *end as usize,
+                            ));
                            tracker.insert(
                                op.id_full(),
                                *start as usize,
@ -1027,42 +1031,61 @@ impl DiffCalculatorTrait for RichtextDiffCalculator {
                        }
                        crate::container::list::list_op::InnerListOp::StyleEnd => {
                            let id = op.id();
-                            // PERF: this can be sped up by caching the last style op
-                            let start_op = oplog.get_op_that_includes(op.id().inc(-1)).unwrap();
-                            let InnerListOp::StyleStart {
-                                start: _,
-                                end,
-                                key,
-                                value,
-                                info,
-                            } = start_op.content.as_list().unwrap()
-                            else {
-                                unreachable!()
-                            };
-                            let style_id = match styles.last() {
-                                Some(last)
-                                    if last.peer == id.peer && last.cnt == id.counter - 1 =>
-                                {
-                                    styles.len() - 1
-                                }
-                                _ => {
-                                    styles.push(StyleOp {
+                            if let Some(pos) = styles.iter().rev().position(|(op, _pos)| {
+                                op.peer == id.peer && op.cnt == id.counter - 1
+                            }) {
+                                let style_id = styles.len() - pos - 1;
+                                let (_start_op, end_pos) = &styles[style_id];
+                                tracker.insert(
+                                    op.id_full(),
+                                    // need to shift 1 because we insert the start style anchor before this pos
+                                    *end_pos + 1,
+                                    RichtextChunk::new_style_anchor(
+                                        style_id as u32,
+                                        AnchorType::End,
+                                    ),
+                                );
+                            } else {
+                                let Some(start_op) = oplog.get_op_that_includes(op.id().inc(-1))
+                                else {
+                                    // Checkout on a richtext doc exported at a gc version that splits
+                                    // the start style op and the end style op apart. Won't fix for now.
+                                    // It's such a rare case...
+                                    unimplemented!("Unhandled checkout case")
+                                };
+                                let InnerListOp::StyleStart {
+                                    start: _,
+                                    end,
+                                    key,
+                                    value,
+                                    info,
+                                } = start_op.content.as_list().unwrap()
+                                else {
+                                    unreachable!()
+                                };
+
+                                styles.push((
+                                    StyleOp {
                                        lamport: op.lamport() - 1,
                                        peer: id.peer,
                                        cnt: id.counter - 1,
                                        key: key.clone(),
                                        value: value.clone(),
                                        info: *info,
-                                    });
-                                    styles.len() - 1
-                                }
-                            };
-                            tracker.insert(
-                                op.id_full(),
-                                // need to shift 1 because we insert the start style anchor before this pos
-                                *end as usize + 1,
-                                RichtextChunk::new_style_anchor(style_id as u32, AnchorType::End),
-                            );
+                                    },
+                                    *end as usize,
+                                ));
+                                let style_id = styles.len() - 1;
+                                tracker.insert(
+                                    op.id_full(),
+                                    // need to shift 1 because we insert the start style anchor before this pos
+                                    *end as usize + 1,
+                                    RichtextChunk::new_style_anchor(
+                                        style_id as u32,
+                                        AnchorType::End,
+                                    ),
+                                );
+                            }
                        }
                    },
                    _ => unreachable!(),
@ -1116,7 +1139,7 @@ impl DiffCalculatorTrait for RichtextDiffCalculator {
                            RichtextChunkValue::StyleAnchor { id, anchor_type } => {
                                delta.push_insert(
                                    RichtextStateChunk::Style {
-                                        style: Arc::new(styles[id as usize].clone()),
+                                        style: Arc::new(styles[id as usize].0.clone()),
                                        anchor_type,
                                    },
                                    (),
--- a/crates/loro-internal/src/encoding/gc.rs
+++ b/crates/loro-internal/src/encoding/gc.rs
@ -2,9 +2,10 @@ use rle::HasLength;
 use std::collections::BTreeSet;

 use loro_common::LoroResult;
-use tracing::{debug, trace};
+use tracing::debug;

 use crate::{
+    container::list::list_op::InnerListOp,
    dag::DagUtils,
    encoding::fast_snapshot::{Snapshot, _encode_snapshot},
    state::container_store::FRONTIERS_KEY,
@ -25,10 +26,8 @@ pub(crate) fn export_gc_snapshot<W: std::io::Write>(
 ) -> LoroResult<Frontiers> {
    assert!(!doc.is_detached());
    let oplog = doc.oplog().lock().unwrap();
-    trace!("start_from: {:?}", &start_from);
-    let start_from = calc_actual_start(&oplog, start_from);
+    let start_from = calc_gc_doc_start(&oplog, start_from);
    let mut start_vv = oplog.dag().frontiers_to_vv(&start_from).unwrap();
-    trace!("start_from: {:?}", &start_from);
    for id in start_from.iter() {
        // we need to include the ops in start_from, this can make things easier
        start_vv.insert(id.peer, id.counter);
@ -75,13 +74,28 @@ pub(crate) fn export_gc_snapshot<W: std::io::Write>(
    Ok(start_from)
 }

-/// The real start version should be the lca of the given one and the latest frontiers
-fn calc_actual_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers {
+/// Calculates optimal starting version for the trimmed doc
+///
+/// It should be the LCA of the user given version and the latest version.
+/// Otherwise, users cannot replay the history from the initial version till the latest version.
+fn calc_gc_doc_start(oplog: &crate::OpLog, frontiers: &Frontiers) -> Frontiers {
    // start is the real start frontiers
-    let (start, _) = oplog
+    let (mut start, _) = oplog
        .dag()
        .find_common_ancestor(frontiers, oplog.frontiers());
+    for id in start.iter_mut() {
+        if let Some(op) = oplog.get_op_that_includes(*id) {
+            if let crate::op::InnerContent::List(InnerListOp::StyleStart { .. }) = &op.content {
+                // StyleStart and StyleEnd operations must be kept together in the GC snapshot.
+                // Splitting them could lead to a weird document state that cannot be
+                // properly encoded. To ensure they stay together, we advance the frontier by
+                // one step to include both operations.

-    let cur_f = oplog.frontiers();
-    oplog.dag.find_common_ancestor(&start, cur_f).0
+                // Note: the op at `id.counter + 1` is guaranteed to be the StyleEnd op
+                id.counter += 1;
+            }
+        }
+    }
+
+    start
 }
--- a/crates/loro/tests/integration_test/gc_test.rs
+++ b/crates/loro/tests/integration_test/gc_test.rs
@ -147,3 +147,27 @@ fn export_snapshot_on_a_trimmed_doc() -> anyhow::Result<()> {
    assert_eq!(new_doc.get_deep_value(), doc.get_deep_value());
    Ok(())
 }
+
+#[test]
+fn test_richtext_gc() -> anyhow::Result<()> {
+    let doc = LoroDoc::new();
+    doc.set_peer_id(1)?;
+    let text = doc.get_text("text");
+    text.insert(0, "1")?; // 0
+    text.insert(0, "2")?; // 1
+    text.insert(0, "3")?; // 2
+    text.mark(0..2, "bold", "value")?; // 3, 4
+    doc.commit();
+    text.insert(3, "456")?; // 5, 6, 7
+    let bytes = doc.export(loro::ExportMode::GcSnapshot(&Frontiers::from(ID::new(
+        1, 3,
+    ))));
+
+    let new_doc = LoroDoc::new();
+    new_doc.import(&bytes)?;
+    new_doc.checkout(&Frontiers::from(ID::new(1, 4)))?;
+    assert_eq!(new_doc.get_text("text").to_string(), "321");
+    new_doc.checkout_to_latest();
+    assert_eq!(new_doc.get_text("text").to_string(), "321456");
+    Ok(())
+}