feat: basic snapshot encoding

This commit is contained in:
Zixuan Chen 2023-07-17 12:27:11 +08:00
parent 6983a2b00c
commit e993f1b155
27 changed files with 1117 additions and 499 deletions

6
Cargo.lock generated
View file

@ -989,6 +989,7 @@ dependencies = [
"arref",
"bench-utils",
"color-backtrace",
"compact-bytes",
"crdt-list",
"criterion 0.4.0",
"ctor",
@ -1039,6 +1040,11 @@ dependencies = [
[[package]]
name = "loro-preload"
version = "0.1.0"
dependencies = [
"loro-common",
"serde",
"serde_columnar",
]
[[package]]
name = "loro-wasm"

View file

@ -86,6 +86,7 @@ impl CompactBytes {
self.bytes.as_bytes()
}
// PERF: may use iterator to speed up
pub fn alloc_advance(&mut self, bytes: &[u8]) -> Vec<Range<usize>> {
let mut ans: Vec<Range<usize>> = vec![];
// this push will try to merge the new range with the last range in the ans

View file

@ -2,6 +2,8 @@ use thiserror::Error;
use crate::{PeerID, ID};
pub type LoroResult<T> = Result<T, LoroError>;
#[derive(Error, Debug)]
pub enum LoroError {
#[error("Context's client_id({found:?}) does not match Container's client_id({expected:?})")]

View file

@ -7,7 +7,7 @@ mod id;
mod span;
mod value;
pub use error::LoroError;
pub use error::{LoroError, LoroResult};
pub use span::*;
pub use value::LoroValue;
pub type PeerID = u64;

View file

@ -56,6 +56,8 @@ impl Hash for LoroValue {
}
}
impl Eq for LoroValue {}
impl<S: Into<String>, M> From<HashMap<S, LoroValue, M>> for LoroValue {
fn from(map: HashMap<S, LoroValue, M>) -> Self {
let mut new_map = FxHashMap::default();
@ -237,274 +239,6 @@ pub mod wasm {
})
}
}
// impl From<Diff> for JsValue {
// fn from(value: Diff) -> Self {
// // create a obj
// let obj = Object::new();
// match value {
// Diff::List(list) => {
// // set type as "list"
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("list"),
// )
// .unwrap();
// // set diff as array
// let arr = Array::new_with_length(list.len() as u32);
// for (i, v) in list.iter().enumerate() {
// arr.set(i as u32, JsValue::from(v.clone()));
// }
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("diff"),
// &arr.into_js_result().unwrap(),
// )
// .unwrap();
// }
// Diff::Text(text) => {
// // set type as "text"
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("text"),
// )
// .unwrap();
// // set diff as array
// js_sys::Reflect::set(&obj, &JsValue::from_str("diff"), &text.into()).unwrap();
// }
// Diff::Map(map) => {
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("map"),
// )
// .unwrap();
// js_sys::Reflect::set(&obj, &JsValue::from_str("diff"), &map.into()).unwrap();
// }
// Diff::NewMap(map) => {
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("map"),
// )
// .unwrap();
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("diff"),
// &serde_wasm_bindgen::to_value(&map).unwrap(),
// )
// .unwrap();
// }
// Diff::SeqRaw(text) => {
// // set type as "text"
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("seq_raw"),
// )
// .unwrap();
// // set diff as array
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("diff"),
// &serde_wasm_bindgen::to_value(&text).unwrap(),
// )
// .unwrap();
// }
// };
// // convert object to js value
// obj.into_js_result().unwrap()
// }
// }
// impl From<MapDiff<LoroValue>> for JsValue {
// fn from(value: MapDiff<LoroValue>) -> Self {
// let obj = Object::new();
// {
// let added = Object::new();
// for (key, value) in value.added.iter() {
// js_sys::Reflect::set(
// &added,
// &JsValue::from_str(key),
// &JsValue::from(value.clone()),
// )
// .unwrap();
// }
// js_sys::Reflect::set(&obj, &JsValue::from_str("added"), &added).unwrap();
// }
// {
// let deleted = Object::new();
// for (key, value) in value.deleted.iter() {
// js_sys::Reflect::set(
// &deleted,
// &JsValue::from_str(key),
// &JsValue::from(value.clone()),
// )
// .unwrap();
// }
// js_sys::Reflect::set(&obj, &JsValue::from_str("deleted"), &deleted).unwrap();
// }
// {
// let updated = Object::new();
// for (key, pair) in value.updated.iter() {
// let pair_obj = Object::new();
// js_sys::Reflect::set(
// &pair_obj,
// &JsValue::from_str("old"),
// &pair.old.clone().into(),
// )
// .unwrap();
// js_sys::Reflect::set(
// &pair_obj,
// &JsValue::from_str("new"),
// &pair.new.clone().into(),
// )
// .unwrap();
// js_sys::Reflect::set(
// &updated,
// &JsValue::from_str(key),
// &pair_obj.into_js_result().unwrap(),
// )
// .unwrap();
// }
// js_sys::Reflect::set(&obj, &JsValue::from_str("updated"), &updated).unwrap();
// }
// obj.into_js_result().unwrap()
// }
// }
// impl From<Delta<String, Utf16Meta>> for JsValue {
// fn from(value: Delta<String, Utf16Meta>) -> Self {
// let arr = Array::new_with_length(value.len() as u32);
// for (i, v) in value.iter().enumerate() {
// arr.set(i as u32, JsValue::from(v.clone()));
// }
// arr.into_js_result().unwrap()
// }
// }
// impl From<DeltaItem<String, Utf16Meta>> for JsValue {
// fn from(value: DeltaItem<String, Utf16Meta>) -> Self {
// let obj = Object::new();
// match value {
// DeltaItem::Retain { len: _len, meta } => {
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("retain"),
// )
// .unwrap();
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("len"),
// &JsValue::from_f64(meta.utf16_len.unwrap() as f64),
// )
// .unwrap();
// }
// DeltaItem::Insert { value, .. } => {
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("insert"),
// )
// .unwrap();
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("value"),
// &JsValue::from_str(value.as_str()),
// )
// .unwrap();
// }
// DeltaItem::Delete { len: _len, meta } => {
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("delete"),
// )
// .unwrap();
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("len"),
// &JsValue::from_f64(meta.utf16_len.unwrap() as f64),
// )
// .unwrap();
// }
// }
// obj.into_js_result().unwrap()
// }
// }
// impl From<DeltaItem<Vec<LoroValue>, ()>> for JsValue {
// fn from(value: DeltaItem<Vec<LoroValue>, ()>) -> Self {
// let obj = Object::new();
// match value {
// DeltaItem::Retain { len, .. } => {
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("retain"),
// )
// .unwrap();
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("len"),
// &JsValue::from_f64(len as f64),
// )
// .unwrap();
// }
// DeltaItem::Insert { value, .. } => {
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("insert"),
// )
// .unwrap();
// let arr = Array::new_with_length(value.len() as u32);
// for (i, v) in value.into_iter().enumerate() {
// arr.set(i as u32, convert(v));
// }
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("value"),
// &arr.into_js_result().unwrap(),
// )
// .unwrap();
// }
// DeltaItem::Delete { len, .. } => {
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("type"),
// &JsValue::from_str("delete"),
// )
// .unwrap();
// js_sys::Reflect::set(
// &obj,
// &JsValue::from_str("len"),
// &JsValue::from_f64(len as f64),
// )
// .unwrap();
// }
// }
// obj.into_js_result().unwrap()
// }
// }
}
impl Serialize for LoroValue {

View file

@ -37,6 +37,7 @@ enum_dispatch = "0.3.11"
im = "15.1.0"
jumprope = { version = "1.1.2", features = ["wchar_conversion"] }
generic-btree = "0.3.1"
compact-bytes = { path = "../compact-bytes" }
[dev-dependencies]
serde_json = "1.0.87"

View file

@ -141,7 +141,7 @@ impl ListContainer {
let id = store.next_id();
let op = Op::new(
id,
InnerContent::List(InnerListOp::new_del(pos, len)),
InnerContent::List(InnerListOp::new_del(pos, len as isize)),
self.idx,
);
store.append_local_ops(&[op]);

View file

@ -43,13 +43,28 @@ impl<'a> ListOp<'a> {
}
impl InnerListOp {
pub fn new_del(pos: usize, len: usize) -> Self {
pub fn new_del(pos: usize, len: isize) -> Self {
assert!(len != 0);
Self::Delete(DeleteSpan {
pos: pos as isize,
len: len as isize,
len,
})
}
pub fn new_unknown(pos: usize, len: usize) -> Self {
assert!(len != 0);
Self::Insert {
slice: SliceRange::new_unknown(len as u32),
pos,
}
}
pub fn new_insert(slice: Range<u32>, pos: usize) -> Self {
Self::Insert {
slice: SliceRange(slice),
pos,
}
}
}
impl HasLength for DeleteSpan {

View file

@ -13,6 +13,7 @@ pub struct MapSet {
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct InnerMapSet {
pub(crate) key: InternalString,
// FIXME: how to set None?
pub(crate) value: u32,
}

View file

@ -51,6 +51,7 @@ mod container_idx {
pub(crate) const TYPE_MASK: u32 = 0b1111 << 28;
pub(crate) const INDEX_MASK: u32 = !Self::TYPE_MASK;
#[allow(unused)]
pub(crate) fn get_type(self) -> ContainerType {
match (self.0 & Self::TYPE_MASK) >> 28 {
0 => ContainerType::Map,

View file

@ -155,7 +155,7 @@ impl TextContainer {
let id = store.next_id();
let op = Op::new(
id,
InnerContent::List(InnerListOp::new_del(pos, len)),
InnerContent::List(InnerListOp::new_del(pos, len as isize)),
self.idx,
);
store.append_local_ops(&[op]);

View file

@ -1,7 +1,7 @@
//! [LogStore] stores all the [Change]s and [Op]s. It's also a [DAG][crate::dag];
//!
//!
mod encoding;
pub(crate) mod encoding;
mod import;
mod iter;

View file

@ -19,9 +19,9 @@ use super::RemoteClientChanges;
pub(crate) use encode_updates::encode_oplog_updates;
// TODO: Test this threshold
const UPDATE_ENCODE_THRESHOLD: usize = 512;
const MAGIC_BYTES: [u8; 4] = [0x6c, 0x6f, 0x72, 0x6f];
const ENCODE_SCHEMA_VERSION: u8 = 0;
pub(crate) const UPDATE_ENCODE_THRESHOLD: usize = 512;
pub(crate) const MAGIC_BYTES: [u8; 4] = [0x6c, 0x6f, 0x72, 0x6f];
pub(crate) const ENCODE_SCHEMA_VERSION: u8 = 0;
pub enum EncodeMode {
Auto(VersionVector),
Updates(VersionVector),
@ -30,7 +30,7 @@ pub enum EncodeMode {
}
impl EncodeMode {
fn to_byte(&self) -> u8 {
pub fn to_byte(&self) -> u8 {
match self {
EncodeMode::Auto(_) => unreachable!(),
EncodeMode::Updates(_) => 0,
@ -40,7 +40,7 @@ impl EncodeMode {
}
}
enum ConcreteEncodeMode {
pub enum ConcreteEncodeMode {
Updates = 0,
RleUpdates = 1,
Snapshot = 2,

View file

@ -85,6 +85,14 @@ impl SharedArena {
}
}
pub fn alloc_str_fast(&self, bytes: &[u8]) {
let mut text_lock = self.text.lock().unwrap();
let utf16_len = count_utf16_chars(bytes);
self.text_utf16_len
.fetch_add(utf16_len, std::sync::atomic::Ordering::SeqCst);
text_lock.push_slice(bytes);
}
pub fn utf16_len(&self) -> usize {
self.text_utf16_len
.load(std::sync::atomic::Ordering::SeqCst)
@ -132,6 +140,14 @@ impl SharedArena {
self.inner_convert_op(content, counter, container)
}
pub fn is_empty(&self) -> bool {
self.container_idx_to_id.lock().unwrap().is_empty()
&& self.container_id_to_idx.lock().unwrap().is_empty()
&& self.text.lock().unwrap().is_empty()
&& self.values.lock().unwrap().is_empty()
&& self.parents.lock().unwrap().is_empty()
}
fn inner_convert_op(
&self,
content: RawOpContent<'_>,
@ -210,4 +226,18 @@ impl SharedArena {
pub fn export_containers(&self) -> Vec<ContainerID> {
self.container_idx_to_id.lock().unwrap().clone()
}
pub fn export_parents(&self) -> Vec<Option<ContainerIdx>> {
let parents = self.parents.lock().unwrap();
let containers = self.container_idx_to_id.lock().unwrap();
containers
.iter()
.enumerate()
.map(|(x, id)| {
let idx = ContainerIdx::from_index_and_type(x as u32, id.container_type());
let parent_idx = parents.get(&idx)?;
*parent_idx
})
.collect()
}
}

View file

@ -1,176 +0,0 @@
#![allow(warnings)]
use fxhash::FxHashMap;
use serde::{Deserialize, Serialize};
use serde_columnar::{columnar, to_vec};
use crate::{
change::Timestamp,
container::ContainerID,
id::{Counter, PeerID},
InternalString, LoroError, LoroValue,
};
use super::oplog::OpLog;
type Containers = Vec<ContainerID>;
type ClientIdx = u32;
type Clients = Vec<PeerID>;
#[columnar(ser, de)]
#[derive(Debug, Serialize, Deserialize)]
pub(super) struct SnapshotEncoded {
#[columnar(type = "vec")]
pub(crate) changes: Vec<ChangeEncoding>,
#[columnar(type = "vec")]
ops: Vec<SnapshotOpEncoding>,
#[columnar(type = "vec")]
deps: Vec<DepsEncoding>,
clients: Clients,
containers: Containers,
bytes: Vec<u8>,
keys: Vec<InternalString>,
values: Vec<LoroValue>,
}
#[columnar(vec, ser, de)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChangeEncoding {
#[columnar(strategy = "Rle", original_type = "u32")]
pub(super) client_idx: ClientIdx,
#[columnar(strategy = "DeltaRle", original_type = "i64")]
pub(super) timestamp: Timestamp,
pub(super) op_len: u32,
/// The length of deps that exclude the dep on the same client
#[columnar(strategy = "Rle")]
pub(super) deps_len: u32,
/// Whether the change has a dep on the same client.
/// It can save lots of space by using this field instead of [`DepsEncoding`]
#[columnar(strategy = "BoolRle")]
pub(super) dep_on_self: bool,
}
#[columnar(vec, ser, de)]
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SnapshotOpEncoding {
#[columnar(strategy = "Rle", original_type = "usize")]
container: usize,
/// key index or insert/delete pos
#[columnar(strategy = "DeltaRle")]
prop: usize,
// list range start or del len or map value index, maybe negative
value: i64,
// List: the length of content when inserting, -2 when the inserted content is unknown, and -1 when deleting.
// Map: always -1
#[columnar(strategy = "Rle")]
value2: i64,
}
#[columnar(vec, ser, de)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub(super) struct DepsEncoding {
#[columnar(strategy = "Rle", original_type = "u32")]
pub(super) client_idx: ClientIdx,
#[columnar(strategy = "DeltaRle", original_type = "i32")]
pub(super) counter: Counter,
}
impl DepsEncoding {
pub(super) fn new(client_idx: ClientIdx, counter: Counter) -> Self {
Self {
client_idx,
counter,
}
}
}
// pub(super) fn encode_snapshot(store: &OpLog, gc: bool) -> Result<Vec<u8>, LoroError> {
// let mut client_id_to_idx: FxHashMap<PeerID, ClientIdx> = FxHashMap::default();
// let mut clients = Vec::with_capacity(store.changes.len());
// let mut change_num = 0;
// for (key, changes) in store.changes.iter() {
// client_id_to_idx.insert(*key, clients.len() as ClientIdx);
// clients.push(*key);
// change_num += changes.merged_len();
// }
// let containers = store.arena.export_containers();
// // During a transaction, we may create some containers which are deleted later. And these containers also need a unique ContainerIdx.
// // So when we encode snapshot, we need to sort the containers by ContainerIdx and change the `container` of ops to the index of containers.
// // An empty store decodes the snapshot, it will create these containers in a sequence of natural numbers so that containers and ops can correspond one-to-one
// let container_to_new_idx: FxHashMap<_, _> = containers
// .iter()
// .enumerate()
// .map(|(i, id)| (id, i))
// .collect();
// let mut changes = Vec::with_capacity(change_num);
// let mut ops = Vec::with_capacity(change_num);
// let mut keys = Vec::new();
// let mut key_to_idx = FxHashMap::default();
// let mut deps = Vec::with_capacity(change_num);
// for (client_idx, (_, change_vec)) in store.changes.iter().enumerate() {
// for change in change_vec.iter() {
// let client_id = change.id.peer;
// let mut op_len = 0;
// let mut deps_len = 0;
// let mut dep_on_self = false;
// for dep in change.deps.iter() {
// // the first change will encode the self-client deps
// if dep.peer == client_id {
// dep_on_self = true;
// } else {
// deps.push(DepsEncoding::new(
// *client_id_to_idx.get(&dep.peer).unwrap(),
// dep.counter,
// ));
// deps_len += 1;
// }
// }
// for op in change.ops.iter() {
// let container_idx = op.container;
// let container_id = store.reg.get_id(container_idx).unwrap();
// let container = store.reg.get(container_id).unwrap();
// let new_ops = container
// .upgrade()
// .unwrap()
// .try_lock()
// .unwrap()
// .to_export_snapshot(&op.content, gc);
// let new_idx = *container_to_new_idx.get(container_id).unwrap();
// op_len += new_ops.len();
// for op_content in new_ops {
// let (prop, value, value2) =
// convert_inner_content(&op_content, &mut key_to_idx, &mut keys);
// ops.push(SnapshotOpEncoding {
// container: new_idx,
// prop,
// value,
// value2,
// });
// }
// }
// changes.push(ChangeEncoding {
// client_idx: client_idx as ClientIdx,
// timestamp: change.timestamp,
// deps_len,
// dep_on_self,
// op_len: op_len as u32,
// });
// }
// }
// todo!("compress bytes");
// let encoded = SnapshotEncoded {
// changes,
// ops,
// deps,
// clients,
// containers,
// keys,
// bytes: todo!(),
// values: todo!(),
// };
// to_vec(&encoded).map_err(|e| LoroError::DecodeError(e.to_string().into()))
// }

View file

@ -1,10 +1,18 @@
use std::sync::{Arc, Mutex};
use std::{
borrow::Cow,
sync::{Arc, Mutex},
};
use crate::{id::PeerID, LoroError, VersionVector};
use crate::{
id::PeerID,
log_store::encoding::{ConcreteEncodeMode, ENCODE_SCHEMA_VERSION, MAGIC_BYTES},
EncodeMode, LoroError, VersionVector,
};
use super::{
diff_calc::DiffCalculator,
oplog::OpLog,
snapshot_encode::{decode_app_snapshot, encode_app_snapshot},
state::{AppState, AppStateDiff, ContainerStateDiff},
txn::Transaction,
};
@ -15,6 +23,8 @@ use super::{
/// - [OpLog] encompasses all operations, signifying the document history.
/// - [AppState] signifies the current document state.
///
/// They will share a [super::arena::SharedArena]
///
/// # Detached Mode
///
/// This mode enables separate usage of [OpLog] and [AppState].
@ -33,6 +43,7 @@ pub struct LoroApp {
impl LoroApp {
pub fn new() -> Self {
let oplog = OpLog::new();
// share arena
let state = Arc::new(Mutex::new(AppState::new(&oplog)));
Self {
oplog: Arc::new(Mutex::new(oplog)),
@ -41,6 +52,18 @@ impl LoroApp {
}
}
pub fn is_empty(&self) -> bool {
self.oplog.lock().unwrap().is_empty() && self.state.lock().unwrap().is_empty()
}
pub(super) fn from_existing(oplog: OpLog, state: AppState) -> Self {
Self {
oplog: Arc::new(Mutex::new(oplog)),
state: Arc::new(Mutex::new(state)),
detached: false,
}
}
pub fn set_peer_id(&self, peer: PeerID) {
self.state.lock().unwrap().peer = peer;
}
@ -63,8 +86,8 @@ impl LoroApp {
Some(oplog.frontiers()),
);
state.apply_diff(AppStateDiff {
diff: &diff,
frontiers: oplog.frontiers(),
diff: (&diff).into(),
frontiers: Cow::Borrowed(oplog.frontiers()),
});
}
@ -89,6 +112,19 @@ impl LoroApp {
}
pub fn import(&self, bytes: &[u8]) -> Result<Vec<ContainerStateDiff>, LoroError> {
let (magic_bytes, input) = bytes.split_at(4);
let magic_bytes: [u8; 4] = magic_bytes.try_into().unwrap();
if magic_bytes != MAGIC_BYTES {
return Err(LoroError::DecodeError("Invalid header bytes".into()));
}
let (version, input) = input.split_at(1);
if version != [ENCODE_SCHEMA_VERSION] {
return Err(LoroError::DecodeError("Invalid version".into()));
}
let mode: ConcreteEncodeMode = input[0].into();
match mode {
ConcreteEncodeMode::Updates | ConcreteEncodeMode::RleUpdates => {
// TODO: need to throw error if state is in transaction
debug_log::group!("import");
let mut oplog = self.oplog.lock().unwrap();
@ -106,17 +142,40 @@ impl LoroApp {
if !self.detached {
let mut state = self.state.lock().unwrap();
state.apply_diff(AppStateDiff {
diff: &diff,
frontiers: oplog.frontiers(),
diff: (&diff).into(),
frontiers: Cow::Borrowed(oplog.frontiers()),
});
}
debug_log::group_end!();
Ok(diff)
}
ConcreteEncodeMode::Snapshot => {
if self.is_empty() {
decode_app_snapshot(self, &input[1..])?;
Ok(vec![]) // TODO: return diff
} else {
let app = LoroApp::new();
decode_app_snapshot(&app, &input[1..])?;
dbg!(&app.oplog.lock().unwrap());
dbg!(&app.state.lock().unwrap().states);
let oplog = self.oplog.lock().unwrap();
let updates = app.export_from(oplog.vv());
drop(oplog);
self.import(&updates)
}
}
}
}
pub fn encode_snapshot(&self) -> Vec<u8> {
unimplemented!();
pub fn export_snapshot(&self) -> Vec<u8> {
let version = ENCODE_SCHEMA_VERSION;
let mut ans = Vec::from(MAGIC_BYTES);
// maybe u8 is enough
ans.push(version);
ans.push((EncodeMode::Snapshot).to_byte());
ans.extend(encode_app_snapshot(self));
ans
}
pub(crate) fn vv_cloned(&self) -> VersionVector {

View file

@ -3,9 +3,9 @@
pub(super) mod arena;
mod container;
pub(super) mod diff_calc;
pub mod encoding;
pub mod handler;
pub mod loro;
pub mod oplog;
pub mod snapshot_encode;
mod state;
pub mod txn;

View file

@ -95,6 +95,21 @@ impl OpLog {
}
}
pub fn new_with_arena(arena: SharedArena) -> Self {
Self {
dag: AppDag::default(),
arena,
changes: ClientChanges::default(),
next_lamport: 0,
latest_timestamp: Timestamp::default(),
pending_changes: Default::default(),
}
}
pub fn is_empty(&self) -> bool {
self.dag.map.is_empty() && self.arena.is_empty()
}
/// Import a change.
///
/// Pending changes that haven't been applied to the dag.
@ -117,7 +132,7 @@ impl OpLog {
self.next_lamport = self.next_lamport.max(change.lamport_end());
self.latest_timestamp = self.latest_timestamp.max(change.timestamp);
self.dag.frontiers.retain_non_included(&change.deps);
self.dag.frontiers.filter_included(change.id);
self.dag.frontiers.filter_peer(change.id.peer);
self.dag.frontiers.push(change.id_last());
let vv = self.dag.frontiers_to_im_vv(&change.deps);
let len = change.content_len();
@ -442,6 +457,10 @@ impl OpLog {
}),
)
}
pub(crate) fn len_changes(&self) -> usize {
self.changes.values().map(|x| x.len()).sum()
}
}
impl Default for OpLog {

View file

@ -0,0 +1,745 @@
#![allow(warnings)]
use std::{borrow::Cow, collections::VecDeque, mem::take};
use compact_bytes::CompactBytes;
use debug_log::debug_dbg;
use fxhash::{FxHashMap, FxHashSet};
use loro_common::{HasLamport, ID};
use loro_preload::{
CommonArena, EncodedAppState, EncodedContainerState, FinalPhase, MapEntry, TempArena,
};
use postcard::to_allocvec;
use rle::{HasLength, RleVec};
use serde::{Deserialize, Serialize};
use serde_columnar::{columnar, to_vec};
use smallvec::smallvec;
use crate::{
change::{Change, Lamport, Timestamp},
container::{
list::list_op::InnerListOp, map::InnerMapSet, registry::ContainerIdx, ContainerID,
},
delta::{MapDelta, MapValue},
id::{Counter, PeerID},
log_store::encoding::{ENCODE_SCHEMA_VERSION, MAGIC_BYTES},
op::{InnerContent, Op},
version::Frontiers,
EncodeMode, InternalString, LoroError, LoroValue,
};
use super::{
arena::SharedArena,
loro::LoroApp,
oplog::OpLog,
state::{AppState, AppStateDiff, ListState, MapState, State, TextState},
};
type Containers = Vec<ContainerID>;
type ClientIdx = u32;
type Clients = Vec<PeerID>;
#[columnar(ser, de)]
#[derive(Debug, Serialize, Deserialize)]
pub(super) struct OplogEncoded {
#[columnar(type = "vec")]
pub(crate) changes: Vec<EncodedChange>,
#[columnar(type = "vec")]
ops: Vec<EncodedSnapshotOp>,
#[columnar(type = "vec")]
deps: Vec<DepsEncoding>,
}
impl OplogEncoded {
fn decode(data: &FinalPhase) -> Result<Self, LoroError> {
serde_columnar::from_bytes(&data.oplog)
.map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str()))
}
fn encode(&self) -> Vec<u8> {
to_vec(self).unwrap()
}
}
#[columnar(vec, ser, de)]
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EncodedChange {
#[columnar(strategy = "Rle", original_type = "u32")]
pub(super) peer_idx: ClientIdx,
#[columnar(strategy = "DeltaRle", original_type = "i64")]
pub(super) timestamp: Timestamp,
pub(super) op_len: u32,
/// The length of deps that exclude the dep on the same client
#[columnar(strategy = "Rle")]
pub(super) deps_len: u32,
/// Whether the change has a dep on the same client.
/// It can save lots of space by using this field instead of [`DepsEncoding`]
#[columnar(strategy = "BoolRle")]
pub(super) dep_on_self: bool,
}
#[columnar(vec, ser, de)]
#[derive(Debug, Clone, Serialize, Deserialize)]
struct EncodedSnapshotOp {
#[columnar(strategy = "Rle", original_type = "usize")]
container: u32,
/// key index or insert/delete pos
#[columnar(strategy = "DeltaRle")]
prop: usize,
// List range start | del len | map value index
// This value can be negative
value: i64,
// List: the length of content when inserting. -2 when the inserted content is unknown. -1 when it's a deletion
// Map: always -1
#[columnar(strategy = "Rle")]
value2: i64,
}
enum SnapshotOp {
ListInsert { pos: usize, start: u32, end: u32 },
ListDelete { pos: usize, len: isize },
ListUnknown { pos: usize, len: usize },
Map { key: usize, value: u32 },
}
impl EncodedSnapshotOp {
pub fn get_list(&self) -> SnapshotOp {
if self.value2 == -1 {
SnapshotOp::ListDelete {
pos: self.prop as usize,
len: self.value as isize,
}
} else if self.value2 == -2 {
SnapshotOp::ListUnknown {
pos: self.prop,
len: self.value as usize,
}
} else {
SnapshotOp::ListInsert {
pos: self.prop,
start: self.value as u32,
end: self.value2 as u32,
}
}
}
pub fn get_map(&self) -> SnapshotOp {
SnapshotOp::Map {
key: self.prop,
value: self.value as u32,
}
}
pub fn from(value: SnapshotOp, container: u32) -> Self {
match value {
SnapshotOp::ListInsert { pos, start, end } => Self {
container,
prop: pos,
value: start as i64,
value2: end as i64,
},
SnapshotOp::ListDelete { pos, len } => Self {
container,
prop: pos as usize,
value: len as i64,
value2: -1,
},
SnapshotOp::ListUnknown { pos, len } => Self {
container,
prop: pos,
value: len as i64,
value2: -2,
},
SnapshotOp::Map { key, value } => Self {
container,
prop: key,
value: value as i64,
value2: -1,
},
}
}
}
#[columnar(vec, ser, de)]
#[derive(Debug, Copy, Clone, Serialize, Deserialize)]
pub(super) struct DepsEncoding {
#[columnar(strategy = "Rle", original_type = "u32")]
pub(super) peer_idx: ClientIdx,
#[columnar(strategy = "DeltaRle", original_type = "i32")]
pub(super) counter: Counter,
}
impl DepsEncoding {
pub(super) fn new(client_idx: ClientIdx, counter: Counter) -> Self {
Self {
peer_idx: client_idx,
counter,
}
}
}
pub fn encode_app_snapshot(app: &LoroApp) -> Vec<u8> {
let pre_encoded_state = preprocess_app_state(&app.app_state().lock().unwrap());
encode_oplog(&app.oplog().lock().unwrap(), Some(pre_encoded_state)).encode()
}
pub fn decode_app_snapshot(app: &LoroApp, bytes: &[u8]) -> Result<(), LoroError> {
assert!(app.is_empty());
let data = FinalPhase::decode(bytes)?;
let mut app_state = app.app_state().lock().unwrap();
decode_state(&mut app_state, &data)?;
let arena = app_state.arena.clone();
let oplog = decode_oplog(&mut app.oplog().lock().unwrap(), &data, Some(arena))?;
Ok(())
}
#[derive(Default)]
struct PreEncodedState {
common: CommonArena<'static>,
arena: TempArena<'static>,
key_lookup: FxHashMap<InternalString, usize>,
value_lookup: FxHashMap<LoroValue, usize>,
peer_lookup: FxHashMap<PeerID, usize>,
app_state: EncodedAppState,
}
fn preprocess_app_state(app_state: &AppState) -> PreEncodedState {
let mut peers = Vec::new();
let mut peer_lookup = FxHashMap::default();
let mut bytes = Vec::new();
let mut keywords = Vec::new();
let mut values = Vec::new();
let mut key_lookup = FxHashMap::default();
let mut value_lookup = FxHashMap::default();
let mut encoded = EncodedAppState {
frontiers: app_state.frontiers.iter().cloned().collect(),
states: Vec::new(),
parents: app_state
.arena
.export_parents()
.into_iter()
.map(|x| x.map(|x| x.to_index()))
.collect(),
};
let mut record_key = |key: &InternalString| {
if let Some(idx) = key_lookup.get(key) {
return *idx;
}
keywords.push(key.clone());
key_lookup
.entry(key.clone())
.or_insert_with(|| keywords.len() - 1);
keywords.len() - 1
};
let mut record_value = |value: &LoroValue| {
if let Some(idx) = value_lookup.get(value) {
return *idx;
}
let idx = values.len();
values.push(value.clone());
value_lookup.entry(value.clone()).or_insert_with(|| idx);
idx
};
let mut record_peer = |peer: PeerID| {
if let Some(idx) = peer_lookup.get(&peer) {
return *idx as u32;
}
peers.push(peer);
peer_lookup.entry(peer).or_insert_with(|| peers.len() - 1);
peers.len() as u32 - 1
};
for (container_idx, state) in app_state.states.iter() {
match state {
State::ListState(list) => {
let v = list.iter().map(|value| record_value(&value)).collect();
encoded.states.push(EncodedContainerState::List((v)))
}
State::MapState(map) => {
let v = map
.iter()
.map(|(key, value)| {
let key = record_key(key);
let peer = value;
MapEntry {
key,
value: if let Some(value) = &value.value {
record_value(value) + 1
} else {
0
},
peer: record_peer(value.lamport.1),
counter: value.counter as u32,
lamport: value.lamport(),
}
})
.collect();
encoded.states.push(EncodedContainerState::Map(v))
}
State::TextState(text) => {
for span in text.iter() {
bytes.extend_from_slice(span.as_bytes());
}
encoded
.states
.push(EncodedContainerState::Text { len: text.len() })
}
}
}
let mut common = CommonArena {
peer_ids: peers.into(),
container_ids: app_state.arena.export_containers(),
};
let mut arena = TempArena {
text: bytes.into(),
keywords,
values,
};
PreEncodedState {
common,
arena,
key_lookup,
value_lookup,
peer_lookup,
app_state: encoded,
}
}
fn encode_oplog(oplog: &OpLog, state_ref: Option<PreEncodedState>) -> FinalPhase<'static> {
let state_ref = state_ref.unwrap_or_default();
let PreEncodedState {
mut common,
arena,
mut key_lookup,
mut value_lookup,
mut peer_lookup,
mut app_state,
} = state_ref;
if common.container_ids.is_empty() {
common.container_ids = oplog.arena.export_containers();
}
let mut bytes = CompactBytes::new();
bytes.append(&arena.text);
let mut extra_keys = Vec::new();
let mut extra_values = Vec::new();
let mut record_key = |key: &InternalString| {
if let Some(idx) = key_lookup.get(key) {
return *idx;
}
let idx = extra_keys.len() + arena.keywords.len();
extra_keys.push(key.clone());
key_lookup.entry(key.clone()).or_insert_with(|| idx);
idx
};
let mut record_value = |value: &LoroValue| {
if let Some(idx) = value_lookup.get(value) {
return *idx;
}
let idx = extra_values.len() + arena.values.len();
extra_values.push(value.clone());
value_lookup.entry(value.clone()).or_insert_with(|| idx);
idx
};
let Cow::Owned(mut peers) = take(&mut common.peer_ids) else {unreachable!()};
let mut record_peer = |peer: PeerID| {
if let Some(idx) = peer_lookup.get(&peer) {
return *idx as u32;
}
peers.push(peer);
peer_lookup.entry(peer).or_insert_with(|| peers.len() - 1);
peers.len() as u32 - 1
};
let mut record_str = |s: &[u8], mut pos: usize, container_idx: u32| {
let slices = bytes.alloc_advance(s);
slices
.into_iter()
.map(|range| {
let ans = SnapshotOp::ListInsert {
pos,
start: range.start as u32,
end: range.end as u32,
};
pos += range.len();
EncodedSnapshotOp::from(ans, container_idx)
})
.collect::<Vec<_>>()
};
// Add all changes
let mut changes: Vec<&Change> = Vec::with_capacity(oplog.len_changes());
for (peer, peer_changes) in oplog.changes.iter() {
for change in peer_changes.iter() {
changes.push(change);
}
}
// Sort changes by lamport. So it's in causal order
changes.sort_by_key(|x| x.lamport());
let mut encoded_changes = Vec::with_capacity(changes.len());
let mut encoded_ops: Vec<EncodedSnapshotOp> =
Vec::with_capacity(changes.iter().map(|x| x.ops.len()).sum());
let mut deps = Vec::with_capacity(changes.iter().map(|x| x.deps.len()).sum());
for change in changes {
let peer_idx = record_peer(change.id.peer);
let mut lamport = change.lamport();
let op_index_start = encoded_ops.len();
for op in change.ops.iter() {
let counter = op.counter;
match &op.content {
InnerContent::List(list) => match list {
InnerListOp::Insert { slice, pos } => {
if slice.is_unknown() {
encoded_ops.push(EncodedSnapshotOp::from(
SnapshotOp::ListUnknown {
pos: *pos as usize,
len: slice.atom_len(),
},
op.container.to_index(),
));
} else {
let slice = oplog
.arena
.slice_bytes(slice.0.start as usize..slice.0.end as usize);
encoded_ops.extend(record_str(
&slice,
*pos as usize,
op.container.to_index(),
));
}
}
InnerListOp::Delete(del) => {
encoded_ops.push(EncodedSnapshotOp::from(
SnapshotOp::ListDelete {
pos: del.pos as usize,
len: del.len,
},
op.container.to_index(),
));
}
},
InnerContent::Map(map) => {
let key = record_key(&map.key);
let value = oplog.arena.get_value(map.value as usize);
// FIXME: delete in map
let value = if let Some(value) = value {
record_value(&value) + 1
} else {
0
};
encoded_ops.push(EncodedSnapshotOp::from(
SnapshotOp::Map {
key,
value: value as u32,
},
op.container.to_index(),
));
}
}
lamport += op.atom_len() as Lamport;
}
let op_len = encoded_ops.len() - op_index_start;
let mut dep_on_self = false;
let dep_start = deps.len();
for dep in change.deps.iter() {
if dep.peer == change.id.peer {
dep_on_self = true;
} else {
let peer_idx = record_peer(dep.peer);
deps.push(DepsEncoding {
peer_idx,
counter: dep.counter,
});
}
}
debug_dbg!(&change);
let deps_len = deps.len() - dep_start;
encoded_changes.push(EncodedChange {
peer_idx,
timestamp: change.timestamp,
op_len: op_len as u32,
deps_len: deps_len as u32,
dep_on_self,
})
}
common.peer_ids = Cow::Owned(peers);
let bytes = bytes.take();
let mut extra_text = (&bytes[arena.text.len()..]).to_vec();
let ans = FinalPhase {
common: Cow::Owned(common.encode()),
app_state: Cow::Owned(app_state.encode()),
state_arena: Cow::Owned(arena.encode()),
additional_arena: Cow::Owned(
TempArena {
text: Cow::Owned(extra_text),
keywords: extra_keys,
values: extra_values,
}
.encode(),
),
oplog: Cow::Owned(
OplogEncoded {
changes: encoded_changes,
ops: encoded_ops,
deps,
}
.encode(),
),
};
ans
}
pub fn decode_oplog(
oplog: &mut OpLog,
data: &FinalPhase,
arena: Option<SharedArena>,
) -> Result<(), LoroError> {
let arena = arena.unwrap_or_else(SharedArena::default);
oplog.arena = arena.clone();
let state_arena = TempArena::decode_state_arena(&data)?;
let mut extra_arena = TempArena::decode_additional_arena(&data)?;
arena.alloc_str_fast(&*state_arena.text);
arena.alloc_str_fast(&*extra_arena.text);
arena.alloc_values(state_arena.values.into_iter());
arena.alloc_values(extra_arena.values.into_iter());
let mut keys = state_arena.keywords;
keys.append(&mut extra_arena.keywords);
let common = CommonArena::decode(&data)?;
let oplog_data = OplogEncoded::decode(data)?;
let mut changes = Vec::new();
let mut dep_iter = oplog_data.deps.iter();
let mut op_iter = oplog_data.ops.iter();
let mut counters = FxHashMap::default();
for change in oplog_data.changes.iter() {
let peer_idx = change.peer_idx as usize;
let peer_id = common.peer_ids[peer_idx];
let timestamp = change.timestamp;
let deps_len = change.deps_len;
let dep_on_self = change.dep_on_self;
let mut ops = RleVec::new();
let counter_mut = counters.entry(peer_idx).or_insert(0);
let start_counter = *counter_mut;
// calc ops
let mut total_len = 0;
for _ in 0..change.op_len {
// calc op
let id = ID::new(peer_id, *counter_mut);
let encoded_op = op_iter.next().unwrap();
let container = common.container_ids[encoded_op.container as usize].clone();
let container_idx = arena.register_container(&container);
let op = match container.container_type() {
loro_common::ContainerType::Text | loro_common::ContainerType::List => {
let op = encoded_op.get_list();
match op {
SnapshotOp::ListInsert { start, end, pos } => Op::new(
id,
InnerContent::List(InnerListOp::new_insert(start..end, pos)),
container_idx,
),
SnapshotOp::ListDelete { len, pos } => Op::new(
id,
InnerContent::List(InnerListOp::new_del(pos, len)),
container_idx,
),
SnapshotOp::ListUnknown { len, pos } => Op::new(
id,
InnerContent::List(InnerListOp::new_unknown(pos, len)),
container_idx,
),
SnapshotOp::Map { .. } => {
unreachable!()
}
}
}
loro_common::ContainerType::Map => {
let op = encoded_op.get_map();
match op {
SnapshotOp::Map { key, value } => Op::new(
id,
InnerContent::Map(InnerMapSet {
key: (&*keys[key]).into(),
value,
}),
container_idx,
),
_ => unreachable!(),
}
}
};
*counter_mut += op.content_len() as Counter;
ops.push(op);
}
// calc deps
let mut deps: smallvec::SmallVec<[ID; 2]> = smallvec![];
if dep_on_self {
assert!(start_counter > 0);
deps.push(ID::new(peer_id, start_counter - 1));
}
for _ in 0..deps_len {
let dep = dep_iter.next().unwrap();
let peer = common.peer_ids[dep.peer_idx as usize];
deps.push(ID::new(peer, dep.counter));
}
changes.push(Change {
deps: Frontiers::from(deps),
ops,
timestamp,
id: ID::new(peer_id, start_counter),
lamport: 0, // calculate lamport when importing
});
}
// we assume changes are already sorted by lamport already
for mut change in changes {
let lamport = oplog.dag.frontiers_to_next_lamport(&change.deps);
change.lamport = lamport;
oplog.import_local_change(change);
}
Ok(())
}
pub fn decode_state(app_state: &mut AppState, data: &FinalPhase) -> Result<(), LoroError> {
assert!(app_state.is_empty());
let arena = app_state.arena.clone();
let common = CommonArena::decode(&data)?;
let state_arena = TempArena::decode_state_arena(&data)?;
let encoded_app_state = EncodedAppState::decode(&data)?;
app_state.frontiers = Frontiers::from(&encoded_app_state.frontiers);
let mut text_index = 0;
// this part should be moved to encode.rs in preload
for ((id, parent), state) in common
.container_ids
.iter()
.zip(encoded_app_state.parents.iter())
.zip(encoded_app_state.states.iter())
{
let idx = arena.register_container(id);
let parent_idx =
(*parent).map(|x| ContainerIdx::from_index_and_type(x, state.container_type()));
arena.set_parent(idx, parent_idx);
match state {
loro_preload::EncodedContainerState::Text { len } => {
let index = text_index;
app_state.set_state(
idx,
State::TextState(TextState::from_str(
std::str::from_utf8(&state_arena.text[index..index + len]).unwrap(),
)),
);
text_index += len;
}
loro_preload::EncodedContainerState::Map(map_data) => {
let mut map = MapState::new();
for entry in map_data.iter() {
map.insert(
InternalString::from(&*state_arena.keywords[entry.key]),
MapValue {
counter: entry.counter as Counter,
value: if entry.value == 0 {
None
} else {
Some(state_arena.values[entry.value as usize - 1].clone())
},
lamport: (entry.lamport, common.peer_ids[entry.peer as usize]),
},
)
}
app_state.set_state(idx, State::MapState(map));
}
loro_preload::EncodedContainerState::List(list_data) => {
let mut list = ListState::new();
list.insert_batch(0, list_data.iter().map(|&x| state_arena.values[x].clone()));
app_state.set_state(idx, State::ListState(list));
}
}
}
Ok(())
}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_snapshot_encode() {
use std::borrow::Cow;
dbg!(FinalPhase {
common: Cow::Owned(vec![0, 1, 2, 253, 254, 255]),
app_state: Cow::Owned(vec![255]),
state_arena: Cow::Owned(vec![255]),
additional_arena: Cow::Owned(vec![255]),
oplog: Cow::Owned(vec![255]),
}
.encode());
}
#[test]
fn text_edit_snapshot_encode_decode() {
// test import snapshot directly
let mut app = LoroApp::new();
let mut txn = app.txn().unwrap();
let text = txn.get_text("id").unwrap();
text.insert(&mut txn, 0, "hello");
txn.commit();
let snapshot = app.export_snapshot();
let mut app2 = LoroApp::new();
app2.import(&snapshot);
let actual = app2
.app_state()
.lock()
.unwrap()
.get_text("id")
.unwrap()
.to_string();
assert_eq!("hello", &actual);
dbg!(&app.oplog().lock().unwrap());
dbg!(&app2.oplog().lock().unwrap());
// test import snapshot to a LoroApp that is already changed
let mut txn = app2.txn().unwrap();
let text = txn.get_text("id").unwrap();
text.insert(&mut txn, 2, " ");
txn.commit();
debug_log::group!("app2 export");
let snapshot = app2.export_snapshot();
debug_log::group_end!();
debug_log::group!("import snapshot to a LoroApp that is already changed");
app.import(&snapshot).unwrap();
debug_log::group_end!();
let actual = app
.app_state()
.lock()
.unwrap()
.get_text("id")
.unwrap()
.to_string();
assert_eq!("he llo", &actual);
}
}

View file

@ -1,3 +1,5 @@
use std::borrow::Cow;
use enum_as_inner::EnumAsInner;
use enum_dispatch::enum_dispatch;
use fxhash::{FxHashMap, FxHashSet};
@ -17,9 +19,9 @@ mod list_state;
mod map_state;
mod text_state;
use list_state::ListState;
use map_state::MapState;
use text_state::TextState;
pub(crate) use list_state::ListState;
pub(crate) use map_state::MapState;
pub(crate) use text_state::TextState;
use super::{arena::SharedArena, oplog::OpLog};
@ -29,7 +31,7 @@ pub struct AppState {
pub(super) next_counter: Counter,
pub(super) frontiers: Frontiers,
states: FxHashMap<ContainerIdx, State>,
pub(super) states: FxHashMap<ContainerIdx, State>,
pub(super) arena: SharedArena,
in_txn: bool,
@ -53,7 +55,7 @@ pub trait ContainerState: Clone {
#[allow(clippy::enum_variant_names)]
#[enum_dispatch(ContainerState)]
#[derive(EnumAsInner, Clone)]
#[derive(EnumAsInner, Clone, Debug)]
pub enum State {
ListState,
MapState,
@ -74,15 +76,15 @@ impl State {
}
}
#[derive(Debug)]
#[derive(Debug, Clone)]
pub struct ContainerStateDiff {
pub idx: ContainerIdx,
pub diff: Diff,
}
pub struct AppStateDiff<'a> {
pub(crate) diff: &'a [ContainerStateDiff],
pub(crate) frontiers: &'a Frontiers,
pub(crate) diff: Cow<'a, [ContainerStateDiff]>,
pub(crate) frontiers: Cow<'a, Frontiers>,
}
impl AppState {
@ -100,6 +102,20 @@ impl AppState {
}
}
pub fn new_from_arena(arena: SharedArena) -> Self {
let peer = SystemRandom::new().next_u64();
// TODO: maybe we should switch to certain version in oplog
Self {
peer,
arena,
next_counter: 0,
frontiers: Frontiers::default(),
states: FxHashMap::default(),
in_txn: false,
changed_in_txn: FxHashSet::default(),
}
}
pub fn set_peer_id(&mut self, peer: PeerID) {
self.peer = peer;
}
@ -109,7 +125,7 @@ impl AppState {
panic!("apply_diff should not be called in a transaction");
}
for diff in diff {
for diff in diff.iter() {
let state = self.states.entry(diff.idx).or_insert_with(|| {
let id = self.arena.get_container_id(diff.idx).unwrap();
create_state(id.container_type())
@ -123,7 +139,7 @@ impl AppState {
state.apply_diff(&diff.diff, &self.arena);
}
self.frontiers = frontiers.clone();
self.frontiers = frontiers.into_owned();
}
pub fn apply_local_op(&mut self, op: RawOp) {
@ -173,6 +189,10 @@ impl AppState {
.unwrap_or(LoroValue::Null)
}
pub(super) fn set_state(&mut self, idx: ContainerIdx, state: State) {
assert!(self.states.insert(idx, state).is_none(), "overiding states")
}
/// id can be a str, ContainerID, or ContainerIdRaw.
/// if it's str it will use Root container, which will not be None
pub fn get_text<I: Into<ContainerIdRaw>>(&mut self, id: I) -> Option<&text_state::TextState> {
@ -199,6 +219,10 @@ impl AppState {
pub(super) fn is_in_txn(&self) -> bool {
self.in_txn
}
pub fn is_empty(&self) -> bool {
!self.in_txn && self.states.is_empty() && self.arena.is_empty()
}
}
pub fn create_state(kind: ContainerType) -> State {

View file

@ -19,6 +19,7 @@ use super::ContainerState;
type ContainerMapping = Arc<Mutex<FxHashMap<ContainerID, ArenaIndex>>>;
#[derive(Debug)]
pub struct ListState {
list: BTree<ListImpl>,
in_txn: bool,
@ -37,6 +38,7 @@ impl Clone for ListState {
}
}
#[derive(Debug)]
enum UndoItem {
Insert { index: usize, len: usize },
Delete { index: usize, value: LoroValue },
@ -210,6 +212,10 @@ impl ListState {
}
}
pub fn iter(&self) -> impl Iterator<Item = &LoroValue> {
self.list.iter()
}
pub fn len(&self) -> usize {
*self.list.root_cache() as usize
}

View file

@ -12,7 +12,7 @@ use crate::{
use super::ContainerState;
#[derive(Clone)]
#[derive(Debug, Clone)]
pub struct MapState {
map: FxHashMap<InternalString, MapValue>,
in_txn: bool,
@ -100,6 +100,10 @@ impl MapState {
}
}
pub fn iter(&self) -> impl Iterator<Item = (&InternalString, &MapValue)> {
self.map.iter()
}
fn len(&self) -> usize {
self.map.len()
}

View file

@ -13,7 +13,7 @@ use crate::{
use super::ContainerState;
#[derive(Default)]
#[derive(Debug, Default)]
pub struct TextState {
pub(crate) rope: JumpRope,
in_txn: bool,
@ -32,6 +32,7 @@ impl Clone for TextState {
}
}
#[derive(Debug)]
enum UndoItem {
Insert {
index: u32,
@ -160,6 +161,12 @@ impl TextState {
}
}
pub fn from_str(s: &str) -> Self {
let mut state = Self::new();
state.insert(0, s);
state
}
pub fn insert(&mut self, pos: usize, s: &str) {
if self.in_txn {
self.record_insert(pos, s.len());
@ -223,6 +230,14 @@ impl TextState {
pub fn is_empty(&self) -> bool {
self.len() == 0
}
pub fn to_string(&self) -> String {
self.rope.to_string()
}
pub fn iter(&self) -> impl Iterator<Item = &str> {
self.rope.slice_substrings(0..self.len())
}
}
#[cfg(test)]

View file

@ -133,8 +133,8 @@ impl Frontiers {
self.retain(|id| !other.contains(id));
}
pub fn filter_included(&mut self, end_id: ID) {
self.retain(|id| id.peer != end_id.peer || id.counter >= end_id.counter);
pub fn filter_peer(&mut self, peer: PeerID) {
self.retain(|id| id.peer != peer);
}
}
@ -164,6 +164,13 @@ impl From<&[ID]> for Frontiers {
}
}
impl From<&Vec<ID>> for Frontiers {
fn from(value: &Vec<ID>) -> Self {
let ids: &[ID] = value;
Self(ids.into())
}
}
impl From<Vec<ID>> for Frontiers {
fn from(value: Vec<ID>) -> Self {
Self(value.into())

View file

@ -6,3 +6,6 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
serde = {version="1.0.171", features=["derive"]}
serde_columnar = "0.2.5"
loro-common = {path="../loro-common"}

View file

@ -0,0 +1,118 @@
use loro_common::{ContainerID, InternalString, LoroError, LoroValue, ID};
use serde_columnar::{columnar, to_vec};
use std::borrow::Cow;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FinalPhase<'a> {
pub common: Cow<'a, [u8]>, // -> CommonArena
pub app_state: Cow<'a, [u8]>, // -> EncodedAppState
pub state_arena: Cow<'a, [u8]>, // -> TempArena<'a>
pub additional_arena: Cow<'a, [u8]>, // -> TempArena<'a>,抛弃这部分则不能回溯历史
pub oplog: Cow<'a, [u8]>, // -> OpLog. Can be ignored if we only need state
}
impl<'a> FinalPhase<'a> {
#[inline(always)]
pub fn encode(&self) -> Vec<u8> {
to_vec(self).unwrap()
}
#[inline(always)]
pub fn decode(bytes: &'a [u8]) -> Result<Self, LoroError> {
serde_columnar::from_bytes(bytes)
.map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str()))
}
}
#[columnar(ser, de)]
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct CommonArena<'a> {
pub peer_ids: Cow<'a, [u64]>,
pub container_ids: Vec<ContainerID>,
}
impl<'a> CommonArena<'a> {
pub fn encode(&self) -> Vec<u8> {
to_vec(self).unwrap()
}
pub fn decode(data: &'a FinalPhase) -> Result<Self, LoroError> {
serde_columnar::from_bytes(&data.common)
.map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str()))
}
}
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct EncodedAppState {
pub frontiers: Vec<ID>,
/// container states
pub states: Vec<EncodedContainerState>,
/// containers' parents
pub parents: Vec<Option<u32>>,
}
impl EncodedAppState {
pub fn encode(&self) -> Vec<u8> {
to_vec(self).unwrap()
}
pub fn decode(data: &FinalPhase) -> Result<Self, LoroError> {
serde_columnar::from_bytes(&data.app_state)
.map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str()))
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum EncodedContainerState {
Text { len: usize },
Map(Vec<MapEntry>),
List(Vec<usize>),
}
impl EncodedContainerState {
pub fn container_type(&self) -> loro_common::ContainerType {
match self {
EncodedContainerState::Text { .. } => loro_common::ContainerType::Text,
EncodedContainerState::Map(_) => loro_common::ContainerType::Map,
EncodedContainerState::List(_) => loro_common::ContainerType::List,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MapEntry {
pub key: usize, // index to the state arena
pub value: usize, // index to the state arena + 1. 0 means None
pub peer: u32, // index to the peer ids
pub counter: u32, // index to the peer ids
pub lamport: u32,
}
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct TempArena<'a> {
pub text: Cow<'a, [u8]>,
pub keywords: Vec<InternalString>,
pub values: Vec<LoroValue>,
}
impl<'a> TempArena<'a> {
pub fn encode(&self) -> Vec<u8> {
to_vec(self).unwrap()
}
pub fn decode_state_arena(data: &'a FinalPhase) -> Result<Self, LoroError> {
serde_columnar::from_bytes(&data.state_arena)
.map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str()))
}
pub fn decode_additional_arena(data: &'a FinalPhase) -> Result<Self, LoroError> {
serde_columnar::from_bytes(&data.additional_arena)
.map_err(|e| LoroError::DecodeError(e.to_string().into_boxed_str()))
}
}
/// returns a deep LoroValue that wraps the whole state
pub fn decode_state(_bytes: &[u8]) -> LoroValue {
unimplemented!()
}

View file

@ -1,3 +1,6 @@
mod encode;
pub use encode::*;
pub fn add(left: usize, right: usize) -> usize {
left + right
}