diff --git a/.vscode/settings.json b/.vscode/settings.json index 648b1b5f..f3479022 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -53,7 +53,7 @@ "DEBUG": "*" }, "rust-analyzer.cargo.features": [ - // "test_utils", + "jsonpath", "counter" ], "editor.defaultFormatter": "rust-lang.rust-analyzer", diff --git a/Cargo.lock b/Cargo.lock index d6f67779..d22191fb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1055,6 +1055,7 @@ dependencies = [ "either", "enum-as-inner 0.6.0", "generic-btree", + "loro-common 0.16.12", "loro-delta 0.16.12", "loro-internal 0.16.12", "loro-kv-store", @@ -1109,6 +1110,7 @@ dependencies = [ "nonmax", "serde", "serde_columnar", + "serde_json", "string_cache", "thiserror", "wasm-bindgen", diff --git a/crates/loro-common/Cargo.toml b/crates/loro-common/Cargo.toml index 04fc1b04..528a01d1 100644 --- a/crates/loro-common/Cargo.toml +++ b/crates/loro-common/Cargo.toml @@ -16,6 +16,7 @@ keywords = ["crdt", "local-first"] [dependencies] rle = { path = "../rle", version = "0.16.12", package = "loro-rle" } serde = { workspace = true } +serde_json = { workspace = true, optional=true } thiserror = "1.0.43" wasm-bindgen = { version = "=0.2.92", optional = true } fxhash = "0.2.1" diff --git a/crates/loro-common/src/value.rs b/crates/loro-common/src/value.rs index d1469820..c64f7d7f 100644 --- a/crates/loro-common/src/value.rs +++ b/crates/loro-common/src/value.rs @@ -56,9 +56,15 @@ impl LoroValue { } } - pub fn get_by_index(&self, index: usize) -> Option<&LoroValue> { + pub fn get_by_index(&self, index: isize) -> Option<&LoroValue> { match self { - LoroValue::List(list) => list.get(index), + LoroValue::List(list) => { + if index < 0 { + list.get(list.len() - (-index) as usize) + } else { + list.get(index as usize) + } + } _ => None, } } @@ -738,3 +744,60 @@ impl<'de> serde::de::Visitor<'de> for LoroValueEnumVisitor { pub fn to_value>(value: T) -> LoroValue { value.into() } + +#[cfg(feature = "serde_json")] +mod serde_json_impl { + use 
/// Conversions between [`LoroValue`] and [`serde_json::Value`].
#[cfg(feature = "serde_json")]
mod serde_json_impl {
    use std::sync::Arc;

    use serde_json::{Number, Value};

    use super::LoroValue;
    use super::LORO_CONTAINER_ID_PREFIX;

    impl From<Value> for LoroValue {
        /// Converts a JSON value into the structurally equivalent `LoroValue`.
        ///
        /// Numbers that fit in `i64` become `I64`; every other number becomes
        /// `Double`.
        fn from(value: Value) -> Self {
            match value {
                Value::Null => LoroValue::Null,
                Value::Bool(b) => LoroValue::Bool(b),
                Value::Number(n) => match n.as_i64() {
                    Some(i) => LoroValue::I64(i),
                    // NOTE(review): this relies on `as_f64` succeeding for every
                    // number that is not an `i64`; confirm that still holds if
                    // serde_json's `arbitrary_precision` feature is enabled.
                    None => LoroValue::Double(n.as_f64().unwrap()),
                },
                Value::String(s) => LoroValue::String(Arc::new(s)),
                Value::Array(arr) => {
                    LoroValue::List(Arc::new(arr.into_iter().map(LoroValue::from).collect()))
                }
                Value::Object(obj) => LoroValue::Map(Arc::new(
                    obj.into_iter()
                        .map(|(k, v)| (k, LoroValue::from(v)))
                        .collect(),
                )),
            }
        }
    }

    impl From<LoroValue> for Value {
        /// Converts a `LoroValue` into JSON.
        ///
        /// * Containers are rendered as a string: the container-id prefix
        ///   followed by the id.
        /// * Binary data is rendered as an array of byte values.
        /// * A `Double` that JSON cannot represent (NaN or an infinity)
        ///   becomes `null` instead of panicking.
        fn from(value: LoroValue) -> Self {
            match value {
                LoroValue::Null => Value::Null,
                LoroValue::Bool(b) => Value::Bool(b),
                // `Number::from_f64` returns `None` for non-finite input.
                LoroValue::Double(d) => Number::from_f64(d).map_or(Value::Null, Value::Number),
                LoroValue::I64(i) => Value::Number(Number::from(i)),
                LoroValue::String(s) => Value::String(s.to_string()),
                LoroValue::List(l) => Value::Array(l.iter().cloned().map(Value::from).collect()),
                LoroValue::Map(m) => Value::Object(
                    m.iter()
                        .map(|(k, v)| (k.clone(), Value::from(v.clone())))
                        .collect(),
                ),
                LoroValue::Container(id) => {
                    Value::String(format!("{}{}", LORO_CONTAINER_ID_PREFIX, id))
                }
                LoroValue::Binary(b) => Value::Array(b.iter().copied().map(Value::from).collect()),
            }
        }
    }
}
00000000..c5a49517 --- /dev/null +++ b/crates/loro-internal/src/jsonpath.rs @@ -0,0 +1,840 @@ +use loro_common::{ContainerID, LoroValue}; +use thiserror::Error; +use tracing::trace; + +use crate::handler::{ + Handler, ListHandler, MapHandler, MovableListHandler, TextHandler, TreeHandler, ValueOrHandler, +}; +use crate::loro::LoroDoc; +use std::ops::ControlFlow; + +#[derive(Error, Debug)] +pub enum JsonPathError { + #[error("Invalid JSONPath: {0}")] + InvalidJsonPath(String), + #[error("JSONPath evaluation error: {0}")] + EvaluationError(String), +} + +impl LoroDoc { + #[inline] + pub fn jsonpath(&self, jsonpath: &str) -> Result, JsonPathError> { + evaluate_jsonpath(self, jsonpath) + } +} + +// Define JSONPath tokens +enum JSONPathToken { + Root, + Child(String), + RecursiveDescend, + Wildcard, + Index(isize), + UnionIndex(Vec), + UnionKey(Vec), + Slice(Option, Option, Option), + Filter(Box Fn(&'a ValueOrHandler) -> bool>), +} + +use std::fmt; + +impl fmt::Debug for JSONPathToken { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + JSONPathToken::Root => write!(f, "Root"), + JSONPathToken::Child(s) => write!(f, "Child({})", s), + JSONPathToken::RecursiveDescend => write!(f, "RecursiveDescend"), + JSONPathToken::Wildcard => write!(f, "Wildcard"), + JSONPathToken::Index(i) => write!(f, "Index({})", i), + JSONPathToken::Slice(start, end, step) => { + write!(f, "Slice({:?}, {:?}, {:?})", start, end, step) + } + JSONPathToken::UnionIndex(indices) => write!(f, "UnionIndex({:?})", indices), + JSONPathToken::UnionKey(keys) => write!(f, "UnionKey({:?})", keys), + JSONPathToken::Filter(_) => write!(f, "Filter()"), + } + } +} + +impl PartialEq for JSONPathToken { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (JSONPathToken::Root, JSONPathToken::Root) => true, + (JSONPathToken::Child(a), JSONPathToken::Child(b)) => a == b, + (JSONPathToken::RecursiveDescend, JSONPathToken::RecursiveDescend) => true, + (JSONPathToken::Wildcard, 
JSONPathToken::Wildcard) => true, + (JSONPathToken::Index(a), JSONPathToken::Index(b)) => a == b, + (JSONPathToken::Slice(a1, a2, a3), JSONPathToken::Slice(b1, b2, b3)) => { + a1 == b1 && a2 == b2 && a3 == b3 + } + (JSONPathToken::Filter(_), JSONPathToken::Filter(_)) => { + // We can't compare functions for equality, so we'll consider all filters unequal + false + } + _ => false, + } + } +} + +// Parse JSONPath string into tokens +fn parse_jsonpath(path: &str) -> Result, JsonPathError> { + let mut tokens = Vec::new(); + let chars = path.chars().collect::>(); + let mut iter = chars.iter().peekable(); + + while let Some(&c) = iter.next() { + match c { + '$' => tokens.push(JSONPathToken::Root), + '.' => { + if iter.peek() == Some(&&'.') { + iter.next(); + tokens.push(JSONPathToken::RecursiveDescend); + } else if iter.peek() == Some(&&'*') { + iter.next(); + tokens.push(JSONPathToken::Wildcard); + } else { + let mut key = String::new(); + while let Some(&c) = iter.peek() { + if c.is_alphanumeric() || *c == '_' { + key.push(*c); + iter.next(); + } else { + break; + } + } + tokens.push(JSONPathToken::Child(key)); + } + } + '[' => { + // Handle array index, slice, filter, or wildcard + let mut content = String::new(); + let mut in_quotes = false; + while let Some(&c) = iter.next() { + if c == ']' && !in_quotes { + break; + } + if c == '\'' { + in_quotes = !in_quotes; + } + content.push(c); + } + + if content == "*" { + tokens.push(JSONPathToken::Wildcard); + } else if let Ok(index) = content.parse::() { + tokens.push(JSONPathToken::Index(index)); + } else if content.contains(':') { + let slice: Vec<&str> = content.split(':').collect(); + let start = slice.first().and_then(|s| s.parse().ok()); + let end = slice.get(1).and_then(|s| s.parse().ok()); + let step = slice.get(2).and_then(|s| s.parse().ok()).unwrap_or(1); + tokens.push(JSONPathToken::Slice(start, end, Some(step as isize))); + } else if content.starts_with('?') { + // let predicate = content[1..].to_string(); + 
/// Parses a comma-separated list of quoted keys such as `'a','b'` or
/// `"a", "b"`.
///
/// Every item must open and close with the same quote character, and the
/// text between the quotes may contain only alphanumeric characters and `_`.
/// Returns `None` as soon as one item does not fit.
fn try_parse_union_key(content: &str) -> Option<Vec<String>> {
    content
        .split(',')
        .map(|part| {
            let part = part.trim();
            let quote = part.chars().next().filter(|&c| c == '\'' || c == '"')?;
            // Strip exactly one quote from each end, and require the closing
            // quote to match the opening one. Unterminated (`'a`) and
            // mismatched (`'a"`) items are rejected.
            let inner = part.strip_prefix(quote)?.strip_suffix(quote)?;
            if inner.chars().all(|c| c.is_alphanumeric() || c == '_') {
                Some(inner.to_string())
            } else {
                None
            }
        })
        .collect()
}

/// Parses a comma-separated list of integers such as `0, 2,-1`.
///
/// Returns `None` if any item, after trimming whitespace, is not an integer.
fn try_parse_union_index(content: &str) -> Option<Vec<isize>> {
    content
        .split(',')
        .map(|part| part.trim().parse::<isize>().ok())
        .collect()
}
trace!("tokens: {:#?}", tokens); + let mut results = Vec::new(); + + // Start with the root + if let Some(JSONPathToken::Root) = tokens.first() { + evaluate_tokens(doc, &tokens[1..], &mut results); + } else { + return Err(JsonPathError::InvalidJsonPath( + "JSONPath must start with $".to_string(), + )); + } + + Ok(results) +} + +fn evaluate_tokens( + value: &dyn PathValue, + tokens: &[JSONPathToken], + results: &mut Vec, +) { + if tokens.is_empty() { + results.push(value.clone_this().unwrap()); + return; + } + + match &tokens[0] { + JSONPathToken::Child(key) => { + if let Some(child) = value.get_by_key(key) { + evaluate_tokens(&child, &tokens[1..], results); + } + } + JSONPathToken::RecursiveDescend => { + // Implement recursive descent + value.for_each_for_path(&mut |child| { + evaluate_tokens(&child, tokens, results); + ControlFlow::Continue(()) + }); + evaluate_tokens(value, &tokens[1..], results); + } + JSONPathToken::Wildcard => { + value.for_each_for_path(&mut |child| { + evaluate_tokens(&child, &tokens[1..], results); + ControlFlow::Continue(()) + }); + } + JSONPathToken::Index(index) => { + if let Some(child) = value.get_by_index(*index) { + evaluate_tokens(&child, &tokens[1..], results); + } + } + JSONPathToken::UnionIndex(indices) => { + for index in indices { + if let Some(child) = value.get_by_index(*index) { + evaluate_tokens(&child, &tokens[1..], results); + } + } + } + JSONPathToken::UnionKey(keys) => { + for key in keys { + if let Some(child) = value.get_by_key(key) { + evaluate_tokens(&child, &tokens[1..], results); + } + } + } + JSONPathToken::Slice(start, end, step) => { + let len = value.length_for_path() as isize; + let start = start.unwrap_or(0); + let start = if start < 0 { + (len + start).max(0).min(len) + } else { + start.max(0).min(len) + }; + + let end = end.unwrap_or(len); + let end = if end < 0 { + (len + end).max(0).min(len) + } else { + end.max(0).min(len) + }; + + let step = step.unwrap_or(1); + if step > 0 { + for i in 
(start..end).step_by(step as usize) { + if let Some(child) = value.get_by_index(i) { + evaluate_tokens(&child, &tokens[1..], results); + } + } + } else { + for i in (start..end).rev().step_by((-step) as usize) { + if let Some(child) = value.get_by_index(i) { + evaluate_tokens(&child, &tokens[1..], results); + } + } + } + } + JSONPathToken::Filter(predicate) => { + // Implement filter logic + value.for_each_for_path(&mut |child| { + if predicate(&child) { + evaluate_tokens(&child, &tokens[1..], results); + } + ControlFlow::Continue(()) + }); + } + JSONPathToken::Root => { + // Root should only appear at the beginning, which is handled in evaluate_jsonpath + panic!("Unexpected root token in path"); + } + } +} + +// Implement necessary trait bounds for PathValue +pub trait PathValue { + fn get_by_key(&self, key: &str) -> Option; + fn get_by_index(&self, index: isize) -> Option; + fn for_each_for_path(&self, f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>); + fn length_for_path(&self) -> usize; + fn get_child_by_id(&self, id: ContainerID) -> Option; + fn clone_this(&self) -> Result; +} + +// Implement PathValue for ValueOrHandler +impl PathValue for ValueOrHandler { + fn get_by_key(&self, key: &str) -> Option { + match self { + ValueOrHandler::Value(v) => v.get_by_key(key).cloned().map(ValueOrHandler::Value), + ValueOrHandler::Handler(h) => h.get_by_key(key), + } + } + + fn get_by_index(&self, index: isize) -> Option { + match self { + ValueOrHandler::Value(v) => v.get_by_index(index).cloned().map(ValueOrHandler::Value), + ValueOrHandler::Handler(h) => h.get_by_index(index), + } + } + + fn for_each_for_path(&self, f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>) { + match self { + ValueOrHandler::Value(v) => v.for_each_for_path(f), + ValueOrHandler::Handler(h) => h.for_each_for_path(f), + } + } + + fn length_for_path(&self) -> usize { + match self { + ValueOrHandler::Value(v) => v.length_for_path(), + ValueOrHandler::Handler(h) => h.length_for_path(), + } + } 
+ + fn get_child_by_id(&self, id: ContainerID) -> Option { + match self { + ValueOrHandler::Handler(h) => h.get_child_by_id(id), + _ => None, + } + } + + fn clone_this(&self) -> Result { + match self { + ValueOrHandler::Value(v) => Ok(ValueOrHandler::Value(v.clone())), + ValueOrHandler::Handler(h) => Ok(ValueOrHandler::Handler(h.clone())), + } + } +} + +// Implement PathValue for LoroDoc +impl PathValue for LoroDoc { + fn get_by_key(&self, key: &str) -> Option { + self.get_by_str_path(key) + } + + fn get_by_index(&self, _index: isize) -> Option { + None // LoroDoc doesn't support index-based access + } + + fn for_each_for_path(&self, f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>) { + let arena = self.arena(); + for c in arena.root_containers() { + let cid = arena.idx_to_id(c).unwrap(); + let h = self.get_handler(cid); + if f(ValueOrHandler::Handler(h)) == ControlFlow::Break(()) { + break; + } + } + } + + fn length_for_path(&self) -> usize { + let state = self.app_state().lock().unwrap(); + state.arena.root_containers().len() + } + + fn get_child_by_id(&self, id: ContainerID) -> Option { + Some(self.get_handler(id)) + } + + fn clone_this(&self) -> Result { + Ok(ValueOrHandler::Value(self.get_deep_value())) + } +} + +// Implement PathValue for Handler +impl PathValue for Handler { + fn get_by_key(&self, key: &str) -> Option { + match self { + Handler::Map(h) => h.get_by_key(key), + Handler::Tree(h) => h.get_by_key(key), + _ => None, + } + } + + fn get_by_index(&self, index: isize) -> Option { + match self { + Handler::List(h) => h.get_by_index(index), + Handler::MovableList(h) => h.get_by_index(index), + _ => None, + } + } + + fn for_each_for_path(&self, f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>) { + match self { + Handler::Map(h) => h.for_each_for_path(f), + Handler::List(h) => h.for_each_for_path(f), + Handler::MovableList(h) => h.for_each_for_path(f), + Handler::Tree(h) => h.for_each_for_path(f), + _ => {} + } + } + + fn length_for_path(&self) -> 
usize { + match self { + Handler::Map(h) => h.length_for_path(), + Handler::List(h) => h.length_for_path(), + Handler::MovableList(h) => h.length_for_path(), + Handler::Tree(h) => h.length_for_path(), + Handler::Text(h) => h.length_for_path(), + _ => 0, + } + } + + fn get_child_by_id(&self, id: ContainerID) -> Option { + match self { + Handler::Map(h) => h.get_child_by_id(id), + Handler::List(h) => h.get_child_by_id(id), + Handler::MovableList(h) => h.get_child_by_id(id), + Handler::Tree(h) => h.get_child_by_id(id), + _ => None, + } + } + + fn clone_this(&self) -> Result { + Ok(ValueOrHandler::Handler(self.clone())) + } +} + +// Implementations for specific handlers +impl PathValue for MapHandler { + fn get_by_key(&self, key: &str) -> Option { + self.get_(key) + } + + fn get_by_index(&self, _index: isize) -> Option { + None + } + + fn for_each_for_path(&self, f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>) { + let mut all = vec![]; + self.for_each(|_, v| { + all.push(v); + }); + for v in all { + if let ControlFlow::Break(_) = f(v) { + break; + } + } + } + + fn length_for_path(&self) -> usize { + self.len() + } + + fn get_child_by_id(&self, id: ContainerID) -> Option { + self.get_child_handler(id.to_string().as_str()).ok() + } + + fn clone_this(&self) -> Result { + Ok(ValueOrHandler::Handler(Handler::Map(self.clone()))) + } +} + +impl PathValue for ListHandler { + fn get_by_key(&self, _key: &str) -> Option { + None + } + + fn get_by_index(&self, index: isize) -> Option { + if index < 0 { + self.get_(self.len() - (-index) as usize) + } else { + self.get_(index as usize) + } + } + + fn for_each_for_path(&self, f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>) { + let mut all = vec![]; + self.for_each(|v| { + all.push(v.1); + }); + for v in all { + if let ControlFlow::Break(_) = f(v) { + break; + } + } + } + + fn length_for_path(&self) -> usize { + self.len() + } + + fn get_child_by_id(&self, id: ContainerID) -> Option { + unimplemented!() + } + + fn 
clone_this(&self) -> Result { + Ok(ValueOrHandler::Handler(Handler::List(self.clone()))) + } +} + +impl PathValue for MovableListHandler { + fn get_by_key(&self, _key: &str) -> Option { + None + } + + fn get_by_index(&self, index: isize) -> Option { + if index < 0 { + if self.len() > (-index) as usize { + self.get_(self.len() - (-index) as usize) + } else { + None + } + } else { + self.get_(index as usize) + } + } + + fn for_each_for_path(&self, f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>) { + let mut all = vec![]; + self.for_each(|v| { + all.push(v); + }); + for v in all { + if let ControlFlow::Break(_) = f(v) { + break; + } + } + } + + fn length_for_path(&self) -> usize { + self.len() + } + + fn get_child_by_id(&self, id: ContainerID) -> Option { + unimplemented!() + } + + fn clone_this(&self) -> Result { + Ok(ValueOrHandler::Handler(Handler::MovableList(self.clone()))) + } +} + +impl PathValue for TextHandler { + fn get_by_key(&self, _key: &str) -> Option { + None + } + + fn get_by_index(&self, _index: isize) -> Option { + None + } + + fn for_each_for_path(&self, _f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>) { + // TextHandler doesn't have children to iterate over + } + + fn length_for_path(&self) -> usize { + self.len_unicode() + } + + fn get_child_by_id(&self, _id: ContainerID) -> Option { + None + } + + fn clone_this(&self) -> Result { + Ok(ValueOrHandler::Handler(Handler::Text(self.clone()))) + } +} + +impl PathValue for TreeHandler { + fn get_by_key(&self, key: &str) -> Option { + None + } + + fn get_by_index(&self, _index: isize) -> Option { + None + } + + fn for_each_for_path(&self, f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>) { + unimplemented!() + } + + fn length_for_path(&self) -> usize { + unimplemented!() + } + + fn get_child_by_id(&self, id: ContainerID) -> Option { + unimplemented!() + } + + fn clone_this(&self) -> Result { + Ok(ValueOrHandler::Handler(Handler::Tree(self.clone()))) + } +} + +impl PathValue for LoroValue { 
+ fn get_by_key(&self, key: &str) -> Option { + match self { + LoroValue::Map(map) => map.get(key).map(|v| ValueOrHandler::Value(v.clone())), + _ => None, + } + } + + fn get_by_index(&self, index: isize) -> Option { + match self { + LoroValue::List(list) => { + let index = if index < 0 { + if list.len() > (-index) as usize { + list.len() - (-index) as usize + } else { + return None; + } + } else { + index as usize + }; + list.get(index).map(|v| ValueOrHandler::Value(v.clone())) + } + _ => None, + } + } + + fn for_each_for_path(&self, f: &mut dyn FnMut(ValueOrHandler) -> ControlFlow<()>) { + match self { + LoroValue::List(list) => { + for item in list.iter() { + if let ControlFlow::Break(_) = f(ValueOrHandler::Value(item.clone())) { + break; + } + } + } + LoroValue::Map(map) => { + for (_, value) in map.iter() { + if let ControlFlow::Break(_) = f(ValueOrHandler::Value(value.clone())) { + break; + } + } + } + _ => {} + } + } + + fn length_for_path(&self) -> usize { + match self { + LoroValue::List(list) => list.len(), + LoroValue::Map(map) => map.len(), + LoroValue::String(s) => s.len(), + _ => 0, + } + } + + fn get_child_by_id(&self, _id: ContainerID) -> Option { + None + } + + fn clone_this(&self) -> Result { + Ok(ValueOrHandler::Value(self.clone())) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::loro::LoroDoc; + + #[test] + fn test_parse_jsonpath() -> Result<(), JsonPathError> { + let path = "$.store.book[0].title"; + let tokens = parse_jsonpath(path)?; + assert_eq!( + tokens, + vec![ + JSONPathToken::Root, + JSONPathToken::Child("store".to_string()), + JSONPathToken::Child("book".to_string()), + JSONPathToken::Index(0), + JSONPathToken::Child("title".to_string()), + ] + ); + Ok(()) + } + + #[test] + fn test_evaluate_jsonpath() -> Result<(), JsonPathError> { + let doc = LoroDoc::new(); + doc.start_auto_commit(); + let map = doc.get_map("map"); + map.insert("key", "value").unwrap(); + let books = map + .insert_container("books", 
/// Child selectors, a wildcard and recursive descent must each find the
/// single book title in a document holding one book.
#[test]
fn test_jsonpath_on_loro_doc() -> Result<(), JsonPathError> {
    let doc = LoroDoc::new();
    doc.start_auto_commit();
    let root = doc.get_map("map");
    root.insert("key", "value").unwrap();
    let shelf = root
        .insert_container("books", ListHandler::new_detached())
        .unwrap();
    let entry = shelf
        .insert_container(0, MapHandler::new_detached())
        .unwrap();
    entry.insert("title", "1984").unwrap();
    entry.insert("author", "George Orwell").unwrap();

    // Same order as before: child selectors, wildcard, recursive descent.
    let queries = [
        "$['map'].books[0].title",
        "$['map'].books[*].title",
        "$..title",
    ];
    for query in queries {
        let matches = evaluate_jsonpath(&doc, query).unwrap();
        assert_eq!(matches.len(), 1);
        assert_eq!(
            &**matches[0].as_value().unwrap().as_string().unwrap(),
            "1984"
        );
    }
    Ok(())
}
book.insert("title", "1984").unwrap(); + book.insert("author", "George Orwell").unwrap(); + book.insert("price", 10).unwrap(); + let book = books + .insert_container(1, MapHandler::new_detached()) + .unwrap(); + book.insert("title", "Animal Farm").unwrap(); + book.insert("author", "George Orwell").unwrap(); + book.insert("price", 8).unwrap(); + + // Test array indexing + let path = "$['map'].books[0].title"; + let result = evaluate_jsonpath(&doc, path).unwrap(); + assert_eq!(result.len(), 1); + assert_eq!( + &**result[0].as_value().unwrap().as_string().unwrap(), + "1984" + ); + + // Test recursive descent + let path = "$..title"; + let result = evaluate_jsonpath(&doc, path).unwrap(); + assert_eq!(result.len(), 2); + assert_eq!( + &**result[0].as_value().unwrap().as_string().unwrap(), + "1984" + ); + assert_eq!( + &**result[1].as_value().unwrap().as_string().unwrap(), + "Animal Farm" + ); + Ok(()) + } +} diff --git a/crates/loro-internal/src/lib.rs b/crates/loro-internal/src/lib.rs index 92f500c3..5fc768cf 100644 --- a/crates/loro-internal/src/lib.rs +++ b/crates/loro-internal/src/lib.rs @@ -25,20 +25,21 @@ pub use state::TreeParentId; pub use undo::UndoManager; pub use utils::subscription::Subscription; pub mod awareness; -pub mod cursor; -pub mod kv_store; -pub mod loro; -pub mod obs; -pub mod oplog; -pub mod txn; - pub mod change; pub mod configure; pub mod container; +pub mod cursor; pub mod dag; pub mod encoding; pub mod id; +#[cfg(feature = "jsonpath")] +pub mod jsonpath; +pub mod kv_store; +pub mod loro; +pub mod obs; pub mod op; +pub mod oplog; +pub mod txn; pub mod version; mod error; diff --git a/crates/loro-internal/src/state/map_state.rs b/crates/loro-internal/src/state/map_state.rs index 56f00de9..470dda59 100644 --- a/crates/loro-internal/src/state/map_state.rs +++ b/crates/loro-internal/src/state/map_state.rs @@ -1,4 +1,5 @@ use std::{ + collections::BTreeMap, mem, sync::{Arc, Mutex, Weak}, }; @@ -25,7 +26,7 @@ use super::{ContainerState, 
DiffApplyContext}; #[derive(Debug, Clone)] pub struct MapState { idx: ContainerIdx, - map: FxHashMap, + map: BTreeMap, size: usize, } @@ -35,7 +36,7 @@ impl ContainerState for MapState { } fn estimate_size(&self) -> usize { - self.map.capacity() * (mem::size_of::() + mem::size_of::()) + self.map.len() * (mem::size_of::() + mem::size_of::()) } fn is_state_empty(&self) -> bool { @@ -223,7 +224,7 @@ impl MapState { pub fn new(idx: ContainerIdx) -> Self { Self { idx, - map: FxHashMap::default(), + map: Default::default(), size: 0, } } @@ -251,17 +252,14 @@ impl MapState { pub fn remove(&mut self, key: &InternalString) { let result = self.map.remove(key); - match result { - Some(x) => { - if let Some(_) = x.value { - self.size -= 1; - } + if let Some(x) = result { + if x.value.is_some() { + self.size -= 1; } - None => {} }; } - pub fn iter(&self) -> std::collections::hash_map::Iter<'_, InternalString, MapValue> { + pub fn iter(&self) -> std::collections::btree_map::Iter<'_, InternalString, MapValue> { self.map.iter() } diff --git a/crates/loro/Cargo.toml b/crates/loro/Cargo.toml index 52cc37d3..aeea35ee 100644 --- a/crates/loro/Cargo.toml +++ b/crates/loro/Cargo.toml @@ -15,6 +15,7 @@ keywords = ["crdt", "local-first"] [dependencies] loro-internal = { path = "../loro-internal", version = "0.16.2" } +loro-common = { path = "../loro-common", version = "0.16.2", features = ["serde_json"] } loro-kv-store = { path = "../kv-store", version = "0.16.2" } delta = { path = "../delta", package = "loro-delta", version = "0.16.2" } generic-btree = { version = "^0.10.5" } @@ -32,3 +33,4 @@ pretty_assertions = "1.4.0" [features] counter = ["loro-internal/counter"] +jsonpath = ["loro-internal/jsonpath"] diff --git a/crates/loro/src/lib.rs b/crates/loro/src/lib.rs index e83b187a..ba14fa1c 100644 --- a/crates/loro/src/lib.rs +++ b/crates/loro/src/lib.rs @@ -60,6 +60,11 @@ pub use loro_internal::{loro_value, to_value}; pub use loro_internal::{LoroError, LoroResult, LoroValue, ToJson}; pub 
use loro_kv_store as kv_store; +#[cfg(feature = "jsonpath")] +pub use loro_internal::jsonpath; +#[cfg(feature = "jsonpath")] +pub use loro_internal::jsonpath::JsonPathError; + #[cfg(feature = "counter")] mod counter; #[cfg(feature = "counter")] @@ -693,6 +698,48 @@ impl LoroDoc { pub fn get_path_to_container(&self, id: &ContainerID) -> Option> { self.doc.get_path_to_container(id) } + + /// Evaluate a JSONPath expression on the document and return matching values or containers. + /// + /// This method allows querying the document structure using JSONPath syntax. + /// It returns a vector of `ValueOrContainer` which can represent either primitive values + /// or containers, depending on what the JSONPath expression matches. + /// + /// # Arguments + /// + /// * `path` - A string slice containing the JSONPath expression to evaluate. + /// + /// # Returns + /// + /// A `Result` containing either: + /// - `Ok(Vec<ValueOrContainer>)`: A vector of matching values or containers. + /// - `Err(JsonPathError)`: The error raised if the JSONPath expression is invalid or evaluation fails. + /// + /// # Example + /// + /// ``` + /// # use loro::LoroDoc; + /// let doc = LoroDoc::new(); + /// let map = doc.get_map("users"); + /// map.insert("alice", 30).unwrap(); + /// map.insert("bob", 25).unwrap(); + /// + /// let result = doc.jsonpath("$.users.alice").unwrap(); + /// assert_eq!(result.len(), 1); + /// assert_eq!(result[0].to_json_value(), serde_json::json!(30)); + /// ``` + #[inline] + #[cfg(feature = "jsonpath")] + pub fn jsonpath(&self, path: &str) -> Result, JsonPathError> { + self.doc.jsonpath(path).map(|vec| { + vec.into_iter() + .map(|v| match v { + ValueOrHandler::Value(v) => ValueOrContainer::Value(v), + ValueOrHandler::Handler(h) => ValueOrContainer::Container(h.into()), + }) + .collect() + }) + } } /// It's used to prevent the user from implementing the trait directly. 
diff --git a/crates/loro/tests/integration_test/jsonpath_test.rs b/crates/loro/tests/integration_test/jsonpath_test.rs new file mode 100644 index 00000000..65652943 --- /dev/null +++ b/crates/loro/tests/integration_test/jsonpath_test.rs @@ -0,0 +1,334 @@ +use loro::{ + ExportMode, Frontiers, LoroDoc, LoroList, LoroMap, LoroValue, ToJson, ValueOrContainer, ID, +}; +use serde_json::json; + +fn to_json(v: Vec) -> serde_json::Value { + v.into_iter() + .map(|x| x.get_deep_value().to_json_value()) + .collect() +} + +fn create_map_from_json(json: serde_json::Value) -> LoroMap { + let map = LoroMap::new(); + for (key, value) in json.as_object().unwrap().iter() { + map.insert(key, value.clone()).unwrap(); + } + map +} + +fn setup_test_doc() -> LoroDoc { + let doc = LoroDoc::new(); + let store = doc.get_map("store"); + + let books = store.insert_container("book", LoroList::new()).unwrap(); + books + .insert_container( + 0, + create_map_from_json(json!({ + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95, + "isbn": "0-553-21311-3" + })), + ) + .unwrap(); + books + .insert_container( + 1, + create_map_from_json(json!({ + "category": "fiction", + "author": "Evelyn Waugh", + "title": "Sword of Honour", + "price": 12.99, + "isbn": "0-553-21312-1" + })), + ) + .unwrap(); + books + .insert_container( + 2, + create_map_from_json(json!({ + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "price": 8.99, + "isbn": "0-553-21313-X" + })), + ) + .unwrap(); + books + .insert_container( + 3, + create_map_from_json(json!({ + "category": "fiction", + "author": "J. R. R. 
Tolkien", + "title": "The Lord of the Rings", + "price": 22.99, + "isbn": "0-395-19395-8" + })), + ) + .unwrap(); + + store + .insert_container( + "bicycle", + create_map_from_json(json!({ + "color": "red", + "price": 19.95 + })), + ) + .unwrap(); + + store.insert("expensive", 10).unwrap(); + + doc +} + +#[test] +fn test_all_authors() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$.store.book[*].author")?; + assert_eq!( + to_json(ans), + json!([ + "Nigel Rees", + "Evelyn Waugh", + "Herman Melville", + "J. R. R. Tolkien" + ]) + ); + Ok(()) +} + +#[test] +#[ignore = "filter syntax not implemented"] +fn test_books_with_isbn() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[?(@.isbn)]")?; + assert_eq!(ans.len(), 4); + Ok(()) +} + +#[test] +fn test_all_things_in_store() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$.store.*")?; + assert_eq!(ans.len(), 3); // book array, bicycle object, and expensive value + Ok(()) +} + +#[test] +fn test_all_authors_recursive() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..author")?; + assert_eq!( + to_json(ans), + json!([ + "Nigel Rees", + "Evelyn Waugh", + "Herman Melville", + "J. R. R. 
Tolkien" + ]) + ); + Ok(()) +} + +#[test] +fn test_all_prices() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$.store..price")?; + assert_eq!(to_json(ans), json!([19.95, 8.95, 12.99, 8.99, 22.99])); + Ok(()) +} + +#[test] +fn test_third_book() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[2]")?; + assert_eq!( + to_json(ans), + json!([{ + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "price": 8.99, + "isbn": "0-553-21313-X" + }]) + ); + Ok(()) +} + +#[test] +fn test_second_to_last_book() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[-2]")?; + assert_eq!( + to_json(ans), + json!([{ + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "price": 8.99, + "isbn": "0-553-21313-X" + }]) + ); + Ok(()) +} + +#[test] +fn test_first_two_books() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[0,1]")?; + assert_eq!(ans.len(), 2); + Ok(()) +} + +#[test] +fn test_books_slice() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[:2]")?; + assert_eq!(ans.len(), 2); + Ok(()) +} + +#[test] +fn test_books_slice_from_index() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[1:2]")?; + assert_eq!(ans.len(), 1); + Ok(()) +} + +#[test] +fn test_last_two_books() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[-2:]")?; + assert_eq!(ans.len(), 2); + Ok(()) +} + +#[test] +fn test_book_number_two_from_tail() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[2:]")?; + assert_eq!(ans.len(), 2); + Ok(()) +} + +#[test] +#[ignore = "filter syntax not implemented"] +fn test_books_cheaper_than_10() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$.store.book[?(@.price < 10)]")?; + assert_eq!(ans.len(), 
2); + Ok(()) +} + +#[test] +#[ignore = "filter syntax not implemented"] +fn test_books_not_expensive() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[?(@.price <= $.expensive)]")?; + assert_eq!(ans.len(), 2); + Ok(()) +} + +#[test] +fn test_everything() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..*")?; + assert!(ans.len() > 0); + Ok(()) +} + +#[test] +fn test_books_slice_with_step() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$..book[1:9:3]")?; + assert_eq!(ans.len(), 1); + Ok(()) +} + +#[test] +fn test_multiple_keys() -> anyhow::Result<()> { + let doc = setup_test_doc(); + let ans = doc.jsonpath("$.store[\"book\", \"bicycle\"]")?; + assert_eq!(ans.len(), 2); + Ok(()) +} + +#[test] +fn test_jsonpath() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + doc.get_map("root").insert("key", LoroValue::from(1))?; + doc.get_map("root").insert("key2", LoroValue::from(2))?; + doc.get_map("root").insert("key3", LoroValue::from(3))?; + let ans = doc.jsonpath("$..").unwrap(); + assert_eq!( + to_json(ans), + serde_json::json!([ + 1, + 2, + 3, + { + "key": 1, + "key2": 2, + "key3": 3 + }, + { + "root": { + "key": 1, + "key2": 2, + "key3": 3 + } + } + ]) + ); + Ok(()) +} + +#[test] +fn test_jsonpath_with_array() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + let array = doc.get_list("root"); + array.insert(0, 1)?; + array.insert(1, 2)?; + array.insert(2, 3)?; + let ans = doc.jsonpath("$..")?; + assert_eq!( + to_json(ans), + serde_json::json!([ + 1, + 2, + 3, + [1, 2, 3], + { "root": [1, 2, 3] } + ]) + ); + Ok(()) +} + +#[test] +fn test_jsonpath_nested_objects() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + let root = doc.get_map("root"); + let child = root.insert_container("child", LoroMap::new())?; + child.insert("key", "value")?; + let ans = doc.jsonpath("$.root.child.key").unwrap(); + assert_eq!(to_json(ans), serde_json::json!(["value"])); + 
Ok(()) +} + +#[test] +fn test_jsonpath_wildcard() -> anyhow::Result<()> { + let doc = LoroDoc::new(); + let root = doc.get_map("root"); + root.insert("key1", 1)?; + root.insert("key2", 2)?; + root.insert("key3", 3)?; + let ans = doc.jsonpath("$.root.*").unwrap(); + assert_eq!(to_json(ans), serde_json::json!([1, 2, 3])); + Ok(()) +} diff --git a/crates/loro/tests/integration_test/mod.rs b/crates/loro/tests/integration_test/mod.rs index 0b6d70f4..f215a896 100644 --- a/crates/loro/tests/integration_test/mod.rs +++ b/crates/loro/tests/integration_test/mod.rs @@ -1,6 +1,8 @@ use loro::LoroDoc; mod gc_test; +#[cfg(feature = "jsonpath")] +mod jsonpath_test; mod undo_test; fn gen_action(doc: &LoroDoc, seed: u64, mut ops_len: usize) { diff --git a/loro-js/src/index.ts b/loro-js/src/index.ts index f0174655..6d54272e 100644 --- a/loro-js/src/index.ts +++ b/loro-js/src/index.ts @@ -6,7 +6,7 @@ export { LoroMovableList, LoroText, LoroTree, LoroTreeNode, MapOp, MovableListOp, OpId, PeerID, Side, TextOp, TreeID, TreeNodeValue, TreeOp, UndoConfig, UndoManager, UnknownOp, Value, VersionVector, decodeImportBlobMeta, setDebug, - newContainerID, newRootContainerID, LoroDoc + newContainerID, newRootContainerID } from "loro-wasm"; import { Container, diff --git a/loro-js/tests/basic.test.ts b/loro-js/tests/basic.test.ts index fa132991..30043344 100644 --- a/loro-js/tests/basic.test.ts +++ b/loro-js/tests/basic.test.ts @@ -202,7 +202,7 @@ describe("map", () => { map.set("foo", "bar"); map.set("baz", "bar"); const entries = map.keys(); - expect(entries).toStrictEqual(["foo", "baz"]); + expect(entries).toStrictEqual(["baz", "foo"]); }); it("values", () => { @@ -223,8 +223,8 @@ describe("map", () => { map.delete("new"); const entries = map.entries(); expect(entries).toStrictEqual([ - ["foo", "bar"], ["baz", "bar"], + ["foo", "bar"], ]); }); @@ -234,7 +234,7 @@ describe("map", () => { map.setContainer("text", new LoroText()); map.set("foo", "bar"); const entries = map.entries(); - 
expect((entries[0][1]! as Container).kind() === "Text").toBeTruthy(); + expect((entries[1][1]! as Container).kind() === "Text").toBeTruthy(); }); }); diff --git a/package.json b/package.json index 492f2b66..734c0e15 100644 --- a/package.json +++ b/package.json @@ -7,10 +7,10 @@ "scripts": { "check-all": "cargo hack check --each-feature", "build": "cargo build", - "test": "cargo nextest run --features=test_utils --no-fail-fast && cargo test --doc", + "test": "cargo nextest run --features=test_utils,jsonpath --no-fail-fast && cargo test --doc", "test-all": "nr test && nr test-wasm", "test-wasm": "cd crates/loro-wasm && deno task dev && cd ../../loro-js && pnpm i && pnpm run test", - "coverage": "mkdir -p coverage && cargo llvm-cov nextest --features test_utils --lcov > coverage/lcov-nextest.info && cargo llvm-cov report", + "coverage": "mkdir -p coverage && cargo llvm-cov nextest --features test_utils,jsonpath --lcov > coverage/lcov-nextest.info && cargo llvm-cov report", "release-wasm": "cd crates/loro-wasm && deno task release && cd ../../loro-js && pnpm i && pnpm build && pnpm run test", "check": "cargo clippy --all-features", "run-fuzz-corpus": "cd crates/fuzz && cargo +nightly fuzz run all -- -max_total_time=1",