FTS storage implementation and background indexing

mdecimus 2023-11-17 14:59:09 +01:00
parent a3e6d152c9
commit bcc05340b2
88 changed files with 3105 additions and 2214 deletions

Cargo.lock generated

@@ -2306,6 +2306,7 @@ dependencies = [
  "chrono",
  "jmap_proto",
  "mail-parser",
+ "store",
  "tokio",
 ]
@@ -5131,6 +5132,7 @@ dependencies = [
  "futures",
  "lazy_static",
  "lru-cache",
+ "lz4_flex",
  "nlp",
  "num_cpus",
  "parking_lot",
@@ -5923,6 +5925,7 @@ dependencies = [
  "opentelemetry-otlp",
  "opentelemetry-semantic-conventions",
  "privdrop",
+ "rand 0.8.5",
  "rustls 0.21.7",
  "rustls-pemfile",
  "serde",


@@ -6,6 +6,7 @@ resolver = "2"
 [dependencies]
 jmap_proto = { path = "../jmap-proto" }
+store = { path = "../store" }
 mail-parser = { git = "https://github.com/stalwartlabs/mail-parser", features = ["full_encoding", "serde_support", "ludicrous_mode"] }
 ahash = { version = "0.8" }
 chrono = { version = "0.4"}


@@ -21,6 +21,8 @@
  * for more details.
  */

+use store::fts::{FilterItem, FilterType};
+
 use super::{quoted_string, serialize_sequence, Flag, Sequence};

 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -129,6 +131,38 @@ pub enum Filter {
     ThreadId(String),
 }

+impl FilterItem for Filter {
+    fn filter_type(&self) -> FilterType {
+        match self {
+            Filter::From(_)
+            | Filter::To(_)
+            | Filter::Cc(_)
+            | Filter::Bcc(_)
+            | Filter::Subject(_)
+            | Filter::Body(_)
+            | Filter::Text(_)
+            | Filter::Header(_, _) => FilterType::Fts,
+            Filter::And => FilterType::And,
+            Filter::Or => FilterType::Or,
+            Filter::Not => FilterType::Not,
+            Filter::End => FilterType::End,
+            _ => FilterType::Store,
+        }
+    }
+}
+
+impl From<FilterType> for Filter {
+    fn from(value: FilterType) -> Self {
+        match value {
+            FilterType::And => Filter::And,
+            FilterType::Or => Filter::Or,
+            FilterType::Not => Filter::Not,
+            FilterType::End => Filter::End,
+            _ => unreachable!(),
+        }
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum ModSeqEntry {
     Shared(Flag),

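The FilterItem/FilterType pair is what lets the IMAP and JMAP query code below split a flat, prefix-notation filter list into runs that are answered by the FTS backend versus the regular store. A minimal sketch of that grouping step, under stated assumptions: it reuses the FilterItem trait and FilterType enum imported above, defines its own Group type, and ignores the operator-nesting handling that the real IntoFilterGroup implementation also needs.

use store::fts::{FilterItem, FilterType};

// Simplified sketch (not this commit's code): batch consecutive FTS
// conditions so they can be resolved with a single round-trip to the FTS
// backend, and pass everything else through to the store query engine
// one item at a time.
pub enum Group<T> {
    Fts(Vec<T>),
    Store(T),
}

pub fn group_filters<T: FilterItem>(filters: Vec<T>) -> Vec<Group<T>> {
    let mut groups = Vec::with_capacity(filters.len());
    let mut fts = Vec::new();
    for filter in filters {
        if matches!(filter.filter_type(), FilterType::Fts) {
            fts.push(filter);
        } else {
            // Flush any pending FTS run before emitting the store condition.
            if !fts.is_empty() {
                groups.push(Group::Fts(std::mem::take(&mut fts)));
            }
            groups.push(Group::Store(filter));
        }
    }
    if !fts.is_empty() {
        groups.push(Group::Fts(fts));
    }
    groups
}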

@@ -36,6 +36,7 @@ use jmap_proto::types::{collection::Collection, id::Id, keyword::Keyword, proper
 use mail_parser::HeaderName;
 use nlp::language::Language;
 use store::{
+    fts::{Field, FilterGroup, FtsFilter, IntoFilterGroup},
     query::{self, log::Query, sort::Pagination, ResultSet},
     roaring::RoaringBitmap,
     write::now,
@@ -275,371 +276,396 @@ impl SessionData {
        // Convert query
        let mut include_highest_modseq = false;
        for filter_group in imap_filter.into_filter_group() {
            match filter_group {
                FilterGroup::Fts(conds) => {
                    let mut fts_filters = Vec::with_capacity(filters.len());
                    for cond in conds {
                        match cond {
                            search::Filter::Bcc(text) => {
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::Bcc),
                                    text,
                                    Language::None,
                                ));
                            }
                            search::Filter::Body(text) => {
                                fts_filters.push(FtsFilter::has_text_detect(
                                    Field::Body,
                                    text,
                                    self.jmap.config.default_language,
                                ));
                            }
                            search::Filter::Cc(text) => {
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::Cc),
                                    text,
                                    Language::None,
                                ));
                            }
                            search::Filter::From(text) => {
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::From),
                                    text,
                                    Language::None,
                                ));
                            }
                            search::Filter::Header(header, value) => {
                                match HeaderName::parse(header) {
                                    Some(HeaderName::Other(header_name)) => {
                                        return Err(StatusResponse::no(format!(
                                            "Querying header '{header_name}' is not supported.",
                                        )));
                                    }
                                    Some(header_name) => {
                                        if !value.is_empty() {
                                            if matches!(
                                                header_name,
                                                HeaderName::MessageId
                                                    | HeaderName::InReplyTo
                                                    | HeaderName::References
                                                    | HeaderName::ResentMessageId
                                            ) {
                                                fts_filters.push(FtsFilter::has_keyword(
                                                    Field::Header(header_name),
                                                    value,
                                                ));
                                            } else {
                                                fts_filters.push(FtsFilter::has_text(
                                                    Field::Header(header_name),
                                                    value,
                                                    Language::None,
                                                ));
                                            }
                                        } else {
                                            fts_filters.push(FtsFilter::has_keyword(
                                                Field::Keyword,
                                                header_name.as_str().to_lowercase(),
                                            ));
                                        }
                                    }
                                    None => (),
                                }
                            }
                            search::Filter::Subject(text) => {
                                fts_filters.push(FtsFilter::has_text_detect(
                                    Field::Header(HeaderName::Subject),
                                    text,
                                    self.jmap.config.default_language,
                                ));
                            }
                            search::Filter::Text(text) => {
                                fts_filters.push(FtsFilter::Or);
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::From),
                                    &text,
                                    Language::None,
                                ));
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::To),
                                    &text,
                                    Language::None,
                                ));
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::Cc),
                                    &text,
                                    Language::None,
                                ));
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::Bcc),
                                    &text,
                                    Language::None,
                                ));
                                fts_filters.push(FtsFilter::has_text_detect(
                                    Field::Header(HeaderName::Subject),
                                    &text,
                                    self.jmap.config.default_language,
                                ));
                                fts_filters.push(FtsFilter::has_text_detect(
                                    Field::Body,
                                    &text,
                                    self.jmap.config.default_language,
                                ));
                                fts_filters.push(FtsFilter::has_text_detect(
                                    Field::Attachment,
                                    text,
                                    self.jmap.config.default_language,
                                ));
                                fts_filters.push(FtsFilter::End);
                            }
                            search::Filter::To(text) => {
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::To),
                                    text,
                                    Language::None,
                                ));
                            }
                            search::Filter::And => {
                                fts_filters.push(FtsFilter::And);
                            }
                            search::Filter::Or => {
                                fts_filters.push(FtsFilter::Or);
                            }
                            search::Filter::Not => {
                                fts_filters.push(FtsFilter::Not);
                            }
                            search::Filter::End => {
                                fts_filters.push(FtsFilter::End);
                            }
                            _ => (),
                        }
                    }
                    filters.push(query::Filter::is_in_set(
                        self.jmap
                            .fts_filter(mailbox.id.account_id, Collection::Email, fts_filters)
                            .await?,
                    ));
                }
                FilterGroup::Store(cond) => match cond {
                    search::Filter::Sequence(sequence, uid_filter) => {
                        let mut set = RoaringBitmap::new();
                        if let (Sequence::SavedSearch, Some(prev_saved_search)) =
                            (&sequence, &prev_saved_search)
                        {
                            if let Some(prev_saved_search) = prev_saved_search {
                                let state = mailbox.state.lock();
                                for imap_id in prev_saved_search.iter() {
                                    if let Some(id) = state.uid_to_id.get(&imap_id.uid) {
                                        set.insert(*id);
                                    }
                                }
                            } else {
                                return Err(StatusResponse::no("No saved search found."));
                            }
                        } else {
                            for id in mailbox
                                .sequence_to_ids(&sequence, is_uid || uid_filter)
                                .await?
                                .keys()
                            {
                                set.insert(*id);
                            }
                        }
                        filters.push(query::Filter::is_in_set(set));
                    }
                    search::Filter::All => {
                        filters.push(query::Filter::is_in_set(message_ids.clone()));
                    }
                    search::Filter::Answered => {
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Answered,
                        ));
                    }
                    search::Filter::Before(date) => {
                        filters.push(query::Filter::lt(Property::ReceivedAt, date as u64));
                    }
                    search::Filter::Deleted => {
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Deleted,
                        ));
                    }
                    search::Filter::Draft => {
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Draft,
                        ));
                    }
                    search::Filter::Flagged => {
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Flagged,
                        ));
                    }
                    search::Filter::Keyword(keyword) => {
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::from(keyword),
                        ));
                    }
                    search::Filter::Larger(size) => {
                        filters.push(query::Filter::gt(Property::Size, size));
                    }
                    search::Filter::On(date) => {
                        filters.push(query::Filter::And);
                        filters.push(query::Filter::ge(Property::ReceivedAt, date as u64));
                        filters.push(query::Filter::lt(
                            Property::ReceivedAt,
                            (date + 86400) as u64,
                        ));
                        filters.push(query::Filter::End);
                    }
                    search::Filter::Seen => {
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Seen,
                        ));
                    }
                    search::Filter::SentBefore(date) => {
                        filters.push(query::Filter::lt(Property::SentAt, date as u64));
                    }
                    search::Filter::SentOn(date) => {
                        filters.push(query::Filter::And);
                        filters.push(query::Filter::ge(Property::SentAt, date as u64));
                        filters.push(query::Filter::lt(Property::SentAt, (date + 86400) as u64));
                        filters.push(query::Filter::End);
                    }
                    search::Filter::SentSince(date) => {
                        filters.push(query::Filter::ge(Property::SentAt, date as u64));
                    }
                    search::Filter::Since(date) => {
                        filters.push(query::Filter::ge(Property::ReceivedAt, date as u64));
                    }
                    search::Filter::Smaller(size) => {
                        filters.push(query::Filter::lt(Property::Size, size));
                    }
                    search::Filter::Unanswered => {
                        filters.push(query::Filter::Not);
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Answered,
                        ));
                        filters.push(query::Filter::End);
                    }
                    search::Filter::Undeleted => {
                        filters.push(query::Filter::Not);
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Deleted,
                        ));
                        filters.push(query::Filter::End);
                    }
                    search::Filter::Undraft => {
                        filters.push(query::Filter::Not);
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Draft,
                        ));
                        filters.push(query::Filter::End);
                    }
                    search::Filter::Unflagged => {
                        filters.push(query::Filter::Not);
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Flagged,
                        ));
                        filters.push(query::Filter::End);
                    }
                    search::Filter::Unkeyword(keyword) => {
                        filters.push(query::Filter::Not);
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::from(keyword),
                        ));
                        filters.push(query::Filter::End);
                    }
                    search::Filter::Unseen => {
                        filters.push(query::Filter::Not);
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Seen,
                        ));
                        filters.push(query::Filter::End);
                    }
                    search::Filter::And => {
                        filters.push(query::Filter::And);
                    }
                    search::Filter::Or => {
                        filters.push(query::Filter::Or);
                    }
                    search::Filter::Not => {
                        filters.push(query::Filter::Not);
                    }
                    search::Filter::End => {
                        filters.push(query::Filter::End);
                    }
                    search::Filter::Recent => {
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Recent,
                        ));
                    }
                    search::Filter::New => {
                        filters.push(query::Filter::And);
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Recent,
                        ));
                        filters.push(query::Filter::Not);
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Seen,
                        ));
                        filters.push(query::Filter::End);
                        filters.push(query::Filter::End);
                    }
                    search::Filter::Old => {
                        filters.push(query::Filter::Not);
                        filters.push(query::Filter::is_in_bitmap(
                            Property::Keywords,
                            Keyword::Seen,
                        ));
                        filters.push(query::Filter::End);
                    }
                    search::Filter::Older(secs) => {
                        filters.push(query::Filter::le(
                            Property::ReceivedAt,
                            now().saturating_sub(secs as u64),
                        ));
                    }
                    search::Filter::Younger(secs) => {
                        filters.push(query::Filter::ge(
                            Property::ReceivedAt,
                            now().saturating_sub(secs as u64),
                        ));
                    }
                    search::Filter::ModSeq((modseq, _)) => {
                        let mut set = RoaringBitmap::new();
                        for change in self
                            .jmap
                            .changes_(
                                mailbox.id.account_id,
                                Collection::Email,
                                Query::from_modseq(modseq),
                            )
                            .await?
                            .changes
                        {
                            let id = (change.unwrap_id() & u32::MAX as u64) as u32;
                            if message_ids.contains(id) {
                                set.insert(id);
                            }
                        }
                        filters.push(query::Filter::is_in_set(set));
                        include_highest_modseq = true;
                    }
                    search::Filter::EmailId(id) => {
                        if let Some(id) = Id::from_bytes(id.as_bytes()) {
                            filters.push(query::Filter::is_in_set(
                                RoaringBitmap::from_sorted_iter([id.document_id()]).unwrap(),
                            ));
                        } else {
                            return Err(StatusResponse::no(format!(
                                "Failed to parse email id '{id}'.",
                            )));
                        }
                    }
                    search::Filter::ThreadId(id) => {
                        if let Some(id) = Id::from_bytes(id.as_bytes()) {
                            filters.push(query::Filter::is_in_bitmap(
                                Property::ThreadId,
                                id.document_id(),
                            ));
                        } else {
                            return Err(StatusResponse::no(format!(
                                "Failed to parse thread id '{id}'.",
                            )));
                        }
                    }
                    _ => (),
                },
            }
        }


@@ -23,6 +23,8 @@
 use std::fmt::Display;

+use store::fts::{FilterItem, FilterType, FtsFilter};
+
 use crate::{
     error::method::MethodError,
     object::{email, mailbox},
@@ -785,3 +787,47 @@ impl From<Filter> for store::query::Filter {
         }
     }
 }
+
+impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<Filter> for FtsFilter<T> {
+    fn from(value: Filter) -> Self {
+        match value {
+            Filter::And => Self::And,
+            Filter::Or => Self::Or,
+            Filter::Not => Self::Not,
+            Filter::Close => Self::End,
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl FilterItem for Filter {
+    fn filter_type(&self) -> FilterType {
+        match self {
+            Filter::Text(_)
+            | Filter::From(_)
+            | Filter::To(_)
+            | Filter::Cc(_)
+            | Filter::Bcc(_)
+            | Filter::Subject(_)
+            | Filter::Body(_)
+            | Filter::Header(_) => FilterType::Fts,
+            Filter::And => FilterType::And,
+            Filter::Or => FilterType::Or,
+            Filter::Not => FilterType::Not,
+            Filter::Close => FilterType::End,
+            _ => FilterType::Store,
+        }
+    }
+}
+
+impl From<FilterType> for Filter {
+    fn from(value: FilterType) -> Self {
+        match value {
+            FilterType::And => Filter::And,
+            FilterType::Or => Filter::Or,
+            FilterType::Not => Filter::Not,
+            FilterType::End => Filter::Close,
+            _ => unreachable!(),
+        }
+    }
+}


@@ -25,8 +25,8 @@ use std::{borrow::Cow, collections::HashSet};
 use store::{
     write::{
-        assert::HashedValue, BatchBuilder, BitmapClass, IntoOperations, Operation, TagValue,
-        TokenizeText, ValueClass, ValueOp,
+        assert::HashedValue, BatchBuilder, BitmapClass, BitmapHash, IntoOperations, Operation,
+        TagValue, TokenizeText, ValueClass, ValueOp,
     },
     Serialize,
 };
@@ -238,7 +238,7 @@ fn merge_batch(
                 batch.ops.push(Operation::Bitmap {
                     class: BitmapClass::Text {
                         field,
-                        token: token.into(),
+                        token: BitmapHash::new(token),
                     },
                     set,
                 });
@@ -301,7 +301,7 @@ fn merge_batch(
                 batch.ops.push(Operation::Bitmap {
                     class: BitmapClass::Text {
                         field,
-                        token: token.into_bytes(),
+                        token: BitmapHash::new(token),
                     },
                     set,
                 });
@@ -480,7 +480,7 @@ fn build_batch(
                 batch.ops.push(Operation::Bitmap {
                     class: BitmapClass::Text {
                         field,
-                        token: token.into_bytes(),
+                        token: BitmapHash::new(token),
                     },
                     set,
                 });
@@ -512,7 +512,7 @@ fn build_batch(
                 batch.ops.push(Operation::Bitmap {
                     class: BitmapClass::Text {
                         field,
-                        token: token.into_bytes(),
+                        token: BitmapHash::new(token),
                     },
                     set,
                 });

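BitmapClass::Text keys previously embedded the raw token bytes; after this change they carry a fixed-width BitmapHash, which keeps bitmap keys bounded regardless of token length. A sketch of the general idea only; the actual BitmapHash layout in store::write may differ, and the hash function here is a stand-in:

// Illustrative only: one way a token can be reduced to a fixed-width
// bitmap key. Short tokens are stored verbatim (collision-free); longer
// ones are hashed, here with FNV-1a for simplicity.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenHash {
    hash: [u8; 8],
    len: u8, // original token length, disambiguates hash collisions a bit
}

impl TokenHash {
    pub fn new(token: impl AsRef<[u8]>) -> Self {
        let token = token.as_ref();
        let mut hash = [0u8; 8];
        if token.len() <= 8 {
            hash[..token.len()].copy_from_slice(token);
        } else {
            let mut h: u64 = 0xcbf29ce484222325;
            for b in token {
                h ^= *b as u64;
                h = h.wrapping_mul(0x100000001b3);
            }
            hash.copy_from_slice(&h.to_be_bytes());
        }
        TokenHash {
            hash,
            len: token.len().min(u8::MAX as usize) as u8,
        }
    }
}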

@@ -39,6 +39,9 @@ impl JMAP {
         // Delete account data
         self.store.purge_account(account_id).await?;

+        // Remove FTS index
+        let todo = 1;
+
         // Delete account
         let mut batch = BatchBuilder::new();
         batch


@@ -41,6 +41,9 @@ impl crate::Config {
             changes_max_results: settings
                 .property("jmap.protocol.changes.max-results")?
                 .unwrap_or(5000),
+            snippet_max_results: settings
+                .property("jmap.protocol.search-snippet.max-results")?
+                .unwrap_or(100),
             request_max_size: settings
                 .property("jmap.protocol.request.max-size")?
                 .unwrap_or(10000000),

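The new setting caps how many messages a single SearchSnippet/get request may reference before the server answers RequestTooLarge. In the configuration file this would be expressed with the key read above (shown with its default value; exact file layout is an assumption):

jmap.protocol.search-snippet.max-results = 100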

@@ -377,6 +377,19 @@ pub async fn parse_jmap_request(
                 .into_http_response(),
             };
         }
+        ("db", "purge", &Method::GET) => {
+            return match jmap.store.purge_bitmaps().await {
+                Ok(_) => {
+                    JsonResponse::new(Value::String("success".into())).into_http_response()
+                }
+                Err(err) => RequestError::blank(
+                    StatusCode::INTERNAL_SERVER_ERROR.as_u16(),
+                    "Purge database failed",
+                    err.to_string(),
+                )
+                .into_http_response(),
+            };
+        }
         (path_1 @ ("queue" | "report"), path_2, &Method::GET) => {
             return jmap
                 .smtp


@@ -33,18 +33,30 @@ impl JMAP {
            .map(ChangeLogBuilder::with_change_id)
    }

    pub async fn assign_change_id(&self, _: u32) -> Result<u64, MethodError> {
        self.generate_snowflake_id()
        /*self.store
            .assign_change_id(account_id)
            .await
            .map_err(|err| {
                tracing::error!(
                    event = "error",
                    context = "change_log",
                    error = ?err,
                    "Failed to assign changeId.");
                MethodError::ServerPartialFail
            })*/
    }

    pub fn generate_snowflake_id(&self) -> Result<u64, MethodError> {
        self.snowflake_id.generate().ok_or_else(|| {
            tracing::error!(
                event = "error",
                context = "change_log",
                "Failed to generate snowflake id."
            );
            MethodError::ServerPartialFail
        })
    }

    pub async fn commit_changes(

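Change ids are no longer allocated through a store round-trip; they now come from an in-process snowflake generator, so they stay roughly time-ordered without coordination. A rough sketch of the scheme, with bit widths that are illustrative and not necessarily those of utils::snowflake::SnowflakeIdGenerator:

use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};

// Sketch: a u64 built from a millisecond timestamp, a node id, and a
// per-process sequence counter. A production generator also resets the
// sequence each millisecond and guards against clock skew.
pub struct IdGenerator {
    node_id: u64,        // up to 10 bits
    sequence: AtomicU64, // low 12 bits used
}

impl IdGenerator {
    pub fn new(node_id: u64) -> Self {
        IdGenerator {
            node_id: node_id & 0x3FF,
            sequence: AtomicU64::new(0),
        }
    }

    pub fn generate(&self) -> Option<u64> {
        let millis = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .ok()?
            .as_millis() as u64;
        let seq = self.sequence.fetch_add(1, Ordering::Relaxed) & 0xFFF;
        // 42 bits timestamp | 10 bits node | 12 bits sequence.
        Some((millis << 22) | (self.node_id << 12) | seq)
    }
}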

@@ -53,7 +53,7 @@ use store::{
 };
 use utils::map::vec_map::VecMap;

-use crate::{auth::AccessToken, Bincode, JMAP};
+use crate::{auth::AccessToken, services::housekeeper::Event, Bincode, NamedKey, JMAP};

 use super::{
     index::{EmailIndexBuilder, TrimTextValue, MAX_SORT_FIELD_LENGTH},
@@ -291,7 +291,7 @@ impl JMAP {
         keywords: Vec<Keyword>,
         received_at: Option<UTCDate>,
     ) -> Result<Result<IngestedEmail, SetError>, MethodError> {
-        // Obtain term index and metadata
+        // Obtain metadata
         let mut metadata = if let Some(metadata) = self
             .get_property::<Bincode<MessageMetadata>>(
                 from_account_id,
@@ -405,6 +405,14 @@ impl JMAP {
             .value(Property::MailboxIds, mailboxes, F_VALUE | F_BITMAP)
             .value(Property::Keywords, keywords, F_VALUE | F_BITMAP)
             .value(Property::Cid, changes.change_id, F_VALUE)
+            .set(
+                NamedKey::IndexEmail::<&[u8]> {
+                    account_id,
+                    document_id: message_id,
+                    seq: self.generate_snowflake_id()?,
+                },
+                metadata.blob_hash.clone(),
+            )
             .custom(EmailIndexBuilder::set(metadata))
             .custom(changes);
@@ -417,6 +425,9 @@ impl JMAP {
                 MethodError::ServerPartialFail
             })?;

+        // Request FTS index
+        let _ = self.housekeeper_tx.send(Event::IndexStart).await;
+
         Ok(Ok(email))
     }
 }


@@ -32,6 +32,8 @@ use mail_parser::{
 };
 use nlp::language::Language;
 use store::{
+    backend::MAX_TOKEN_LENGTH,
+    fts::{index::FtsDocument, Field},
     write::{BatchBuilder, BlobOp, IntoOperations, F_BITMAP, F_CLEAR, F_INDEX, F_VALUE},
     BlobHash,
 };
@@ -60,13 +62,13 @@ pub(super) trait IndexMessage {
         keywords: Vec<Keyword>,
         mailbox_ids: Vec<u32>,
         received_at: u64,
-    ) -> store::Result<&mut Self>;
+    ) -> &mut Self;

     fn index_headers(&mut self, headers: &[Header<'_>], options: u32);
 }

-pub(super) trait IndexMessageText<'x> {
-    fn index_message(&mut self, message: &'x Message<'x>);
+pub trait IndexMessageText<'x>: Sized {
+    fn index_message(self, message: &'x Message<'x>) -> Self;
 }

 impl IndexMessage for BatchBuilder {
@@ -77,7 +79,7 @@ impl IndexMessage for BatchBuilder {
         keywords: Vec<Keyword>,
         mailbox_ids: Vec<u32>,
         received_at: u64,
-    ) -> store::Result<&mut Self> {
+    ) -> &mut Self {
         // Index keywords
         self.value(Property::Keywords, keywords, F_VALUE | F_BITMAP);
@@ -164,7 +166,7 @@ impl IndexMessage for BatchBuilder {
             F_VALUE,
         );

-        Ok(self)
+        self
     }

     fn index_headers(&mut self, headers: &[Header<'_>], options: u32) {
@@ -262,9 +264,8 @@ impl IndexMessage for BatchBuilder {
     }
 }

-/*
-impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
-    fn index_message(&mut self, message: &'x Message<'x>) {
+impl<'x> IndexMessageText<'x> for FtsDocument<'x, HeaderName<'x>> {
+    fn index_message(mut self, message: &'x Message<'x>) -> Self {
         let mut language = Language::Unknown;

         for (part_id, part) in message.parts.iter().take(MAX_MESSAGE_PARTS).enumerate() {
@@ -277,9 +278,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                     continue;
                 }

                 // Index hasHeader property
-                self.index_raw_token(Property::Headers, header.name.as_str());
+                self.index_keyword(Field::Keyword, header.name.as_str().to_ascii_lowercase());

-                match header.name {
+                match &header.name {
                     HeaderName::MessageId
                     | HeaderName::InReplyTo
                     | HeaderName::References
@@ -287,45 +288,35 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                         header.value.visit_text(|id| {
                             // Index ids without stemming
                             if id.len() < MAX_TOKEN_LENGTH {
-                                let fix = "true";
-                                self.index_raw_token(Property::MessageId, id.to_string());
+                                self.index_keyword(
+                                    Field::Header(header.name.clone()),
+                                    id.to_string(),
+                                );
                             }
                         });
                     }
                     HeaderName::From | HeaderName::To | HeaderName::Cc | HeaderName::Bcc => {
-                        let property = Property::from_header(&header.name);
                         header.value.visit_addresses(|_, value| {
                             // Index an address name or email without stemming
-                            self.index_raw(property.clone(), value.to_string());
+                            self.index_tokenized(
+                                Field::Header(header.name.clone()),
+                                value.to_string(),
+                            );
                         });
                     }
                     HeaderName::Subject => {
                         // Index subject for FTS
-                        self.index(
-                            Property::Subject,
-                            match &header.value {
-                                HeaderValue::Text(text) => text.clone(),
-                                HeaderValue::TextList(list) if !list.is_empty() => {
-                                    list.first().unwrap().clone()
-                                }
-                                _ => "".into(),
-                            },
-                            language,
-                        );
+                        if let Some(subject) = header.value.as_text() {
+                            self.index(Field::Header(HeaderName::Subject), subject, language);
+                        }
                     }
                     HeaderName::Comments | HeaderName::Keywords | HeaderName::ListId => {
                         // Index headers
                         header.value.visit_text(|text| {
-                            for token in text.split_ascii_whitespace() {
-                                if token.len() < MAX_TOKEN_LENGTH {
-                                    let fix = "true";
-                                    self.index_raw_token(
-                                        Property::Headers,
-                                        token.to_lowercase(),
-                                    );
-                                }
-                            }
+                            self.index_tokenized(
+                                Field::Header(header.name.clone()),
+                                text.to_string(),
+                            );
                         });
                     }
                     _ => (),
@@ -337,9 +328,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                 PartType::Text(text) => {
                     if message.text_body.contains(&part_id) || message.html_body.contains(&part_id)
                     {
-                        self.index(Property::TextBody, text.as_ref(), part_language);
+                        self.index(Field::Body, text.as_ref(), part_language);
                     } else {
-                        self.index(Property::Attachments, text.as_ref(), part_language);
+                        self.index(Field::Attachment, text.as_ref(), part_language);
                     }
                 }
                 PartType::Html(html) => {
@@ -347,9 +338,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                     if message.text_body.contains(&part_id) || message.html_body.contains(&part_id)
                     {
-                        self.index(Property::TextBody, text, part_language);
+                        self.index(Field::Body, text, part_language);
                     } else {
-                        self.index(Property::Attachments, text, part_language);
+                        self.index(Field::Attachment, text, part_language);
                     }
                 }
                 PartType::Message(nested_message) => {
@@ -360,21 +351,17 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                     if let Some(HeaderValue::Text(subject)) =
                         nested_message.header(HeaderName::Subject)
                     {
-                        self.index(
-                            Property::Attachments,
-                            subject.as_ref(),
-                            nested_message_language,
-                        );
+                        self.index(Field::Attachment, subject.as_ref(), nested_message_language);
                     }

                     for sub_part in nested_message.parts.iter().take(MAX_MESSAGE_PARTS) {
                         let language = sub_part.language().unwrap_or(nested_message_language);

                         match &sub_part.body {
                             PartType::Text(text) => {
-                                self.index(Property::Attachments, text.as_ref(), language);
+                                self.index(Field::Attachment, text.as_ref(), language);
                             }
                             PartType::Html(html) => {
-                                self.index(Property::Attachments, html_to_text(html), language);
+                                self.index(Field::Attachment, html_to_text(html), language);
                             }
                             _ => (),
                         }
@@ -383,9 +370,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                 _ => {}
             }
         }
+
+        self
     }
 }
-*/

 pub struct EmailIndexBuilder<'x> {
     inner: Bincode<MessageMetadata<'x>>,

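With index_message now consuming and returning the builder, FTS indexing becomes a chained document build. Roughly how the background indexer might drive it; the constructor and method names below (with_default_language, with_account_id, fts_store.index) are assumptions for illustration, not verified against this commit:

// Hypothetical driver for the new trait.
async fn fts_index(jmap: &JMAP, account_id: u32, document_id: u32, message: &Message<'_>) {
    let document = FtsDocument::with_default_language(jmap.config.default_language)
        .with_account_id(account_id)
        .with_collection(Collection::Email)
        .with_document_id(document_id)
        .index_message(message);
    // Hand the fully built document to the FTS store.
    let _ = jmap.fts_store.index(document).await;
}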

@@ -33,6 +33,7 @@ use jmap_proto::{
 use mail_parser::{
     parsers::fields::thread::thread_name, HeaderName, HeaderValue, Message, PartType,
 };
 use store::{
     ahash::AHashSet,
     query::Filter,
@@ -46,7 +47,8 @@ use utils::map::vec_map::VecMap;

 use crate::{
     email::index::{IndexMessage, MAX_ID_LENGTH},
-    IngestError, JMAP,
+    services::housekeeper::Event,
+    IngestError, NamedKey, JMAP,
 };

 use super::{
@@ -237,15 +239,14 @@ impl JMAP {
                 IngestError::Temporary
             })?;

-        let change_id = self
-            .store
-            .assign_change_id(params.account_id)
-            .await
-            .map_err(|err| {
-                tracing::error!(
-                    event = "error",
-                    context = "email_ingest",
-                    error = ?err,
-                    "Failed to assign changeId.");
-                IngestError::Temporary
-            })?;
+        let change_id = self
+            .assign_change_id(params.account_id)
+            .await
+            .map_err(|_| {
+                tracing::error!(
+                    event = "error",
+                    context = "email_ingest",
+                    "Failed to assign changeId."
+                );
+                IngestError::Temporary
+            })?;
@@ -307,17 +308,19 @@ impl JMAP {
                 params.mailbox_ids,
                 params.received_at.unwrap_or_else(now),
             )
-            .map_err(|err| {
-                tracing::error!(
-                    event = "error",
-                    context = "email_ingest",
-                    error = ?err,
-                    "Failed to index message.");
-                IngestError::Temporary
-            })?
             .value(Property::Cid, change_id, F_VALUE)
             .value(Property::ThreadId, thread_id, F_VALUE | F_BITMAP)
-            .custom(changes);
+            .custom(changes)
+            .set(
+                NamedKey::IndexEmail::<&[u8]> {
+                    account_id: params.account_id,
+                    document_id,
+                    seq: self
+                        .generate_snowflake_id()
+                        .map_err(|_| IngestError::Temporary)?,
+                },
+                blob_id.hash.clone(),
+            );

         self.store.write(batch.build()).await.map_err(|err| {
             tracing::error!(
                 event = "error",
@@ -327,6 +330,9 @@ impl JMAP {
             IngestError::Temporary
         })?;

+        // Request FTS index
+        let _ = self.housekeeper_tx.send(Event::IndexStart).await;
+
         Ok(IngestedEmail {
             id,
             change_id,
@@ -434,18 +440,14 @@ impl JMAP {
         // Delete all but the most common threadId
         let mut batch = BatchBuilder::new();
-        let change_id = self
-            .store
-            .assign_change_id(account_id)
-            .await
-            .map_err(|err| {
-                tracing::error!(
-                    event = "error",
-                    context = "find_or_merge_thread",
-                    error = ?err,
-                    "Failed to assign changeId for thread merge.");
-                IngestError::Temporary
-            })?;
+        let change_id = self.assign_change_id(account_id).await.map_err(|_| {
+            tracing::error!(
+                event = "error",
+                context = "find_or_merge_thread",
+                "Failed to assign changeId for thread merge."
+            );
+            IngestError::Temporary
+        })?;
         let mut changes = ChangeLogBuilder::with_change_id(change_id);
         batch
             .with_account_id(account_id)

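Ingest no longer indexes message text inline: it writes a NamedKey::IndexEmail queue entry, keyed by account, document and a snowflake seq and carrying the message blob hash as its value, then wakes the housekeeper with Event::IndexStart. The consumer side is not in this hunk; below is a simplified sketch of the drain loop it implies, with all types and method names stubbed as assumptions:

// Sketch only: the queue key encodes (account_id, document_id, seq), the
// value is the message blob hash; an empty value marks a deletion.
struct IndexEntry {
    account_id: u32,
    document_id: u32,
    seq: u64,           // snowflake sequence: preserves ingest order
    blob_hash: Vec<u8>, // empty => the message was deleted
}

trait FtsQueue {
    fn next_batch(&self) -> Vec<IndexEntry>;
    fn index(&self, entry: &IndexEntry); // parse blob, tokenize, write postings
    fn remove(&self, account_id: u32, document_id: u32);
    fn ack(&self, entry: &IndexEntry); // delete the queue key once processed
}

fn drain_index_queue(queue: &impl FtsQueue) {
    loop {
        let batch = queue.next_batch();
        if batch.is_empty() {
            break; // caught up; sleep until the next Event::IndexStart
        }
        for entry in batch {
            if entry.blob_hash.is_empty() {
                // Tombstone: drop the document from the FTS index.
                queue.remove(entry.account_id, entry.document_id);
            } else {
                queue.index(&entry);
            }
            queue.ack(&entry);
        }
    }
}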

@@ -27,7 +27,10 @@ use jmap_proto::{
     object::email::QueryArguments,
     types::{acl::Acl, collection::Collection, keyword::Keyword, property::Property},
 };
+use mail_parser::HeaderName;
+use nlp::language::Language;
 use store::{
+    fts::{Field, FilterGroup, FtsFilter, IntoFilterGroup},
     query::{self},
     roaring::RoaringBitmap,
     write::ValueClass,
@@ -45,200 +48,226 @@ impl JMAP {
        let account_id = request.account_id.document_id();
        let mut filters = Vec::with_capacity(request.filter.len());

        for cond_group in std::mem::take(&mut request.filter).into_filter_group() {
            match cond_group {
                FilterGroup::Fts(conds) => {
                    let mut fts_filters = Vec::with_capacity(filters.len());
                    for cond in conds {
                        match cond {
                            Filter::Text(text) => {
                                fts_filters.push(FtsFilter::Or);
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::From),
                                    &text,
                                    Language::None,
                                ));
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::To),
                                    &text,
                                    Language::None,
                                ));
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::Cc),
                                    &text,
                                    Language::None,
                                ));
                                fts_filters.push(FtsFilter::has_text(
                                    Field::Header(HeaderName::Bcc),
                                    &text,
                                    Language::None,
                                ));
                                fts_filters.push(FtsFilter::has_text_detect(
                                    Field::Header(HeaderName::Subject),
                                    &text,
                                    self.config.default_language,
                                ));
                                fts_filters.push(FtsFilter::has_text_detect(
                                    Field::Body,
                                    &text,
                                    self.config.default_language,
                                ));
                                fts_filters.push(FtsFilter::has_text_detect(
                                    Field::Attachment,
                                    text,
                                    self.config.default_language,
                                ));
                                fts_filters.push(FtsFilter::End);
                            }
                            Filter::From(text) => fts_filters.push(FtsFilter::has_text(
                                Field::Header(HeaderName::From),
                                text,
                                Language::None,
                            )),
                            Filter::To(text) => fts_filters.push(FtsFilter::has_text(
                                Field::Header(HeaderName::To),
                                text,
                                Language::None,
                            )),
                            Filter::Cc(text) => fts_filters.push(FtsFilter::has_text(
                                Field::Header(HeaderName::Cc),
                                text,
                                Language::None,
                            )),
                            Filter::Bcc(text) => fts_filters.push(FtsFilter::has_text(
                                Field::Header(HeaderName::Bcc),
                                text,
                                Language::None,
                            )),
                            Filter::Subject(text) => fts_filters.push(FtsFilter::has_text_detect(
                                Field::Header(HeaderName::Subject),
                                text,
                                self.config.default_language,
                            )),
                            Filter::Body(text) => fts_filters.push(FtsFilter::has_text_detect(
                                Field::Body,
                                text,
                                self.config.default_language,
                            )),
                            Filter::Header(header) => {
                                let mut header = header.into_iter();
                                let header_name = header.next().ok_or_else(|| {
                                    MethodError::InvalidArguments(
                                        "Header name is missing.".to_string(),
                                    )
                                })?;

                                match HeaderName::parse(header_name) {
                                    Some(HeaderName::Other(header_name)) => {
                                        return Err(MethodError::InvalidArguments(format!(
                                            "Querying header '{header_name}' is not supported.",
                                        )));
                                    }
                                    Some(header_name) => {
                                        if let Some(header_value) = header.next() {
                                            if matches!(
                                                header_name,
                                                HeaderName::MessageId
                                                    | HeaderName::InReplyTo
                                                    | HeaderName::References
                                                    | HeaderName::ResentMessageId
                                            ) {
                                                fts_filters.push(FtsFilter::has_keyword(
                                                    Field::Header(header_name),
                                                    header_value,
                                                ));
                                            } else {
                                                fts_filters.push(FtsFilter::has_text(
                                                    Field::Header(header_name),
                                                    header_value,
                                                    Language::None,
                                                ));
                                            }
                                        } else {
                                            fts_filters.push(FtsFilter::has_keyword(
                                                Field::Keyword,
                                                header_name.as_str().to_lowercase(),
                                            ));
                                        }
                                    }
                                    None => (),
                                }
                            }
                            Filter::And | Filter::Or | Filter::Not | Filter::Close => {
                                fts_filters.push(cond.into());
                            }
                            other => return Err(MethodError::UnsupportedFilter(other.to_string())),
                        }
                    }
                    filters.push(query::Filter::is_in_set(
                        self.fts_filter(account_id, Collection::Email, fts_filters)
                            .await?,
                    ));
                }
                FilterGroup::Store(cond) => {
                    match cond {
                        Filter::InMailbox(mailbox) => filters.push(query::Filter::is_in_bitmap(
                            Property::MailboxIds,
                            mailbox.document_id(),
                        )),
                        Filter::InMailboxOtherThan(mailboxes) => {
                            filters.push(query::Filter::Not);
                            filters.push(query::Filter::Or);
                            for mailbox in mailboxes {
                                filters.push(query::Filter::is_in_bitmap(
                                    Property::MailboxIds,
                                    mailbox.document_id(),
                                ));
                            }
                            filters.push(query::Filter::End);
                            filters.push(query::Filter::End);
                        }
                        Filter::Before(date) => {
                            filters.push(query::Filter::lt(Property::ReceivedAt, date))
                        }
                        Filter::After(date) => {
                            filters.push(query::Filter::gt(Property::ReceivedAt, date))
                        }
                        Filter::MinSize(size) => {
                            filters.push(query::Filter::ge(Property::Size, size))
                        }
                        Filter::MaxSize(size) => {
                            filters.push(query::Filter::lt(Property::Size, size))
                        }
                        Filter::AllInThreadHaveKeyword(keyword) => {
                            filters.push(query::Filter::is_in_set(
                                self.thread_keywords(account_id, keyword, true).await?,
                            ))
                        }
                        Filter::SomeInThreadHaveKeyword(keyword) => {
                            filters.push(query::Filter::is_in_set(
                                self.thread_keywords(account_id, keyword, false).await?,
                            ))
                        }
                        Filter::NoneInThreadHaveKeyword(keyword) => {
                            filters.push(query::Filter::Not);
                            filters.push(query::Filter::is_in_set(
                                self.thread_keywords(account_id, keyword, false).await?,
                            ));
                            filters.push(query::Filter::End);
                        }
                        Filter::HasKeyword(keyword) => {
                            filters.push(query::Filter::is_in_bitmap(Property::Keywords, keyword))
                        }
                        Filter::NotKeyword(keyword) => {
                            filters.push(query::Filter::Not);
                            filters.push(query::Filter::is_in_bitmap(Property::Keywords, keyword));
                            filters.push(query::Filter::End);
                        }
                        Filter::HasAttachment(has_attach) => {
                            if !has_attach {
                                filters.push(query::Filter::Not);
                            }
                            filters.push(query::Filter::is_in_bitmap(Property::HasAttachment, ()));
                            if !has_attach {
                                filters.push(query::Filter::End);
                            }
                        }
                        // Non-standard
                        Filter::Id(ids) => {
                            let mut set = RoaringBitmap::new();
                            for id in ids {
                                set.insert(id.document_id());
                            }
                            filters.push(query::Filter::is_in_set(set));
                        }
                        Filter::SentBefore(date) => {
                            filters.push(query::Filter::lt(Property::SentAt, date))
                        }
                        Filter::SentAfter(date) => {
                            filters.push(query::Filter::gt(Property::SentAt, date))
                        }
                        Filter::InThread(id) => filters.push(query::Filter::is_in_bitmap(
                            Property::ThreadId,
                            id.document_id(),
                        )),
                        Filter::And | Filter::Or | Filter::Not | Filter::Close => {
                            filters.push(cond.into());
                        }
                        other => return Err(MethodError::UnsupportedFilter(other.to_string())),
                    }
                }
            }
        }

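FtsFilter lists are prefix-notation expressions: an operator (And, Or, Not) applies to everything up to its matching End. For instance, "subject contains invoice and body does not contain paid" would be encoded as below. This is a hand-written example, not code from the diff; it assumes FtsFilter is instantiated with mail_parser::HeaderName as the field type, as in the code above, and that nlp's Language enum has an English variant:

use mail_parser::HeaderName;
use nlp::language::Language;
use store::fts::{Field, FtsFilter};

fn example() -> Vec<FtsFilter<HeaderName<'static>>> {
    vec![
        FtsFilter::And,
        FtsFilter::has_text_detect(
            Field::Header(HeaderName::Subject),
            "invoice",
            Language::English,
        ),
        FtsFilter::Not,
        FtsFilter::has_text_detect(Field::Body, "paid", Language::English),
        FtsFilter::End, // closes Not
        FtsFilter::End, // closes And
    ]
}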

@@ -59,7 +59,9 @@ use store::{
     Serialize,
 };

-use crate::{auth::AccessToken, Bincode, IngestError, JMAP};
+use crate::{
+    auth::AccessToken, services::housekeeper::Event, Bincode, IngestError, NamedKey, JMAP,
+};

 use super::{
     headers::{BuildHeader, ValueToHeader},
@@ -1208,6 +1210,16 @@ impl JMAP {
                 .delete_document(thread_id);
         }

+        // Remove message from FTS index
+        batch.set(
+            NamedKey::IndexEmail::<&[u8]> {
+                account_id,
+                document_id,
+                seq: self.generate_snowflake_id()?,
+            },
+            vec![],
+        );
+
         // Commit batch
         match self.store.write(batch.build()).await {
             Ok(_) => (),
@@ -1226,6 +1238,9 @@ impl JMAP {
             }
         }

+        // Request FTS index
+        let _ = self.housekeeper_tx.send(Event::IndexStart).await;
+
         Ok(Ok(changes))
     }
 }


@@ -27,15 +27,15 @@ use jmap_proto::{
         query::Filter,
         search_snippet::{GetSearchSnippetRequest, GetSearchSnippetResponse, SearchSnippet},
     },
-    types::{acl::Acl, collection::Collection},
+    types::{acl::Acl, collection::Collection, property::Property},
 };
-use mail_parser::{decoders::html::html_to_text, MessageParser, PartType};
-use nlp::language::{stemmer::Stemmer, Language};
-use store::BlobHash;
+use mail_parser::{decoders::html::html_to_text, GetHeader, HeaderName, PartType};
+use nlp::language::{search_snippet::generate_snippet, stemmer::Stemmer, Language};
+use store::backend::MAX_TOKEN_LENGTH;

-use crate::{auth::AccessToken, JMAP};
+use crate::{auth::AccessToken, Bincode, JMAP};

-use super::index::MAX_MESSAGE_PARTS;
+use super::metadata::{MessageMetadata, MetadataPartType};

 impl JMAP {
     pub async fn email_search_snippet(
@@ -45,37 +45,33 @@
    ) -> Result<GetSearchSnippetResponse, MethodError> {
        let mut filter_stack = vec![];
        let mut include_term = true;
        let mut terms = vec![];
        let mut is_exact = false;
        let mut language = self.config.default_language;

        for cond in request.filter {
            match cond {
                Filter::Text(text) | Filter::Subject(text) | Filter::Body(text) => {
                    if include_term {
                        let (text, language_) =
                            Language::detect(text, self.config.default_language);
                        language = language_;
                        if (text.starts_with('"') && text.ends_with('"'))
                            || (text.starts_with('\'') && text.ends_with('\''))
                        {
                            for token in language.tokenize_text(&text, MAX_TOKEN_LENGTH) {
                                terms.push(token.word.into_owned());
                            }
                            is_exact = true;
                        } else {
                            for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH) {
                                terms.push(token.word.into_owned());
                                if let Some(stemmed_word) = token.stemmed_word {
                                    terms.push(stemmed_word.into_owned());
                                }
                            }
                        }
                    }
                }
                Filter::And | Filter::Or => {
                    filter_stack.push(cond);
@@ -103,150 +99,112 @@
            not_found: vec![],
        };

        if email_ids.len() > self.config.snippet_max_results {
            return Err(MethodError::RequestTooLarge);
        }

        for email_id in email_ids {
            let document_id = email_id.document_id();
            let mut snippet = SearchSnippet {
                email_id,
                subject: None,
                preview: None,
            };
            if !document_ids.contains(document_id) {
                response.not_found.push(email_id);
                continue;
            } else if terms.is_empty() {
                response.list.push(snippet);
                continue;
            }
            let metadata = match self
                .get_property::<Bincode<MessageMetadata>>(
                    account_id,
                    Collection::Email,
                    document_id,
                    &Property::BodyStructure,
                )
                .await?
            {
                Some(metadata) => metadata.inner,
                None => {
                    response.not_found.push(email_id);
                    continue;
                }
            };

            // Add subject snippet
            if let Some(subject) = metadata
                .contents
                .root_part()
                .headers
                .header_value(&HeaderName::Subject)
                .and_then(|v| v.as_text())
                .and_then(|v| generate_snippet(v, &terms, language, is_exact))
            {
                snippet.subject = subject.into();
            }

            // Check if the snippet can be generated from the preview
            /*if let Some(body) = generate_snippet(&metadata.preview, &terms) {
                snippet.preview = body.into();
            } else {*/

            // Download message
            let raw_message =
                if let Some(raw_message) = self.get_blob(&metadata.blob_hash, 0..u32::MAX).await? {
                    raw_message
                } else {
                    tracing::warn!(event = "not-found",
                        account_id = account_id,
                        collection = ?Collection::Email,
                        document_id = email_id.document_id(),
                        blob_id = ?metadata.blob_hash,
                        "Blob not found");
                    response.not_found.push(email_id);
                    continue;
                };

            // Find a matching part
            'outer: for part in &metadata.contents.parts {
                match &part.body {
                    MetadataPartType::Text | MetadataPartType::Html => {
                        let text = match part.decode_contents(&raw_message) {
                            PartType::Text(text) => text,
                            PartType::Html(html) => html_to_text(&html).into(),
                            _ => unreachable!(),
                        };

                        if let Some(body) = generate_snippet(&text, &terms, language, is_exact) {
                            snippet.preview = body.into();
                            break;
                        }
                    }
                    MetadataPartType::Message(message) => {
                        for part in &message.parts {
                            if let MetadataPartType::Text | MetadataPartType::Html = part.body {
                                let text = match part.decode_contents(&raw_message) {
                                    PartType::Text(text) => text,
                                    PartType::Html(html) => html_to_text(&html).into(),
                                    _ => unreachable!(),
                                };

                                if let Some(body) =
                                    generate_snippet(&text, &terms, language, is_exact)
                                {
                                    snippet.preview = body.into();
                                    break 'outer;
                                }
                            }
                        }
                    }
                    _ => (),
                }
            }

            response.list.push(snippet);
        }

        Ok(response)
    }
}

View file

@ -21,7 +21,7 @@
* for more details. * for more details.
*/ */
use std::{collections::hash_map::RandomState, sync::Arc, time::Duration}; use std::{collections::hash_map::RandomState, fmt::Display, sync::Arc, time::Duration};
use ::sieve::{Compiler, Runtime}; use ::sieve::{Compiler, Runtime};
use api::session::BaseCapabilities; use api::session::BaseCapabilities;
@ -49,17 +49,23 @@ use services::{
use smtp::core::SMTP; use smtp::core::SMTP;
use store::{ use store::{
backend::{fs::FsStore, sqlite::SqliteStore}, backend::{fs::FsStore, sqlite::SqliteStore},
fts::FtsFilter,
parking_lot::Mutex, parking_lot::Mutex,
query::{sort::Pagination, Comparator, Filter, ResultSet, SortedResultSet}, query::{sort::Pagination, Comparator, Filter, ResultSet, SortedResultSet},
roaring::RoaringBitmap, roaring::RoaringBitmap,
    write::{key::KeySerializer, BatchBuilder, BitmapClass, TagValue, ToBitmaps, ValueClass},
    BitmapKey, BlobStore, Deserialize, Key, Serialize, Store, ValueKey, SUBSPACE_VALUES,
    write::{
        key::{DeserializeBigEndian, KeySerializer},
        BatchBuilder, BitmapClass, TagValue, ToBitmaps, ValueClass,
    },
    BitmapKey, BlobStore, Deserialize, FtsStore, Key, Serialize, Store, ValueKey, SUBSPACE_VALUES,
    U32_LEN, U64_LEN,
}; };
use tokio::sync::mpsc; use tokio::sync::mpsc;
use utils::{ use utils::{
config::Rate, config::Rate,
ipc::DeliveryEvent, ipc::DeliveryEvent,
map::ttl_dashmap::{TtlDashMap, TtlMap}, map::ttl_dashmap::{TtlDashMap, TtlMap},
snowflake::SnowflakeIdGenerator,
UnwrapFailure, UnwrapFailure,
}; };
@ -85,11 +91,13 @@ pub const LONG_SLUMBER: Duration = Duration::from_secs(60 * 60 * 24);
pub struct JMAP { pub struct JMAP {
pub store: Store, pub store: Store,
pub blob_store: BlobStore, pub blob_store: BlobStore,
pub fts_store: FtsStore,
pub config: Config, pub config: Config,
pub directory: Arc<dyn Directory>, pub directory: Arc<dyn Directory>,
pub sessions: TtlDashMap<String, u32>, pub sessions: TtlDashMap<String, u32>,
pub access_tokens: TtlDashMap<u32, Arc<AccessToken>>, pub access_tokens: TtlDashMap<u32, Arc<AccessToken>>,
pub snowflake_id: SnowflakeIdGenerator,
pub rate_limit_auth: DashMap<u32, Arc<Mutex<AuthenticatedLimiter>>>, pub rate_limit_auth: DashMap<u32, Arc<Mutex<AuthenticatedLimiter>>>,
pub rate_limit_unauth: DashMap<RemoteAddress, Arc<Mutex<AnonymousLimiter>>>, pub rate_limit_unauth: DashMap<RemoteAddress, Arc<Mutex<AnonymousLimiter>>>,
@ -108,6 +116,7 @@ pub struct Config {
pub default_language: Language, pub default_language: Language,
pub query_max_results: usize, pub query_max_results: usize,
pub changes_max_results: usize, pub changes_max_results: usize,
pub snippet_max_results: usize,
pub request_max_size: usize, pub request_max_size: usize,
pub request_max_calls: usize, pub request_max_calls: usize,
@ -187,6 +196,11 @@ impl JMAP {
.property::<u64>("global.shared-map.shard")? .property::<u64>("global.shared-map.shard")?
.unwrap_or(32) .unwrap_or(32)
.next_power_of_two() as usize; .next_power_of_two() as usize;
let store = Store::SQLite(Arc::new(
SqliteStore::open(config)
.await
.failed("Unable to open database"),
));
let jmap_server = Arc::new(JMAP { let jmap_server = Arc::new(JMAP {
directory: directory_config directory: directory_config
@ -197,11 +211,12 @@ impl JMAP {
config.value_require("jmap.directory")? config.value_require("jmap.directory")?
)) ))
.clone(), .clone(),
            store: Store::SQLite(Arc::new(
                SqliteStore::open(config)
                    .await
                    .failed("Unable to open database"),
            )),
            snowflake_id: config
                .property::<u64>("global.node-id")?
                .map(SnowflakeIdGenerator::with_node_id)
                .unwrap_or_else(SnowflakeIdGenerator::new),
            fts_store: FtsStore::Store(store.clone()),
            store,
blob_store: BlobStore::Fs(Arc::new( blob_store: BlobStore::Fs(Arc::new(
FsStore::open(config) FsStore::open(config)
.await .await
@ -618,7 +633,28 @@ impl JMAP {
.await .await
.map_err(|err| { .map_err(|err| {
tracing::error!(event = "error", tracing::error!(event = "error",
context = "mailbox_set", context = "filter",
account_id = account_id,
collection = ?collection,
error = ?err,
"Failed to execute filter.");
MethodError::ServerPartialFail
})
}
pub async fn fts_filter<T: Into<u8> + Display + Clone + std::fmt::Debug>(
&self,
account_id: u32,
collection: Collection,
filters: Vec<FtsFilter<T>>,
) -> Result<RoaringBitmap, MethodError> {
self.fts_store
.query(account_id, collection, filters)
.await
.map_err(|err| {
tracing::error!(event = "error",
context = "fts-filter",
account_id = account_id, account_id = account_id,
collection = ?collection, collection = ?collection,
error = ?err, error = ?err,
@ -805,6 +841,11 @@ pub enum NamedKey<T: AsRef<[u8]>> {
Name(T), Name(T),
Id(u32), Id(u32),
Quota(u32), Quota(u32),
IndexEmail {
account_id: u32,
document_id: u32,
seq: u64,
},
} }
impl<T: AsRef<[u8]>> From<&NamedKey<T>> for ValueClass { impl<T: AsRef<[u8]>> From<&NamedKey<T>> for ValueClass {
@ -817,21 +858,44 @@ impl<T: AsRef<[u8]>> From<&NamedKey<T>> for ValueClass {
.finalize(), .finalize(),
), ),
NamedKey::Id(id) => ValueClass::Named( NamedKey::Id(id) => ValueClass::Named(
KeySerializer::new(std::mem::size_of::<u32>()) KeySerializer::new(std::mem::size_of::<u32>() + 1)
.write(1u8) .write(1u8)
.write_leb128(*id) .write_leb128(*id)
.finalize(), .finalize(),
), ),
NamedKey::Quota(id) => ValueClass::Named( NamedKey::Quota(id) => ValueClass::Named(
KeySerializer::new(std::mem::size_of::<u32>()) KeySerializer::new(std::mem::size_of::<u32>() + 1)
.write(2u8) .write(2u8)
.write_leb128(*id) .write_leb128(*id)
.finalize(), .finalize(),
), ),
NamedKey::IndexEmail {
account_id,
document_id,
seq,
} => ValueClass::Named(
KeySerializer::new(std::mem::size_of::<u32>() * 4 + 1)
.write(3u8)
.write(*seq)
.write(*account_id)
.write(*document_id)
.finalize(),
),
} }
} }
} }
impl<T: AsRef<[u8]>> NamedKey<T> {
pub fn deserialize_index_email(bytes: &[u8]) -> store::Result<Self> {
let len = bytes.len();
Ok(NamedKey::IndexEmail {
seq: bytes.deserialize_be_u64(len - U64_LEN - (U32_LEN * 2))?,
account_id: bytes.deserialize_be_u32(len - U32_LEN * 2)?,
document_id: bytes.deserialize_be_u32(len - U32_LEN)?,
})
}
}
impl<T: AsRef<[u8]>> From<NamedKey<T>> for ValueClass { impl<T: AsRef<[u8]>> From<NamedKey<T>> for ValueClass {
fn from(key: NamedKey<T>) -> Self { fn from(key: NamedKey<T>) -> Self {
(&key).into() (&key).into()

View file

@ -36,43 +36,73 @@ use super::IPC_CHANNEL_BUFFER;
pub enum Event { pub enum Event {
PurgeDb, PurgeDb,
PurgeBlobs,
PurgeSessions, PurgeSessions,
IndexStart,
IndexDone,
#[cfg(feature = "test_mode")]
IndexIsActive(tokio::sync::oneshot::Sender<bool>),
Exit, Exit,
} }
const TASK_PURGE_DB: usize = 0; const TASK_PURGE_DB: usize = 0;
const TASK_PURGE_BLOBS: usize = 1; const TASK_PURGE_SESSIONS: usize = 1;
const TASK_PURGE_SESSIONS: usize = 2;
pub fn spawn_housekeeper(core: Arc<JMAP>, settings: &Config, mut rx: mpsc::Receiver<Event>) { pub fn spawn_housekeeper(core: Arc<JMAP>, settings: &Config, mut rx: mpsc::Receiver<Event>) {
let purge_db_at = settings let purge_db_at = settings
.property_or_static::<SimpleCron>("jmap.purge.schedule.db", "0 3 *") .property_or_static::<SimpleCron>("jmap.purge.schedule.db", "0 3 *")
.failed("Initialize housekeeper"); .failed("Initialize housekeeper");
let purge_blobs_at = settings
.property_or_static::<SimpleCron>("jmap.purge.schedule.blobs", "30 3 *")
.failed("Initialize housekeeper");
let purge_cache = settings let purge_cache = settings
.property_or_static::<SimpleCron>("jmap.purge.schedule.sessions", "15 * *") .property_or_static::<SimpleCron>("jmap.purge.schedule.sessions", "15 * *")
.failed("Initialize housekeeper"); .failed("Initialize housekeeper");
tokio::spawn(async move { tokio::spawn(async move {
tracing::debug!("Housekeeper task started."); tracing::debug!("Housekeeper task started.");
let mut index_busy = true;
let mut index_pending = false;
// Index any queued messages
let core_ = core.clone();
tokio::spawn(async move {
core_.fts_index_queued().await;
});
loop { loop {
            let time_to_next = [
                purge_db_at.time_to_next(),
                purge_blobs_at.time_to_next(),
                purge_cache.time_to_next(),
            ];
            let mut tasks_to_run = [false, false, false];
            let time_to_next = [purge_db_at.time_to_next(), purge_cache.time_to_next()];
            let mut tasks_to_run = [false, false];
let start_time = Instant::now(); let start_time = Instant::now();
match tokio::time::timeout(time_to_next.iter().min().copied().unwrap(), rx.recv()).await match tokio::time::timeout(time_to_next.iter().min().copied().unwrap(), rx.recv()).await
{ {
Ok(Some(event)) => match event { Ok(Some(event)) => match event {
Event::PurgeDb => tasks_to_run[TASK_PURGE_DB] = true, Event::PurgeDb => tasks_to_run[TASK_PURGE_DB] = true,
Event::PurgeBlobs => tasks_to_run[TASK_PURGE_BLOBS] = true,
Event::PurgeSessions => tasks_to_run[TASK_PURGE_SESSIONS] = true, Event::PurgeSessions => tasks_to_run[TASK_PURGE_SESSIONS] = true,
Event::IndexStart => {
if !index_busy {
index_busy = true;
let core = core.clone();
tokio::spawn(async move {
core.fts_index_queued().await;
});
} else {
index_pending = true;
}
}
Event::IndexDone => {
if index_pending {
index_pending = false;
let core = core.clone();
tokio::spawn(async move {
core.fts_index_queued().await;
});
} else {
index_busy = false;
}
}
#[cfg(feature = "test_mode")]
Event::IndexIsActive(tx) => {
tx.send(index_busy).ok();
}
Event::Exit => { Event::Exit => {
tracing::debug!("Housekeeper task exiting."); tracing::debug!("Housekeeper task exiting.");
return; return;
@ -104,13 +134,12 @@ pub fn spawn_housekeeper(core: Arc<JMAP>, settings: &Config, mut rx: mpsc::Recei
tokio::spawn(async move { tokio::spawn(async move {
match task_id { match task_id {
TASK_PURGE_DB => { TASK_PURGE_DB => {
tracing::info!("Purging database."); tracing::info!("Purging database...");
if let Err(err) = core.store.purge_bitmaps().await { if let Err(err) = core.store.purge_bitmaps().await {
tracing::error!("Error while purging bitmaps: {}", err); tracing::error!("Error while purging bitmaps: {}", err);
} }
                        }
                        TASK_PURGE_BLOBS => {
                            tracing::info!("Purging temporary blobs.",);

                            tracing::info!("Purging blobs...",);
if let Err(err) = if let Err(err) =
core.store.blob_hash_purge(core.blob_store.clone()).await core.store.blob_hash_purge(core.blob_store.clone()).await
{ {
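
The IndexStart/IndexDone pair above forms a busy/pending latch: at most one fts_index_queued task runs at a time, and any wake-ups received while it runs coalesce into a single follow-up run (the task also starts once unconditionally at boot, matching index_busy starting out true). A standalone sketch of the same state machine; the type and method names are illustrative, not part of the diff:

// Coalescing latch: mirrors the index_busy/index_pending logic above.
struct IndexLatch {
    busy: bool,
    pending: bool,
}

impl IndexLatch {
    // IndexStart: spawn a run only when idle; otherwise remember the request.
    fn on_start(&mut self) -> bool {
        if !self.busy {
            self.busy = true;
            true
        } else {
            self.pending = true;
            false
        }
    }

    // IndexDone: re-run once if requests arrived meanwhile, else go idle.
    fn on_done(&mut self) -> bool {
        if self.pending {
            self.pending = false;
            true
        } else {
            self.busy = false;
            false
        }
    }
}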

View file

@ -0,0 +1,224 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use jmap_proto::types::{collection::Collection, property::Property};
use store::{
fts::index::FtsDocument,
write::{BatchBuilder, ValueClass},
IterateParams, ValueKey,
};
use crate::{
email::{index::IndexMessageText, metadata::MessageMetadata},
Bincode, NamedKey, JMAP,
};
use super::housekeeper::Event;
impl JMAP {
pub async fn fts_index_queued(&self) {
let from_key = ValueKey::<ValueClass> {
account_id: 0,
collection: 0,
document_id: 0,
class: NamedKey::IndexEmail::<&[u8]> {
account_id: 0,
document_id: 0,
seq: 0,
}
.into(),
};
let to_key = ValueKey::<ValueClass> {
account_id: u32::MAX,
collection: u8::MAX,
document_id: u32::MAX,
class: NamedKey::IndexEmail::<&[u8]> {
account_id: u32::MAX,
document_id: u32::MAX,
seq: u64::MAX,
}
.into(),
};
// Retrieve entries pending to be indexed
// TODO: Support indexing from multiple nodes
let mut entries = Vec::new();
let _ = self
.store
.iterate(
IterateParams::new(from_key, to_key).ascending(),
|key, value| {
entries.push((
NamedKey::<Vec<u8>>::deserialize_index_email(key)?,
value.to_vec(),
));
Ok(true)
},
)
.await
.map_err(|err| {
tracing::error!(
context = "fts_index_queued",
event = "error",
reason = ?err,
"Failed to iterate over index emails"
);
});
// Index entries
for (key, blob_hash) in entries {
if let NamedKey::IndexEmail {
account_id,
document_id,
..
} = &key
{
if !blob_hash.is_empty() {
match self
.get_property::<Bincode<MessageMetadata>>(
*account_id,
Collection::Email,
*document_id,
Property::BodyStructure,
)
.await
{
Ok(Some(metadata))
if metadata.inner.blob_hash.as_slice() == blob_hash.as_slice() =>
{
// Obtain raw message
let raw_message = if let Ok(Some(raw_message)) =
self.get_blob(&metadata.inner.blob_hash, 0..u32::MAX).await
{
raw_message
} else {
tracing::warn!(
context = "fts_index_queued",
event = "error",
account_id = *account_id,
document_id = *document_id,
blob_hash = ?metadata.inner.blob_hash,
"Message blob not found"
);
continue;
};
let message = metadata.inner.contents.into_message(&raw_message);
// Index message
let document =
FtsDocument::with_default_language(self.config.default_language)
.with_account_id(*account_id)
.with_collection(Collection::Email)
.with_document_id(*document_id)
.index_message(&message);
if let Err(err) = self.fts_store.index(document).await {
tracing::error!(
context = "fts_index_queued",
event = "error",
account_id = *account_id,
document_id = *document_id,
reason = ?err,
"Failed to index email in FTS index"
);
continue;
}
tracing::debug!(
context = "fts_index_queued",
event = "index",
account_id = *account_id,
document_id = *document_id,
"Indexed document in FTS index"
);
}
Err(err) => {
tracing::error!(
context = "fts_index_queued",
event = "error",
account_id = *account_id,
document_id = *document_id,
reason = ?err,
"Failed to retrieve email metadata"
);
break;
}
_ => {
// The message was probably deleted or overwritten
tracing::debug!(
context = "fts_index_queued",
event = "error",
account_id = *account_id,
document_id = *document_id,
"Email metadata not found"
);
}
}
} else {
if let Err(err) = self
.fts_store
.remove(*account_id, Collection::Email.into(), *document_id)
.await
{
tracing::error!(
context = "fts_index_queued",
event = "error",
account_id = *account_id,
document_id = *document_id,
reason = ?err,
"Failed to remove document from FTS index"
);
continue;
}
tracing::debug!(
context = "fts_index_queued",
event = "delete",
account_id = *account_id,
document_id = *document_id,
"Deleted document from FTS index"
);
}
}
// Remove entry from queue
if let Err(err) = self
.store
.write(BatchBuilder::new().clear(key).build_batch())
.await
{
tracing::error!(
context = "fts_index_queued",
event = "error",
reason = ?err,
"Failed to remove index email from queue"
);
break;
}
}
if let Err(err) = self.housekeeper_tx.send(Event::IndexDone).await {
tracing::warn!("Failed to send index done event to housekeeper: {}", err);
}
}
}
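
Each queue entry carries the blob hash that was current when the message was enqueued, or an empty value for a deletion; the drain loop above indexes only when the stored hash still matches the live metadata. A compact restatement of that per-entry decision (illustrative sketch; the metadata-fetch error path, which aborts the run, is elided):

enum QueueAction {
    Index,  // metadata found and its blob hash still matches the queued one
    Skip,   // message was deleted or overwritten after it was enqueued
    Remove, // empty queued value: drop the document from the FTS index
}

fn classify(queued_hash: &[u8], current_hash: Option<&[u8]>) -> QueueAction {
    if queued_hash.is_empty() {
        QueueAction::Remove
    } else if current_hash == Some(queued_hash) {
        QueueAction::Index
    } else {
        QueueAction::Skip
    }
}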

View file

@ -23,6 +23,7 @@
pub mod delivery; pub mod delivery;
pub mod housekeeper; pub mod housekeeper;
pub mod index;
pub mod ingest; pub mod ingest;
pub mod state; pub mod state;

View file

@ -22,6 +22,7 @@
*/ */
pub mod detect; pub mod detect;
pub mod search_snippet;
pub mod stemmer; pub mod stemmer;
pub mod stopwords; pub mod stopwords;

View file

@ -21,7 +21,7 @@
* for more details. * for more details.
*/ */
use super::term_index::Term; use super::Language;
fn escape_char(c: char, string: &mut String) { fn escape_char(c: char, string: &mut String) {
match c { match c {
@ -45,9 +45,53 @@ fn escape_char_len(c: char) -> usize {
} }
} }
pub fn generate_snippet(terms: &[Term], text: &str) -> Option<String> {
pub struct Term {
offset: usize,
len: usize,
}
pub fn generate_snippet(
text: &str,
needles: &[impl AsRef<str>],
language: Language,
is_exact: bool,
) -> Option<String> {
let mut terms = Vec::new();
if is_exact {
let tokens = language.tokenize_text(text, 200).collect::<Vec<_>>();
for tokens in tokens.windows(needles.len()) {
if needles
.iter()
.zip(tokens)
.all(|(needle, token)| needle.as_ref() == token.word.as_ref())
{
for token in tokens {
terms.push(Term {
offset: token.from,
len: token.to - token.from,
});
}
}
}
} else {
for token in language.tokenize_text(text, 200) {
if needles.iter().any(|needle| {
let needle = needle.as_ref();
needle == token.word.as_ref() || needle.len() > 2 && token.word.contains(needle)
}) {
terms.push(Term {
offset: token.from,
len: token.to - token.from,
});
}
}
}
if terms.is_empty() {
return None;
}
let mut snippet = String::with_capacity(text.len()); let mut snippet = String::with_capacity(text.len());
let start_offset = terms.get(0)?.offset as usize; let start_offset = terms.get(0)?.offset;
if start_offset > 0 { if start_offset > 0 {
let mut word_count = 0; let mut word_count = 0;
@ -92,25 +136,22 @@ pub fn generate_snippet(terms: &[Term], text: &str) -> Option<String> {
let mut terms = terms.iter().peekable(); let mut terms = terms.iter().peekable();
'outer: while let Some(term) = terms.next() { 'outer: while let Some(term) = terms.next() {
if snippet.len() + ("<mark>".len() * 2) + term.len as usize + 1 > 255 { if snippet.len() + ("<mark>".len() * 2) + term.len + 1 > 255 {
break; break;
} }
snippet.push_str("<mark>"); snippet.push_str("<mark>");
snippet.push_str(text.get(term.offset as usize..term.offset as usize + term.len as usize)?); snippet.push_str(text.get(term.offset..term.offset + term.len)?);
snippet.push_str("</mark>"); snippet.push_str("</mark>");
let next_offset = if let Some(next_term) = terms.peek() { let next_offset = if let Some(next_term) = terms.peek() {
next_term.offset as usize next_term.offset
} else { } else {
text.len() text.len()
}; };
let mut last_is_space = false; let mut last_is_space = false;
for char in text
    .get(term.offset as usize + term.len as usize..next_offset)?
    .chars()
{
for char in text.get(term.offset + term.len..next_offset)?.chars() {
if !char.is_whitespace() { if !char.is_whitespace() {
last_is_space = false; last_is_space = false;
} else { } else {
@ -133,15 +174,7 @@ pub fn generate_snippet(terms: &[Term], text: &str) -> Option<String> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::language::{search_snippet::generate_snippet, Language};
use nlp::language::Language;
use crate::{
fts::term_index::{TermIndex, TermIndexBuilder},
Deserialize, Serialize,
};
use super::*;
#[test] #[test]
fn search_snippets() { fn search_snippets() {
@ -236,39 +269,18 @@ mod tests {
]; ];
        for (parts, tests) in inputs {
            let mut builder = TermIndexBuilder::new();

            for (field_num, part) in parts.iter().enumerate() {
                let mut terms = Vec::new();
                for token in Language::English.tokenize_text(part, 40) {
                    terms.push(builder.add_token(token));
                }
                builder.add_terms(field_num as u8, 0, terms);
            }

            let compressed_term_index = builder.serialize();
            let term_index = TermIndex::deserialize(&compressed_term_index[..]).unwrap();

            for (match_words, snippets) in tests {
                let mut match_terms = Vec::new();
                for word in &match_words {
                    match_terms.push(term_index.get_match_term(word, None));
                }

                let term_groups = term_index
                    .match_terms(&match_terms, None, false, true, true)
                    .unwrap()
                    .unwrap();

                assert_eq!(term_groups.len(), snippets.len());

                for (term_group, snippet) in term_groups.iter().zip(snippets.iter()) {
                    assert_eq!(
                        snippet,
                        &generate_snippet(&term_group.terms, parts[term_group.field_id as usize])
                            .unwrap()
                    );
                }
            }
        }

        for (parts, tests) in inputs {
            for (needles, snippets) in tests {
                let mut results = Vec::new();

                for part in &parts {
                    if let Some(matched) =
                        generate_snippet(part, &needles, Language::English, false)
                    {
                        results.push(matched);
                    }
                }

                assert_eq!(snippets, results);
            }
        }
    }
}
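
A minimal usage sketch of the rewritten function (the amount of surrounding context included in the output depends on the word-count logic above, so the result shown in the comment is indicative):

use nlp::language::Language;

fn snippet_example() {
    let text = "Hello Joe, your upgrade is now complete.";

    // Non-exact search: each needle is matched token by token.
    if let Some(snippet) = generate_snippet(text, &["upgrade"], Language::English, false) {
        // Matches are wrapped in <mark>..</mark>, e.g.
        // "Hello Joe, your <mark>upgrade</mark> is now complete."
        println!("{snippet}");
    }

    // Exact search: needles must appear as consecutive tokens, in order.
    assert!(generate_snippet(text, &["complete", "upgrade"], Language::English, true).is_none());
}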

View file

@ -141,6 +141,7 @@ pub static STEMMER_MAP: &[Option<Algorithm>] = &[
None, // Tagalog = 67, None, // Tagalog = 67,
None, // Armenian = 68, None, // Armenian = 68,
None, // Unknown = 69, None, // Unknown = 69,
None, // None = 70,
]; ];
#[cfg(test)] #[cfg(test)]

View file

@ -93,6 +93,7 @@ pub static STOP_WORDS: &[Option<&Set<&'static str>>] = &[
None, // Tagalog = 67, None, // Tagalog = 67,
None, // Armenian = 68, None, // Armenian = 68,
None, // Unknown = 69, None, // Unknown = 69,
None, // None = 70,
]; ];
static ARABIC: Set<&'static str> = phf_set! { static ARABIC: Set<&'static str> = phf_set! {

View file

@ -30,6 +30,7 @@ num_cpus = { version = "1.15.0", optional = true }
blake3 = "1.3.3" blake3 = "1.3.3"
tracing = "0.1" tracing = "0.1"
async-trait = "0.1.68" async-trait = "0.1.68"
lz4_flex = { version = "0.11" }
[dev-dependencies] [dev-dependencies]
tokio = { version = "1.23", features = ["full"] } tokio = { version = "1.23", features = ["full"] }

View file

@ -0,0 +1,44 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::ops::Range;
use super::FdbStore;
impl FdbStore {
pub(crate) async fn get_blob(
&self,
key: &[u8],
range: Range<u32>,
) -> crate::Result<Option<Vec<u8>>> {
todo!()
}
pub(crate) async fn put_blob(&self, key: &[u8], data: &[u8]) -> crate::Result<()> {
todo!()
}
pub(crate) async fn delete_blob(&self, key: &[u8]) -> crate::Result<bool> {
todo!()
}
}

View file

@ -28,10 +28,7 @@ use futures::StreamExt;
use rand::Rng; use rand::Rng;
use std::time::Instant; use std::time::Instant;
use crate::{
    write::{key::KeySerializer, now},
    BitmapKey, IndexKey, SUBSPACE_VALUES,
};
use crate::{write::now, BitmapKey, IndexKey};
use super::{ use super::{
bitmap::{next_available_index, BITS_PER_BLOCK}, bitmap::{next_available_index, BITS_PER_BLOCK},
@ -183,36 +180,4 @@ impl FdbStore {
} }
} }
} }
pub(crate) async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
let start = Instant::now();
let counter = KeySerializer::new(U32_LEN + 2)
.write(SUBSPACE_VALUES)
.write(account_id)
.finalize();
loop {
// Read id
let trx = self.db.create_trx()?;
let id = if let Some(bytes) = trx.get(&counter, false).await? {
u64::deserialize(&bytes)? + 1
} else {
0
};
trx.set(&counter, &id.serialize());
match trx.commit().await {
Ok(_) => {
return Ok(id);
}
Err(err) => {
if start.elapsed() < MAX_COMMIT_TIME {
err.on_error().await?;
} else {
return Err(FdbError::from(err).into());
}
}
}
}
}
} }

View file

@ -26,6 +26,7 @@ use foundationdb::{api::NetworkAutoStop, Database, FdbError};
use crate::Error; use crate::Error;
pub mod bitmap; pub mod bitmap;
pub mod blob;
pub mod id_assign; pub mod id_assign;
pub mod main; pub mod main;
pub mod purge; pub mod purge;

View file

@ -95,7 +95,7 @@ impl FdbStore {
account_id: u32, account_id: u32,
collection: u8, collection: u8,
field: u8, field: u8,
value: Vec<u8>, value: &[u8],
op: query::Operator, op: query::Operator,
) -> crate::Result<Option<RoaringBitmap>> { ) -> crate::Result<Option<RoaringBitmap>> {
let k1 = let k1 =
@ -116,27 +116,23 @@ impl FdbStore {
let (begin, end) = match op { let (begin, end) = match op {
Operator::LowerThan => ( Operator::LowerThan => (
KeySelector::first_greater_or_equal(k1.finalize()), KeySelector::first_greater_or_equal(k1.finalize()),
KeySelector::first_greater_or_equal(k2.write(&value[..]).write(0u32).finalize()), KeySelector::first_greater_or_equal(k2.write(value).write(0u32).finalize()),
), ),
Operator::LowerEqualThan => ( Operator::LowerEqualThan => (
KeySelector::first_greater_or_equal(k1.finalize()), KeySelector::first_greater_or_equal(k1.finalize()),
                KeySelector::first_greater_or_equal(
                    k2.write(&value[..]).write(u32::MAX).finalize(),
                ),
                KeySelector::first_greater_or_equal(k2.write(value).write(u32::MAX).finalize()),
), ),
Operator::GreaterThan => ( Operator::GreaterThan => (
KeySelector::first_greater_than(k1.write(&value[..]).write(u32::MAX).finalize()), KeySelector::first_greater_than(k1.write(value).write(u32::MAX).finalize()),
KeySelector::first_greater_or_equal(k2.finalize()), KeySelector::first_greater_or_equal(k2.finalize()),
), ),
Operator::GreaterEqualThan => ( Operator::GreaterEqualThan => (
KeySelector::first_greater_or_equal(k1.write(&value[..]).write(0u32).finalize()), KeySelector::first_greater_or_equal(k1.write(value).write(0u32).finalize()),
KeySelector::first_greater_or_equal(k2.finalize()), KeySelector::first_greater_or_equal(k2.finalize()),
), ),
Operator::Equal => ( Operator::Equal => (
KeySelector::first_greater_or_equal(k1.write(&value[..]).write(0u32).finalize()), KeySelector::first_greater_or_equal(k1.write(value).write(0u32).finalize()),
                KeySelector::first_greater_or_equal(
                    k2.write(&value[..]).write(u32::MAX).finalize(),
                ),
                KeySelector::first_greater_or_equal(k2.write(value).write(u32::MAX).finalize()),
), ),
}; };
let key_len = begin.key().len(); let key_len = begin.key().len();

View file

@ -52,9 +52,7 @@ impl FsStore {
))) )))
} }
} }
}
impl FsStore {
pub(crate) async fn get_blob( pub(crate) async fn get_blob(
&self, &self,
key: &[u8], key: &[u8],
@ -113,9 +111,7 @@ impl FsStore {
Ok(false) Ok(false)
} }
} }
}
impl FsStore {
fn build_path(&self, key: &[u8]) -> PathBuf { fn build_path(&self, key: &[u8]) -> PathBuf {
let mut path = self.path.clone(); let mut path = self.path.clone();

View file

@ -30,8 +30,8 @@ pub mod s3;
#[cfg(feature = "sqlite")] #[cfg(feature = "sqlite")]
pub mod sqlite; pub mod sqlite;
pub(crate) const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 2) as usize; pub const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 1) as usize;
pub(crate) const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1; pub const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1;
#[cfg(feature = "test_mode")] #[cfg(feature = "test_mode")]
pub static ID_ASSIGNMENT_EXPIRY: std::sync::atomic::AtomicU64 = pub static ID_ASSIGNMENT_EXPIRY: std::sync::atomic::AtomicU64 =

View file

@ -140,3 +140,8 @@ impl From<rocksdb::Error> for crate::Error {
Self::InternalError(format!("RocksDB error: {}", value)) Self::InternalError(format!("RocksDB error: {}", value))
} }
} }
#[cfg(feature = "rocks")]
pub struct Store {
db: rocksdb::OptimisticTransactionDB<rocksdb::MultiThreaded>,
}

View file

@ -0,0 +1,83 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::ops::Range;
use rusqlite::OptionalExtension;
use super::SqliteStore;
impl SqliteStore {
pub(crate) async fn get_blob(
&self,
key: &[u8],
range: Range<u32>,
) -> crate::Result<Option<Vec<u8>>> {
let conn = self.conn_pool.get()?;
self.spawn_worker(move || {
let mut result = conn.prepare_cached("SELECT v FROM t WHERE k = ?")?;
result
.query_row([&key], |row| {
Ok({
let bytes = row.get_ref(0)?.as_bytes()?;
if range.start == 0 && range.end == u32::MAX {
bytes.to_vec()
} else {
bytes
.get(
range.start as usize
..std::cmp::min(bytes.len(), range.end as usize),
)
.unwrap_or_default()
.to_vec()
}
})
})
.optional()
.map_err(Into::into)
})
.await
}
pub(crate) async fn put_blob(&self, key: &[u8], data: &[u8]) -> crate::Result<()> {
let conn = self.conn_pool.get()?;
self.spawn_worker(move || {
conn.prepare_cached("INSERT OR REPLACE INTO t (k, v) VALUES (?, ?)")?
.execute([key, data])
.map_err(|e| crate::Error::InternalError(format!("Failed to insert blob: {}", e)))
.map(|_| ())
})
.await
}
pub(crate) async fn delete_blob(&self, key: &[u8]) -> crate::Result<bool> {
let conn = self.conn_pool.get()?;
self.spawn_worker(move || {
conn.prepare_cached("DELETE FROM t WHERE k = ?")?
.execute([key])
.map_err(|e| crate::Error::InternalError(format!("Failed to delete blob: {}", e)))
.map(|_| true)
})
.await
}
}
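
A round-trip sketch for the new SQLite blob backend (the key and contents are illustrative); note that any range other than 0..u32::MAX takes the slicing path above, with the end clamped to the stored length:

async fn blob_roundtrip(store: &SqliteStore) -> crate::Result<()> {
    let key = b"blob/test";
    store.put_blob(key, b"hello world").await?;

    // Full read: 0..u32::MAX returns the blob exactly as stored.
    assert_eq!(
        store.get_blob(key, 0..u32::MAX).await?.as_deref(),
        Some(&b"hello world"[..])
    );

    // Ranged read: the end is clamped, so an oversized range is safe.
    assert_eq!(
        store.get_blob(key, 6..u32::MAX).await?.as_deref(),
        Some(&b"world"[..])
    );

    assert!(store.delete_blob(key).await?);
    Ok(())
}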

View file

@ -23,7 +23,7 @@
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{write::key::DeserializeBigEndian, BitmapKey, IterateParams, LogKey, U64_LEN}; use crate::BitmapKey;
use super::SqliteStore; use super::SqliteStore;
@ -46,15 +46,13 @@ impl IdCacheKey {
pub struct IdAssigner { pub struct IdAssigner {
pub freed_document_ids: Option<RoaringBitmap>, pub freed_document_ids: Option<RoaringBitmap>,
pub next_document_id: u32, pub next_document_id: u32,
pub next_change_id: u64,
} }
impl IdAssigner { impl IdAssigner {
pub fn new(used_ids: Option<RoaringBitmap>, next_change_id: u64) -> Self { pub fn new(used_ids: Option<RoaringBitmap>) -> Self {
let mut assigner = IdAssigner { let mut assigner = IdAssigner {
freed_document_ids: None, freed_document_ids: None,
next_document_id: 0, next_document_id: 0,
next_change_id,
}; };
if let Some(used_ids) = used_ids { if let Some(used_ids) = used_ids {
if let Some(max) = used_ids.max() { if let Some(max) = used_ids.max() {
@ -85,28 +83,9 @@ impl IdAssigner {
id id
} }
} }
pub fn assign_change_id(&mut self) -> u64 {
let id = self.next_change_id;
self.next_change_id += 1;
id
}
} }
impl SqliteStore { impl SqliteStore {
pub(crate) async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
let collection = u8::MAX;
let key = IdCacheKey::new(account_id, collection);
for _ in 0..2 {
if let Some(assigner) = self.id_assigner.lock().get_mut(&key) {
return Ok(assigner.assign_change_id());
}
self.build_id_assigner(key).await?;
}
unreachable!()
}
pub(crate) async fn assign_document_id( pub(crate) async fn assign_document_id(
&self, &self,
account_id: u32, account_id: u32,
@ -128,56 +107,16 @@ impl SqliteStore {
let used_ids = self let used_ids = self
.get_bitmap(BitmapKey::document_ids(key.account_id, key.collection)) .get_bitmap(BitmapKey::document_ids(key.account_id, key.collection))
.await?; .await?;
let next_change_id = self
.get_last_change_id(key.account_id, key.collection)
.await?
.map(|id| id + 1)
.unwrap_or(0);
let id_assigner = self.id_assigner.clone(); let id_assigner = self.id_assigner.clone();
let mut id_assigner = id_assigner.lock(); let mut id_assigner = id_assigner.lock();
// Make sure id assigner was not added by another thread // Make sure id assigner was not added by another thread
if id_assigner.get_mut(&key).is_none() { if id_assigner.get_mut(&key).is_none() {
id_assigner.insert(key, IdAssigner::new(used_ids, next_change_id)); id_assigner.insert(key, IdAssigner::new(used_ids));
} }
Ok(()) Ok(())
} }
async fn get_last_change_id(
&self,
account_id: u32,
collection: impl Into<u8> + Sync + Send,
) -> crate::Result<Option<u64>> {
let collection = collection.into();
let from_key = LogKey {
account_id,
collection,
change_id: u64::MAX,
};
let to_key = LogKey {
account_id,
collection,
change_id: 0,
};
let mut last_change_id = None;
self.iterate(
IterateParams::new(from_key, to_key)
.descending()
.no_values()
.only_first(),
|key, _| {
last_change_id = key.deserialize_be_u64(key.len() - U64_LEN)?.into();
Ok(false)
},
)
.await?;
Ok(last_change_id)
}
} }
#[cfg(test)] #[cfg(test)]
@ -188,7 +127,7 @@ mod tests {
#[test] #[test]
fn id_assigner() { fn id_assigner() {
let mut assigner = IdAssigner::new(None, 0); let mut assigner = IdAssigner::new(None);
assert_eq!(assigner.assign_document_id(), 0); assert_eq!(assigner.assign_document_id(), 0);
assert_eq!(assigner.assign_document_id(), 1); assert_eq!(assigner.assign_document_id(), 1);
assert_eq!(assigner.assign_document_id(), 2); assert_eq!(assigner.assign_document_id(), 2);
@ -197,7 +136,6 @@ mod tests {
RoaringBitmap::from_sorted_iter([0, 2, 4, 6]) RoaringBitmap::from_sorted_iter([0, 2, 4, 6])
.unwrap() .unwrap()
.into(), .into(),
0,
); );
assert_eq!(assigner.assign_document_id(), 1); assert_eq!(assigner.assign_document_id(), 1);
assert_eq!(assigner.assign_document_id(), 3); assert_eq!(assigner.assign_document_id(), 3);

View file

@ -30,8 +30,8 @@ use tokio::sync::oneshot;
use utils::{config::Config, UnwrapFailure}; use utils::{config::Config, UnwrapFailure};
use crate::{ use crate::{
SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_COUNTERS, SUBSPACE_INDEXES, SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_BLOB_DATA, SUBSPACE_COUNTERS,
SUBSPACE_LOGS, SUBSPACE_VALUES, SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_VALUES,
}; };
use super::{pool::SqliteConnectionManager, SqliteStore}; use super::{pool::SqliteConnectionManager, SqliteStore};
@ -78,7 +78,12 @@ impl SqliteStore {
pub(super) fn create_tables(&self) -> crate::Result<()> { pub(super) fn create_tables(&self) -> crate::Result<()> {
let conn = self.conn_pool.get()?; let conn = self.conn_pool.get()?;
for table in [SUBSPACE_VALUES, SUBSPACE_LOGS, SUBSPACE_ACLS] { for table in [
SUBSPACE_VALUES,
SUBSPACE_LOGS,
SUBSPACE_ACLS,
SUBSPACE_BLOB_DATA,
] {
let table = char::from(table); let table = char::from(table);
conn.execute( conn.execute(
&format!( &format!(

View file

@ -34,6 +34,7 @@ use self::{
pool::SqliteConnectionManager, pool::SqliteConnectionManager,
}; };
pub mod blob;
pub mod id_assign; pub mod id_assign;
pub mod main; pub mod main;
pub mod pool; pub mod pool;

View file

@ -32,7 +32,6 @@ impl SqliteStore {
pub(crate) async fn purge_bitmaps(&self) -> crate::Result<()> { pub(crate) async fn purge_bitmaps(&self) -> crate::Result<()> {
let conn = self.conn_pool.get()?; let conn = self.conn_pool.get()?;
self.spawn_worker(move || { self.spawn_worker(move || {
//Todo
conn.prepare_cached(concat!( conn.prepare_cached(concat!(
"DELETE FROM b WHERE ", "DELETE FROM b WHERE ",
"a = 0 AND ", "a = 0 AND ",

View file

@ -110,7 +110,7 @@ impl SqliteStore {
account_id: u32, account_id: u32,
collection: u8, collection: u8,
field: u8, field: u8,
value: Vec<u8>, value: &[u8],
op: query::Operator, op: query::Operator,
) -> crate::Result<Option<RoaringBitmap>> { ) -> crate::Result<Option<RoaringBitmap>> {
let conn = self.conn_pool.get()?; let conn = self.conn_pool.get()?;
@ -132,27 +132,27 @@ impl SqliteStore {
Operator::LowerThan => ( Operator::LowerThan => (
("SELECT k FROM i WHERE k >= ? AND k < ?"), ("SELECT k FROM i WHERE k >= ? AND k < ?"),
(k1.finalize()), (k1.finalize()),
(k2.write(&value[..]).write(0u32).finalize()), (k2.write(value).write(0u32).finalize()),
), ),
Operator::LowerEqualThan => ( Operator::LowerEqualThan => (
("SELECT k FROM i WHERE k >= ? AND k <= ?"), ("SELECT k FROM i WHERE k >= ? AND k <= ?"),
(k1.finalize()), (k1.finalize()),
(k2.write(&value[..]).write(u32::MAX).finalize()), (k2.write(value).write(u32::MAX).finalize()),
), ),
Operator::GreaterThan => ( Operator::GreaterThan => (
("SELECT k FROM i WHERE k > ? AND k <= ?"), ("SELECT k FROM i WHERE k > ? AND k <= ?"),
(k1.write(&value[..]).write(u32::MAX).finalize()), (k1.write(value).write(u32::MAX).finalize()),
(k2.finalize()), (k2.finalize()),
), ),
Operator::GreaterEqualThan => ( Operator::GreaterEqualThan => (
("SELECT k FROM i WHERE k >= ? AND k <= ?"), ("SELECT k FROM i WHERE k >= ? AND k <= ?"),
(k1.write(&value[..]).write(0u32).finalize()), (k1.write(value).write(0u32).finalize()),
(k2.finalize()), (k2.finalize()),
), ),
Operator::Equal => ( Operator::Equal => (
("SELECT k FROM i WHERE k >= ? AND k <= ?"), ("SELECT k FROM i WHERE k >= ? AND k <= ?"),
(k1.write(&value[..]).write(0u32).finalize()), (k1.write(value).write(0u32).finalize()),
(k2.write(&value[..]).write(u32::MAX).finalize()), (k2.write(value).write(u32::MAX).finalize()),
), ),
}; };
@ -314,7 +314,7 @@ impl SqliteStore {
// Values // Values
let mut has_errors = false; let mut has_errors = false;
for table in [crate::SUBSPACE_VALUES, crate::SUBSPACE_ACLS, crate::SUBSPACE_COUNTERS] { for table in [crate::SUBSPACE_VALUES, crate::SUBSPACE_ACLS, crate::SUBSPACE_COUNTERS, crate::SUBSPACE_BLOB_DATA] {
let table = char::from(table); let table = char::from(table);
let mut query = conn.prepare_cached(&format!("SELECT k, v FROM {table}")).unwrap(); let mut query = conn.prepare_cached(&format!("SELECT k, v FROM {table}")).unwrap();
let mut rows = query.query([]).unwrap(); let mut rows = query.query([]).unwrap();
@ -370,7 +370,7 @@ impl SqliteStore {
// Bitmaps // Bitmaps
let mut query = conn let mut query = conn
.prepare_cached("SELECT z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p FROM b") .prepare_cached(&format!("SELECT z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p FROM {}", char::from(crate::SUBSPACE_BITMAPS)))
.unwrap(); .unwrap();
let mut rows = query.query([]).unwrap(); let mut rows = query.query([]).unwrap();

View file

@ -274,8 +274,8 @@ impl SqliteStore {
#[cfg(feature = "test_mode")] #[cfg(feature = "test_mode")]
pub(crate) async fn destroy(&self) { pub(crate) async fn destroy(&self) {
use crate::{ use crate::{
SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_COUNTERS, SUBSPACE_INDEXES, SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_BLOB_DATA, SUBSPACE_COUNTERS,
SUBSPACE_LOGS, SUBSPACE_VALUES, SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_VALUES,
}; };
let conn = self.conn_pool.get().unwrap(); let conn = self.conn_pool.get().unwrap();
@ -287,6 +287,7 @@ impl SqliteStore {
SUBSPACE_BLOBS, SUBSPACE_BLOBS,
SUBSPACE_ACLS, SUBSPACE_ACLS,
SUBSPACE_COUNTERS, SUBSPACE_COUNTERS,
SUBSPACE_BLOB_DATA,
] { ] {
conn.execute(&format!("DROP TABLE {}", char::from(table)), []) conn.execute(&format!("DROP TABLE {}", char::from(table)), [])
.unwrap(); .unwrap();

View file

@ -21,23 +21,27 @@
* for more details. * for more details.
*/ */
use std::ops::{BitAndAssign, Range}; use std::{
fmt::Display,
ops::{BitAndAssign, Range},
};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{ use crate::{
fts::{index::FtsDocument, FtsFilter},
query, query,
write::{Batch, BitmapClass, ValueClass}, write::{Batch, BitmapClass, ValueClass},
BitmapKey, BlobStore, Deserialize, IterateParams, Key, Store, ValueKey, BitmapKey, BlobStore, Deserialize, FtsStore, IterateParams, Key, Store, ValueKey,
}; };
impl Store { impl Store {
pub async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> { /*pub async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
match self { match self {
Self::SQLite(store) => store.assign_change_id(account_id).await, Self::SQLite(store) => store.assign_change_id(account_id).await,
Self::FoundationDb(store) => store.assign_change_id(account_id).await, Self::FoundationDb(store) => store.assign_change_id(account_id).await,
} }
} }*/
pub async fn assign_document_id( pub async fn assign_document_id(
&self, &self,
@ -110,7 +114,7 @@ impl Store {
account_id: u32, account_id: u32,
collection: u8, collection: u8,
field: u8, field: u8,
value: Vec<u8>, value: &[u8],
op: query::Operator, op: query::Operator,
) -> crate::Result<Option<RoaringBitmap>> { ) -> crate::Result<Option<RoaringBitmap>> {
match self { match self {
@ -149,7 +153,7 @@ impl Store {
} }
} }
pub(crate) async fn iterate<T: Key>( pub async fn iterate<T: Key>(
&self, &self,
params: IterateParams<T>, params: IterateParams<T>,
cb: impl for<'x> FnMut(&'x [u8], &'x [u8]) -> crate::Result<bool> + Sync + Send, cb: impl for<'x> FnMut(&'x [u8], &'x [u8]) -> crate::Result<bool> + Sync + Send,
@ -190,6 +194,27 @@ impl Store {
} }
} }
pub async fn get_blob(&self, key: &[u8], range: Range<u32>) -> crate::Result<Option<Vec<u8>>> {
match self {
Self::SQLite(store) => store.get_blob(key, range).await,
Self::FoundationDb(store) => store.get_blob(key, range).await,
}
}
pub async fn put_blob(&self, key: &[u8], data: &[u8]) -> crate::Result<()> {
match self {
Self::SQLite(store) => store.put_blob(key, data).await,
Self::FoundationDb(store) => store.put_blob(key, data).await,
}
}
pub async fn delete_blob(&self, key: &[u8]) -> crate::Result<bool> {
match self {
Self::SQLite(store) => store.delete_blob(key).await,
Self::FoundationDb(store) => store.delete_blob(key).await,
}
}
#[cfg(feature = "test_mode")] #[cfg(feature = "test_mode")]
pub async fn destroy(&self) { pub async fn destroy(&self) {
match self { match self {
@ -269,6 +294,8 @@ impl BlobStore {
match self { match self {
Self::Fs(store) => store.get_blob(key, range).await, Self::Fs(store) => store.get_blob(key, range).await,
Self::S3(store) => store.get_blob(key, range).await, Self::S3(store) => store.get_blob(key, range).await,
Self::Sqlite(store) => store.get_blob(key, range).await,
Self::FoundationDb(store) => store.get_blob(key, range).await,
} }
} }
@ -276,6 +303,8 @@ impl BlobStore {
match self { match self {
Self::Fs(store) => store.put_blob(key, data).await, Self::Fs(store) => store.put_blob(key, data).await,
Self::S3(store) => store.put_blob(key, data).await, Self::S3(store) => store.put_blob(key, data).await,
Self::Sqlite(store) => store.put_blob(key, data).await,
Self::FoundationDb(store) => store.put_blob(key, data).await,
} }
} }
@ -283,6 +312,47 @@ impl BlobStore {
match self { match self {
Self::Fs(store) => store.delete_blob(key).await, Self::Fs(store) => store.delete_blob(key).await,
Self::S3(store) => store.delete_blob(key).await, Self::S3(store) => store.delete_blob(key).await,
Self::Sqlite(store) => store.delete_blob(key).await,
Self::FoundationDb(store) => store.delete_blob(key).await,
}
}
}
impl FtsStore {
pub async fn index<T: Into<u8> + Display + Clone + std::fmt::Debug>(
&self,
document: FtsDocument<'_, T>,
) -> crate::Result<()> {
match self {
FtsStore::Store(store) => store.fts_index(document).await,
}
}
pub async fn query<T: Into<u8> + Display + Clone + std::fmt::Debug>(
&self,
account_id: u32,
collection: impl Into<u8>,
filters: Vec<FtsFilter<T>>,
) -> crate::Result<RoaringBitmap> {
match self {
FtsStore::Store(store) => store.fts_query(account_id, collection, filters).await,
}
}
pub async fn remove(
&self,
account_id: u32,
collection: u8,
document_id: u32,
) -> crate::Result<bool> {
match self {
FtsStore::Store(store) => store.fts_remove(account_id, collection, document_id).await,
}
}
pub async fn remove_all(&self, account_id: u32) -> crate::Result<()> {
match self {
FtsStore::Store(store) => store.fts_remove_all(account_id).await,
} }
} }
} }
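
The FtsStore wrapper dispatches every operation to the single Store-backed arm for now; a sketch of driving it end to end (the ids and field choice are illustrative, not from the diff):

use nlp::language::Language;

async fn reindex_one(fts: &FtsStore) -> crate::Result<()> {
    let mut document: FtsDocument<'_, u8> =
        FtsDocument::with_default_language(Language::English)
            .with_account_id(1)
            .with_collection(0u8)
            .with_document_id(42);
    document.index(Field::Body, "hello world", Language::English);

    // Tokenizes, stems and writes the document's terms.
    fts.index(document).await?;

    // Later, drop the document's terms from the index again.
    fts.remove(1, 0u8, 42).await?;
    Ok(())
}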

View file

@ -1,257 +0,0 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{
borrow::Cow,
f64::consts::LN_2,
hash::{Hash, Hasher},
};
use nlp::{language::stemmer::StemmedToken, tokenizers::Token};
use roaring::RoaringBitmap;
use utils::codec::leb128::{Leb128Reader, Leb128Vec};
use crate::{Deserialize, Error, Serialize};
pub struct BloomFilter {
m: u64,
b: RoaringBitmap,
}
#[derive(Debug)]
pub struct BloomHash {
pub h: [u64; 7],
}
#[derive(Debug)]
pub struct BloomHashGroup {
pub h1: BloomHash,
pub h2: Option<BloomHash>,
}
const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
0xaf1f2242106c64b3,
0x60ca4cfb4b3ed0ce,
0xc7dbc0bb615e82b3,
0x520ad065378daf88,
);
lazy_static::lazy_static! {
static ref SIPHASHER: siphasher::sip::SipHasher13 =
siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
}
const P: f64 = 0.01;
impl BloomFilter {
pub fn new(items: usize) -> Self {
Self {
m: if items > 0 {
std::cmp::max(Self::estimate_m(items, P), 10240)
} else {
0
},
b: RoaringBitmap::new(),
}
}
fn from_params(m: u64, b: RoaringBitmap) -> Self {
Self { m, b }
}
fn estimate_m(n: usize, p: f64) -> u64 {
(((n as f64) * f64::ln(p) / (-8.0 * LN_2.powi(2))).ceil() as u64) * 8
}
#[allow(dead_code)]
fn estimate_k(m: u64, n: usize) -> u32 {
std::cmp::max(((m as f64) / (n as f64) * f64::ln(2.0f64)).ceil() as u32, 1)
}
pub fn insert(&mut self, hash: &BloomHash) {
self.b.insert((hash.h[0] % self.m) as u32);
self.b.insert((hash.h[1] % self.m) as u32);
self.b.insert((hash.h[2] % self.m) as u32);
self.b.insert((hash.h[3] % self.m) as u32);
self.b.insert((hash.h[4] % self.m) as u32);
self.b.insert((hash.h[5] % self.m) as u32);
self.b.insert((hash.h[6] % self.m) as u32);
}
pub fn contains(&self, hash: &BloomHash) -> bool {
self.b.contains((hash.h[0] % self.m) as u32)
&& self.b.contains((hash.h[1] % self.m) as u32)
&& self.b.contains((hash.h[2] % self.m) as u32)
&& self.b.contains((hash.h[3] % self.m) as u32)
&& self.b.contains((hash.h[4] % self.m) as u32)
&& self.b.contains((hash.h[5] % self.m) as u32)
&& self.b.contains((hash.h[6] % self.m) as u32)
}
pub fn is_subset(&self, other: &Self) -> bool {
self.b.is_subset(&other.b)
}
pub fn is_empty(&self) -> bool {
self.m == 0 || self.b.is_empty()
}
}
pub trait BloomHasher {
fn hash<T: Hash + AsRef<[u8]> + ?Sized>(item: &T) -> Self;
}
impl BloomHash {
pub fn hash<T: Hash + AsRef<[u8]> + ?Sized>(item: &T) -> Self {
let h1 = xxhash_rust::xxh3::xxh3_64(item.as_ref());
let h2 = farmhash::hash64(item.as_ref());
let h3 = AHASHER.hash_one(item);
let mut sh = *SIPHASHER;
sh.write(item.as_ref());
let h4 = sh.finish();
Self {
h: [h1, h2, h3, h4, h1 ^ h2, h2 ^ h3, h3 ^ h4],
}
}
}
pub fn hash_token(item: &str) -> Vec<u8> {
let h1 = xxhash_rust::xxh3::xxh3_64(item.as_ref()).to_le_bytes();
let h2 = farmhash::hash64(item.as_ref()).to_le_bytes();
let h3 = AHASHER.hash_one(item).to_le_bytes();
let mut sh = *SIPHASHER;
sh.write(item.as_ref());
let h4 = sh.finish().to_le_bytes();
match item.len() {
0..=8 => {
let mut hash = Vec::with_capacity(6);
hash.extend_from_slice(&h1[..2]);
hash.extend_from_slice(&h2[..2]);
hash.push(h3[0]);
hash.push(h4[0]);
hash
}
9..=16 => {
let mut hash = Vec::with_capacity(8);
hash.extend_from_slice(&h1[..2]);
hash.extend_from_slice(&h2[..2]);
hash.extend_from_slice(&h3[..2]);
hash.extend_from_slice(&h4[..2]);
hash
}
17..=32 => {
let mut hash = Vec::with_capacity(12);
hash.extend_from_slice(&h1[..3]);
hash.extend_from_slice(&h2[..3]);
hash.extend_from_slice(&h3[..3]);
hash.extend_from_slice(&h4[..3]);
hash
}
_ => {
let mut hash = Vec::with_capacity(16);
hash.extend_from_slice(&h1[..4]);
hash.extend_from_slice(&h2[..4]);
hash.extend_from_slice(&h3[..4]);
hash.extend_from_slice(&h4[..4]);
hash
}
}
}
impl From<&str> for BloomHash {
fn from(s: &str) -> Self {
Self::hash(&s)
}
}
impl From<String> for BloomHash {
fn from(s: String) -> Self {
Self::hash(&s)
}
}
impl From<&String> for BloomHash {
fn from(s: &String) -> Self {
Self::hash(&s)
}
}
impl From<Cow<'_, str>> for BloomHash {
fn from(s: Cow<'_, str>) -> Self {
Self::hash(s.as_ref())
}
}
impl From<Token<Cow<'_, str>>> for BloomHashGroup {
fn from(t: Token<Cow<'_, str>>) -> Self {
Self {
h1: BloomHash::hash(t.word.as_ref()),
h2: None,
}
}
}
impl From<StemmedToken<'_>> for BloomHashGroup {
fn from(t: StemmedToken<'_>) -> Self {
Self {
h1: BloomHash::hash(t.word.as_ref()),
h2: t.stemmed_word.map(|w| BloomHash::hash(&format!("{w}_"))),
}
}
}
impl From<Cow<'_, str>> for BloomHashGroup {
fn from(t: Cow<'_, str>) -> Self {
Self {
h1: BloomHash::hash(t.as_ref()),
h2: None,
}
}
}
impl Serialize for BloomFilter {
fn serialize(self) -> Vec<u8> {
let mut buf = Vec::with_capacity(U64_LEN + self.b.serialized_size());
buf.push_leb128(self.m);
let _ = self.b.serialize_into(&mut buf);
buf
}
}
impl Deserialize for BloomFilter {
fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
let (m, pos) = bytes.read_leb128().ok_or_else(|| {
Error::InternalError(
"Failed to read 'm' value while deserializing bloom filter.".to_string(),
)
})?;
RoaringBitmap::deserialize_unchecked_from(bytes.get(pos..).ok_or_else(|| {
Error::InternalError(
"Failed to read bitmap while deserializing bloom filter.".to_string(),
)
})?)
.map_err(|err| Error::InternalError(format!("Failed to deserialize bloom filter: {err}.")))
.map(|b| Self::from_params(m, b))
}
}

View file

@ -1,250 +0,0 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{borrow::Cow, collections::HashSet, fmt::Display};
use ahash::AHashSet;
use nlp::{
language::{
detect::{LanguageDetector, MIN_LANGUAGE_SCORE},
stemmer::Stemmer,
Language,
},
tokenizers::{space::SpaceTokenizer, Token},
};
use utils::map::vec_map::VecMap;
use crate::{
query::RawValue,
write::{BatchBuilder, IntoOperations, Operation, ValueClass},
Serialize, HASH_EXACT, HASH_STEMMED,
};
use super::term_index::{TermIndexBuilder, TokenIndex};
pub const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 2) as usize;
pub const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1;
struct Text<'x, T: Into<u8> + Display> {
field: T,
text: Cow<'x, str>,
language: Type,
}
enum Type {
Stem(Language),
Tokenize,
Static,
}
pub struct FtsIndexBuilder<'x, T: Into<u8> + Display> {
parts: Vec<Text<'x, T>>,
default_language: Language,
}
impl<'x, T: Into<u8> + Display> FtsIndexBuilder<'x, T> {
pub fn with_default_language(default_language: Language) -> FtsIndexBuilder<'x, T> {
FtsIndexBuilder {
parts: vec![],
default_language,
}
}
pub fn index(&mut self, field: T, text: impl Into<Cow<'x, str>>, language: Language) {
self.parts.push(Text {
field,
text: text.into(),
language: Type::Stem(language),
});
}
pub fn index_raw(&mut self, field: T, text: impl Into<Cow<'x, str>>) {
self.parts.push(Text {
field,
text: text.into(),
language: Type::Tokenize,
});
}
pub fn index_raw_token(&mut self, field: T, text: impl Into<Cow<'x, str>>) {
self.parts.push(Text {
field,
text: text.into(),
language: Type::Static,
});
}
}
impl<'x, T: Into<u8> + Display> IntoOperations for FtsIndexBuilder<'x, T> {
fn build(self, batch: &mut BatchBuilder) {
let mut detect = LanguageDetector::new();
let mut tokens: VecMap<u8, AHashSet<String>> = VecMap::new();
let mut parts = Vec::new();
for text in self.parts {
match text.language {
Type::Stem(language) => {
let language = if language == Language::Unknown {
detect.detect(&text.text, MIN_LANGUAGE_SCORE)
} else {
language
};
parts.push((text.field, language, text.text));
}
Type::Tokenize => {
let tokens = tokens.get_mut_or_insert(text.field.into());
for token in SpaceTokenizer::new(text.text.as_ref(), MAX_TOKEN_LENGTH) {
tokens.insert(token);
}
}
Type::Static => {
tokens
.get_mut_or_insert(text.field.into())
.insert(text.text.into_owned());
}
}
}
let default_language = detect
.most_frequent_language()
.unwrap_or(self.default_language);
let mut term_index = TermIndexBuilder::new();
let mut ops = AHashSet::new();
for (part_id, (field, language, text)) in parts.into_iter().enumerate() {
let language = if language != Language::Unknown {
language
} else {
default_language
};
let mut terms = Vec::new();
let field: u8 = field.into();
for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH).collect::<Vec<_>>() {
ops.insert(Operation::hash(&token.word, HASH_EXACT, field, true));
if let Some(stemmed_word) = &token.stemmed_word {
ops.insert(Operation::hash(stemmed_word, HASH_STEMMED, field, true));
}
terms.push(term_index.add_stemmed_token(token));
}
if !terms.is_empty() {
term_index.add_terms(field, part_id as u32, terms);
}
}
for (field, tokens) in tokens {
let mut terms = Vec::with_capacity(tokens.len());
for token in tokens {
ops.insert(Operation::hash(&token, HASH_EXACT, field, true));
terms.push(term_index.add_token(Token {
word: token.into(),
from: 0,
to: 0,
}));
}
term_index.add_terms(field, 0, terms);
}
for op in ops {
batch.ops.push(op);
}
batch.ops.push(Operation::Value {
class: ValueClass::Property {
field: u8::MAX,
family: u8::MAX,
},
set: term_index.serialize().into(),
});
}
}
impl TokenIndex {
fn build_index(self, batch: &mut BatchBuilder, set: bool) {
let mut ops = AHashSet::with_capacity(self.tokens.len() * 2);
for term in self.terms {
for (term_ids, is_exact) in [(term.exact_terms, true), (term.stemmed_terms, false)] {
for term_id in term_ids {
if let Some(word) = self.tokens.get(term_id as usize) {
ops.insert(Operation::hash(
word,
if is_exact { HASH_EXACT } else { HASH_STEMMED },
term.field_id,
set,
));
}
}
}
}
for op in ops {
batch.ops.push(op);
}
}
}
impl IntoOperations for TokenIndex {
fn build(self, batch: &mut BatchBuilder) {
self.build_index(batch, false);
batch.ops.push(Operation::Value {
class: ValueClass::Property {
field: u8::MAX,
family: u8::MAX,
},
set: None,
});
}
}
impl IntoOperations for RawValue<TokenIndex> {
fn build(self, batch: &mut BatchBuilder) {
self.inner.build_index(batch, true);
batch.ops.push(Operation::Value {
class: ValueClass::Property {
field: u8::MAX,
family: u8::MAX,
},
set: self.raw.into(),
});
}
}
pub trait ToTokens {
fn to_tokens(&self) -> HashSet<String>;
}
impl ToTokens for &str {
fn to_tokens(&self) -> HashSet<String> {
let mut tokens = HashSet::new();
for token in SpaceTokenizer::new(self, MAX_TOKEN_LENGTH) {
tokens.insert(token);
}
tokens
}
}
impl ToTokens for &String {
fn to_tokens(&self) -> HashSet<String> {
self.as_str().to_tokens()
}
}

View file

@ -0,0 +1,372 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{borrow::Cow, fmt::Display};
use ahash::{AHashMap, AHashSet};
use nlp::{
language::{
detect::{LanguageDetector, MIN_LANGUAGE_SCORE},
stemmer::Stemmer,
Language,
},
tokenizers::word::WordTokenizer,
};
use crate::{
backend::MAX_TOKEN_LENGTH,
write::{
hash::TokenType, key::KeySerializer, BatchBuilder, BitmapClass, BitmapHash, Operation,
ValueClass,
},
Deserialize, Error, Store, ValueKey, U64_LEN,
};
use super::Field;
#[derive(Debug)]
struct Text<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> {
field: Field<T>,
text: Cow<'x, str>,
typ: Type,
}
#[derive(Debug)]
enum Type {
Text(Language),
Tokenize,
Keyword,
}
#[derive(Debug)]
pub struct FtsDocument<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> {
parts: Vec<Text<'x, T>>,
default_language: Language,
account_id: u32,
collection: u8,
document_id: u32,
}
impl<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> FtsDocument<'x, T> {
pub fn with_default_language(default_language: Language) -> FtsDocument<'x, T> {
FtsDocument {
parts: vec![],
default_language,
account_id: 0,
document_id: 0,
collection: 0,
}
}
pub fn with_account_id(mut self, account_id: u32) -> Self {
self.account_id = account_id;
self
}
pub fn with_document_id(mut self, document_id: u32) -> Self {
self.document_id = document_id;
self
}
pub fn with_collection(mut self, collection: impl Into<u8>) -> Self {
self.collection = collection.into();
self
}
pub fn index(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>, language: Language) {
self.parts.push(Text {
field,
text: text.into(),
typ: Type::Text(language),
});
}
pub fn index_tokenized(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>) {
self.parts.push(Text {
field,
text: text.into(),
typ: Type::Tokenize,
});
}
pub fn index_keyword(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>) {
self.parts.push(Text {
field,
text: text.into(),
typ: Type::Keyword,
});
}
}
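
For orientation, a minimal usage sketch of the builder above; the HeaderField enum, module paths, and the account/collection/document ids are illustrative assumptions, not part of this commit:

use nlp::language::Language;
use store::fts::{index::FtsDocument, Field};

// Hypothetical caller-side header type; any T: Into<u8> + Display + Clone + Debug works.
#[derive(Clone, Debug)]
enum HeaderField {
    Subject,
}

impl From<HeaderField> for u8 {
    fn from(_: HeaderField) -> u8 {
        0
    }
}

impl std::fmt::Display for HeaderField {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "Subject")
    }
}

fn build_document() -> FtsDocument<'static, HeaderField> {
    let mut document = FtsDocument::with_default_language(Language::English)
        .with_account_id(1)
        .with_collection(0u8) // e.g. Collection::Email
        .with_document_id(42);
    // Language::Unknown defers to detection, falling back to the default language.
    document.index(Field::Body, "the quick brown fox", Language::Unknown);
    document.index_keyword(Field::Keyword, "draft");
    document.index_tokenized(Field::Header(HeaderField::Subject), "Test message");
    document
}
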
impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<Field<T>> for u8 {
fn from(value: Field<T>) -> Self {
match value {
Field::Body => 0,
Field::Attachment => 1,
Field::Keyword => 2,
Field::Header(value) => 3 + value.into(),
}
}
}
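
A quick sanity check on this mapping (illustrative asserts; header offsets share the field byte with the stemmed/bigram flag bits introduced below, so effective field ids are assumed to stay under 64):

assert_eq!(u8::from(Field::<u8>::Body), 0);
assert_eq!(u8::from(Field::<u8>::Attachment), 1);
assert_eq!(u8::from(Field::<u8>::Keyword), 2);
assert_eq!(u8::from(Field::<u8>::Header(2)), 5); // header ids are offset by 3
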
impl Store {
pub async fn fts_index<T: Into<u8> + Display + Clone + std::fmt::Debug>(
&self,
document: FtsDocument<'_, T>,
) -> crate::Result<()> {
let mut detect = LanguageDetector::new();
let mut tokens: AHashMap<BitmapHash, AHashSet<u8>> = AHashMap::new();
let mut parts = Vec::new();
for text in document.parts {
match text.typ {
Type::Text(language) => {
let language = if language == Language::Unknown {
detect.detect(&text.text, MIN_LANGUAGE_SCORE)
} else {
language
};
parts.push((text.field, language, text.text));
}
Type::Tokenize => {
let field = u8::from(text.field);
for token in WordTokenizer::new(text.text.as_ref(), MAX_TOKEN_LENGTH) {
tokens
.entry(BitmapHash::new(token.word.as_ref()))
.or_default()
.insert(TokenType::word(field));
}
}
Type::Keyword => {
let field = u8::from(text.field);
tokens
.entry(BitmapHash::new(text.text.as_ref()))
.or_default()
.insert(TokenType::word(field));
}
}
}
let default_language = detect
.most_frequent_language()
.unwrap_or(document.default_language);
for (field, language, text) in parts.into_iter() {
let language = if language != Language::Unknown {
language
} else {
default_language
};
let field: u8 = field.into();
let mut last_token = Cow::Borrowed("");
for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH) {
if !last_token.is_empty() {
tokens
.entry(BitmapHash::new(&format!("{} {}", last_token, token.word)))
.or_default()
.insert(TokenType::bigram(field));
}
tokens
.entry(BitmapHash::new(token.word.as_ref()))
.or_default()
.insert(TokenType::word(field));
if let Some(stemmed_word) = token.stemmed_word {
tokens
.entry(BitmapHash::new(stemmed_word.as_ref()))
.or_default()
.insert(TokenType::stemmed(field));
}
last_token = token.word;
}
}
if tokens.is_empty() {
return Ok(());
}
// Serialize tokens
let mut serializer = KeySerializer::new(tokens.len() * U64_LEN * 2);
let mut keys = Vec::with_capacity(tokens.len());
for (hash, fields) in tokens.into_iter() {
serializer = serializer
.write(hash.hash.as_slice())
.write(hash.len)
.write(fields.len() as u8);
for field in fields.into_iter() {
serializer = serializer.write(field);
keys.push(Operation::Bitmap {
class: BitmapClass::Text { field, token: hash },
set: true,
});
}
}
// Write term index
let mut batch = BatchBuilder::new();
batch
.with_account_id(document.account_id)
.with_collection(document.collection)
.update_document(document.document_id)
.set(
ValueClass::TermIndex,
lz4_flex::compress_prepend_size(&serializer.finalize()),
);
self.write(batch.build()).await?;
let mut batch = BatchBuilder::new();
batch
.with_account_id(document.account_id)
.with_collection(document.collection)
.update_document(document.document_id);
for (pos, key) in keys.into_iter().enumerate() {
if pos > 0 && pos & 1023 == 0 {
self.write(batch.build()).await?;
batch = BatchBuilder::new();
batch
.with_account_id(document.account_id)
.with_collection(document.collection)
.update_document(document.document_id);
}
batch.ops.push(key);
}
if !batch.is_empty() {
self.write(batch.build()).await?;
}
Ok(())
}
pub async fn fts_remove(
&self,
account_id: u32,
collection: u8,
document_id: u32,
) -> crate::Result<bool> {
// Obtain term index
let term_index = if let Some(term_index) = self
.get_value::<TermIndex>(ValueKey {
account_id,
collection,
document_id,
class: ValueClass::TermIndex,
})
.await?
{
term_index
} else {
return Ok(false);
};
// Remove keys
let mut batch = BatchBuilder::new();
batch
.with_account_id(account_id)
.with_collection(collection)
.update_document(document_id);
for (pos, key) in term_index.ops.into_iter().enumerate() {
if pos > 0 && pos & 1023 == 0 {
self.write(batch.build()).await?;
batch = BatchBuilder::new();
batch
.with_account_id(account_id)
.with_collection(collection)
.update_document(document_id);
}
batch.ops.push(key);
}
if !batch.is_empty() {
self.write(batch.build()).await?;
}
// Remove term index
let mut batch = BatchBuilder::new();
batch
.with_account_id(account_id)
.with_collection(collection)
.update_document(document_id)
.clear(ValueClass::TermIndex);
self.write(batch.build()).await?;
Ok(true)
}
pub async fn fts_remove_all(&self, _: u32) -> crate::Result<()> {
// No-op: term indexes are stored in the same key range as the documents,
// so they are removed together with the rest of the account's data.
Ok(())
}
}
struct TermIndex {
ops: Vec<Operation>,
}
impl Deserialize for TermIndex {
fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
let bytes = lz4_flex::decompress_size_prepended(bytes)
.map_err(|_| Error::InternalError("Failed to decompress term index".to_string()))?;
let mut ops = Vec::new();
let mut bytes = bytes.iter().peekable();
while bytes.peek().is_some() {
let mut hash = BitmapHash {
hash: [0; 8],
len: 0,
};
for byte in hash.hash.iter_mut() {
*byte = *bytes.next().ok_or(Error::InternalError(
"Unexpected EOF reading term index".to_string(),
))?;
}
hash.len = *bytes.next().ok_or(Error::InternalError(
"Unexpected EOF reading term index".to_string(),
))?;
let num_fields = *bytes.next().ok_or(Error::InternalError(
"Unexpected EOF reading term index".to_string(),
))?;
for _ in 0..num_fields {
let field = *bytes.next().ok_or(Error::InternalError(
"Unexpected EOF reading term index".to_string(),
))?;
ops.push(Operation::Bitmap {
class: BitmapClass::Text { field, token: hash },
set: false,
});
}
}
Ok(Self { ops })
}
}
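
For reference, the term-index value produced by fts_index (before lz4 compression) and parsed back here is a flat sequence of token entries. The bytes below are an illustrative sketch, not captured output:

// [ hash: 8 bytes ][ len: 1 ][ num_fields: 1 ][ num_fields field bytes ]  ...repeated
//
// e.g. if the body contains both "run" and "running", the entry for "run"
// (entries come from a hash map, so their order is unspecified) would be:
//
//   72 75 6e 00 00 00 00 00   // "run" is <= 8 bytes, stored verbatim
//   03                        // token length
//   02                        // two field bytes follow
//   00                        // TokenType::word(0)    -> literal occurrence
//   40                        // TokenType::stemmed(0) -> stem of "running"
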

View file

@ -21,55 +21,188 @@
* for more details. * for more details.
*/ */
use crate::{ use std::fmt::Display;
write::{BitmapFamily, Operation},
BitmapKey, Serialize, BM_HASH,
};
use self::{bloom::hash_token, builder::MAX_TOKEN_MASK}; use nlp::language::Language;
pub mod bloom; pub mod index;
pub mod builder;
pub mod query; pub mod query;
pub mod search_snippet;
pub mod term_index;
impl BitmapKey<Vec<u8>> { #[derive(Clone, Debug)]
pub fn hash(word: &str, account_id: u32, collection: u8, family: u8, field: u8) -> Self { pub enum Field<T: Into<u8> + Display + Clone + std::fmt::Debug> {
BitmapKey { Header(T),
account_id, Body,
collection, Attachment,
family: BM_HASH | family | (word.len() & MAX_TOKEN_MASK) as u8, Keyword,
field, }
block_num: 0,
key: hash_token(word),
}
}
pub fn value( #[derive(Debug)]
account_id: u32, pub enum FtsFilter<T: Into<u8> + Display + Clone + std::fmt::Debug> {
collection: impl Into<u8>, Exact {
field: impl Into<u8>, field: Field<T>,
value: impl BitmapFamily + Serialize, text: String,
language: Language,
},
Contains {
field: Field<T>,
text: String,
language: Language,
},
Keyword {
field: Field<T>,
text: String,
},
And,
Or,
Not,
End,
}
impl<T: Into<u8> + Display + Clone + std::fmt::Debug> FtsFilter<T> {
pub fn has_text_detect(
field: Field<T>,
text: impl Into<String>,
default_language: Language,
) -> Self { ) -> Self {
BitmapKey { let (text, language) = Language::detect(text.into(), default_language);
account_id, Self::has_text(field, text, language)
collection: collection.into(), }
family: value.family(),
field: field.into(), pub fn has_text(field: Field<T>, text: impl Into<String>, language: Language) -> Self {
block_num: 0, let text = text.into();
key: value.serialize(), if !matches!(language, Language::None)
&& ((text.starts_with('"') && text.ends_with('"'))
|| (text.starts_with('\'') && text.ends_with('\'')))
{
FtsFilter::Exact {
field,
text,
language,
}
} else {
FtsFilter::Contains {
field,
text,
language,
}
} }
} }
pub fn has_keyword(field: Field<T>, text: impl Into<String>) -> Self {
FtsFilter::Keyword {
field,
text: text.into(),
}
}
pub fn has_english_text(field: Field<T>, text: impl Into<String>) -> Self {
Self::has_text(field, text, Language::English)
}
} }
impl Operation { #[derive(Clone, Copy)]
pub fn hash(word: &str, family: u8, field: u8, set: bool) -> Self { pub enum FilterType {
Operation::Bitmap { And,
family: BM_HASH | family | (word.len() & MAX_TOKEN_MASK) as u8, Or,
field, Not,
key: hash_token(word), End,
set, Store,
Fts,
}
pub enum FilterGroup<T: FilterItem> {
Fts(Vec<T>),
Store(T),
}
pub trait FilterItem: Clone {
fn filter_type(&self) -> FilterType;
}
pub trait IntoFilterGroup<T: FilterItem + From<FilterType>> {
fn into_filter_group(self) -> Vec<FilterGroup<T>>;
}
impl<T: FilterItem + From<FilterType>> IntoFilterGroup<T> for Vec<T> {
fn into_filter_group(self) -> Vec<FilterGroup<T>> {
let mut filter = Vec::with_capacity(self.len());
let mut iter = self.into_iter();
let mut logical_op = None;
while let Some(item) = iter.next() {
if matches!(item.filter_type(), FilterType::Fts) {
let mut store_item = None;
let mut depth = 0;
let mut fts = Vec::with_capacity(5);
// Add the logical operator if there is one
let in_logical_op = if let Some(op) = logical_op.take() {
fts.push(op);
true
} else {
false
};
fts.push(item);
for item in iter.by_ref() {
match item.filter_type() {
FilterType::And | FilterType::Or | FilterType::Not => {
depth += 1;
fts.push(item);
}
FilterType::End if depth > 0 => {
depth -= 1;
fts.push(item);
}
FilterType::Fts => {
fts.push(item);
}
_ => {
store_item = Some(item);
break;
}
}
}
if in_logical_op {
fts.push(T::from(FilterType::End));
}
if depth > 0 {
let mut store = Vec::with_capacity(depth * 2);
while depth > 0 {
let item = fts.pop().unwrap();
if matches!(
item.filter_type(),
FilterType::And | FilterType::Or | FilterType::Not
) {
depth -= 1;
}
store.push(FilterGroup::Store(item));
}
filter.push(FilterGroup::Fts(fts));
filter.extend(store);
} else {
filter.push(FilterGroup::Fts(fts));
}
if let Some(item) = store_item {
filter.push(FilterGroup::Store(item));
}
} else {
match item.filter_type() {
FilterType::And | FilterType::Or => {
logical_op = Some(item.clone());
}
FilterType::Not => {
logical_op = Some(T::from(FilterType::And));
}
_ => {}
}
filter.push(FilterGroup::Store(item));
}
} }
filter
} }
} }
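
As a worked example of the grouping above (filter names are illustrative), a mixed sequence is split so the FTS backend sees one self-contained group while the store keeps the surrounding operators:

// input:  [Or, Text("hello"), Larger(100), End]
//
// output: [Store(Or),
//          Fts([Or, Text("hello"), End]),  // pending Or replicated, group closed with End
//          Store(Larger(100)),
//          Store(End)]
//
// If the FTS run ends with unclosed operators (depth > 0), those trailing
// items are popped off the FTS group and handed back to the store side.
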

View file

@ -21,138 +21,210 @@
* for more details. * for more details.
*/ */
use std::ops::BitOrAssign; use std::{
fmt::Display,
ops::{BitAndAssign, BitOrAssign, BitXorAssign},
};
use nlp::language::{stemmer::Stemmer, Language}; use nlp::language::stemmer::Stemmer;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{fts::builder::MAX_TOKEN_LENGTH, BitmapKey, ValueKey, HASH_EXACT, HASH_STEMMED}; use crate::{backend::MAX_TOKEN_LENGTH, fts::FtsFilter, write::BitmapClass, BitmapKey, Store};
use super::term_index::TermIndex; struct State<T: Into<u8> + Display + Clone + std::fmt::Debug> {
pub op: FtsFilter<T>,
pub bm: Option<RoaringBitmap>,
}
#[async_trait::async_trait] impl Store {
pub trait StoreFts: StoreRead { pub async fn fts_query<T: Into<u8> + Display + Clone + std::fmt::Debug>(
async fn fts_query( &self,
&mut self,
account_id: u32, account_id: u32,
collection: u8, collection: impl Into<u8>,
field: u8, filters: Vec<FtsFilter<T>>,
text: &str, ) -> crate::Result<RoaringBitmap> {
language: Language, let collection = collection.into();
match_phrase: bool, let mut not_mask = RoaringBitmap::new();
) -> crate::Result<Option<RoaringBitmap>> { let mut not_fetch = false;
if match_phrase {
let mut phrase = Vec::new(); let mut state: State<T> = FtsFilter::And.into();
let mut bit_keys = Vec::new(); let mut stack = Vec::new();
for token in language.tokenize_text(text, MAX_TOKEN_LENGTH) { let mut filters = filters.into_iter().peekable();
let key = BitmapKey::hash(
token.word.as_ref(), while let Some(filter) = filters.next() {
account_id, let mut result = match filter {
collection, FtsFilter::Exact {
HASH_EXACT,
field, field,
); text,
if !bit_keys.contains(&key) { language,
bit_keys.push(key); } => {
let field: u8 = field.clone().into();
let tokens = language
.tokenize_text(text.as_ref(), MAX_TOKEN_LENGTH)
.map(|t| t.word)
.collect::<Vec<_>>();
let keys = if tokens.len() > 1 {
tokens
.windows(2)
.map(|bg| BitmapKey {
account_id,
collection,
class: BitmapClass::bigram(format!("{} {}", bg[0], bg[1]), field),
block_num: 0,
})
.collect::<Vec<_>>()
} else {
tokens
.into_iter()
.map(|word| BitmapKey {
account_id,
collection,
class: BitmapClass::word(word.as_ref(), field),
block_num: 0,
})
.collect::<Vec<_>>()
};
self.get_bitmaps_intersection(keys).await?
} }
FtsFilter::Contains {
field,
text,
language,
} => {
let mut result = RoaringBitmap::new();
let field: u8 = field.clone().into();
phrase.push(token.word); for token in Stemmer::new(text.as_ref(), language, MAX_TOKEN_LENGTH) {
} let token1 = BitmapKey {
let bitmaps = match self.get_bitmaps_intersection(bit_keys).await? { account_id,
Some(b) if !b.is_empty() => b, collection,
_ => return Ok(None), class: BitmapClass::word(token.word.as_ref(), field),
}; block_num: 0,
};
let token2 = BitmapKey {
account_id,
collection,
class: BitmapClass::stemmed(
if let Some(stemmed_word) = token.stemmed_word {
stemmed_word
} else {
token.word
}
.as_ref(),
field,
),
block_num: 0,
};
match phrase.len() { match self.get_bitmaps_union(vec![token1, token2]).await? {
0 => return Ok(None), Some(b) if !b.is_empty() => {
1 => return Ok(Some(bitmaps)), if !result.is_empty() {
_ => (), result &= b;
} if result.is_empty() {
break;
let mut results = RoaringBitmap::new(); }
for document_id in bitmaps { } else {
if let Some(term_index) = self result = b;
.get_value::<TermIndex>(ValueKey::term_index( }
account_id,
collection,
document_id,
))
.await?
{
if term_index
.match_terms(
&phrase
.iter()
.map(|w| term_index.get_match_term(w, None))
.collect::<Vec<_>>(),
field.into(),
true,
false,
false,
)
.map_err(|e| {
crate::Error::InternalError(format!(
"TermIndex match_terms failed for {account_id}/{collection}/{document_id}: {e:?}"
))
})?
.is_some()
{
results.insert(document_id);
}
} else {
tracing::debug!(
event = "error",
context = "fts_query",
account_id = account_id,
collection = collection,
document_id = document_id,
"Document is missing a term index",
);
}
}
if !results.is_empty() {
Ok(Some(results))
} else {
Ok(None)
}
} else {
let mut bitmaps = RoaringBitmap::new();
for token in Stemmer::new(text, language, MAX_TOKEN_LENGTH) {
let token1 =
BitmapKey::hash(&token.word, account_id, collection, HASH_EXACT, field);
let token2 = if let Some(stemmed_word) = token.stemmed_word {
BitmapKey::hash(&stemmed_word, account_id, collection, HASH_STEMMED, field)
} else {
let mut token2 = token1.clone();
token2.family &= !HASH_EXACT;
token2.family |= HASH_STEMMED;
token2
};
match self.get_bitmaps_union(vec![token1, token2]).await? {
Some(b) if !b.is_empty() => {
if !bitmaps.is_empty() {
bitmaps &= b;
if bitmaps.is_empty() {
return Ok(None);
} }
} else { _ => break,
bitmaps = b;
} }
} }
_ => return Ok(None),
}; if !result.is_empty() {
Some(result)
} else {
None
}
}
FtsFilter::Keyword { field, text } => {
self.get_bitmap(BitmapKey {
account_id,
collection,
class: BitmapClass::word(text, field),
block_num: 0,
})
.await?
}
op @ (FtsFilter::And | FtsFilter::Or | FtsFilter::Not) => {
stack.push(state);
state = op.into();
continue;
}
FtsFilter::End => {
if let Some(prev_state) = stack.pop() {
let bm = state.bm;
state = prev_state;
bm
} else {
break;
}
}
};
// Only fetch not mask if we need it
if matches!(state.op, FtsFilter::Not) && !not_fetch {
not_mask = self
.get_bitmap(BitmapKey::document_ids(account_id, collection))
.await?
.unwrap_or_else(RoaringBitmap::new);
not_fetch = true;
} }
Ok(Some(bitmaps)) // Apply logical operation
if let Some(dest) = &mut state.bm {
match state.op {
FtsFilter::And => {
if let Some(result) = result {
dest.bitand_assign(result);
} else {
dest.clear();
}
}
FtsFilter::Or => {
if let Some(result) = result {
dest.bitor_assign(result);
}
}
FtsFilter::Not => {
if let Some(mut result) = result {
result.bitxor_assign(&not_mask);
dest.bitand_assign(result);
}
}
_ => unreachable!(),
}
} else if let Some(ref mut result_) = result {
if let FtsFilter::Not = state.op {
result_.bitxor_assign(&not_mask);
}
state.bm = result;
} else if let FtsFilter::Not = state.op {
state.bm = Some(not_mask.clone());
} else {
state.bm = Some(RoaringBitmap::new());
}
// And short-circuit
if matches!(state.op, FtsFilter::And) && state.bm.as_ref().unwrap().is_empty() {
while let Some(filter) = filters.peek() {
if matches!(filter, FtsFilter::End) {
break;
} else {
filters.next();
}
}
}
} }
Ok(state.bm.unwrap_or_default())
} }
async fn get_bitmaps_union<T: AsRef<[u8]> + Sync + Send>( async fn get_bitmaps_union(
&self, &self,
keys: Vec<BitmapKey<T>>, keys: Vec<BitmapKey<BitmapClass>>,
) -> crate::Result<Option<RoaringBitmap>> { ) -> crate::Result<Option<RoaringBitmap>> {
let mut bm = RoaringBitmap::new(); let mut bm = RoaringBitmap::new();
@ -165,3 +237,12 @@ pub trait StoreFts: StoreRead {
Ok(if !bm.is_empty() { Some(bm) } else { None }) Ok(if !bm.is_empty() { Some(bm) } else { None })
} }
} }
impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<FtsFilter<T>> for State<T> {
fn from(value: FtsFilter<T>) -> Self {
Self {
op: value,
bm: None,
}
}
}
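
A hedged usage sketch of the evaluator above; the collection id and the u8 field type are assumptions for illustration. Note that passing the text wrapped in quote characters would route it through the exact, bigram-based phrase match instead of the stemmed Contains path:

use nlp::language::Language;
use roaring::RoaringBitmap;
use store::{
    fts::{Field, FtsFilter},
    Store,
};

// Messages whose body contains "quick fox" and that do not carry the "spam" keyword.
async fn spam_free_matches(
    store: &Store,
    account_id: u32,
) -> Result<RoaringBitmap, store::Error> {
    store
        .fts_query(
            account_id,
            0u8, // collection id, e.g. Collection::Email
            vec![
                FtsFilter::has_text(Field::<u8>::Body, "quick fox", Language::English),
                FtsFilter::Not,
                FtsFilter::has_keyword(Field::Keyword, "spam"),
                FtsFilter::End,
            ],
        )
        .await
}
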

View file

@ -23,7 +23,7 @@
use std::{borrow::Cow, convert::TryInto}; use std::{borrow::Cow, convert::TryInto};
use crate::{Deserialize, Serialize}; use crate::{Deserialize, Serialize, U32_LEN, U64_LEN};
use ahash::{AHashMap, AHashSet}; use ahash::{AHashMap, AHashSet};
use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x}; use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};

View file

@ -24,8 +24,8 @@
use std::{fmt::Display, sync::Arc}; use std::{fmt::Display, sync::Arc};
pub mod backend; pub mod backend;
//pub mod fts;
pub mod dispatch; pub mod dispatch;
pub mod fts;
pub mod query; pub mod query;
pub mod write; pub mod write;
@ -37,11 +37,6 @@ pub use rand;
pub use roaring; pub use roaring;
use write::{BitmapClass, BlobOp, ValueClass}; use write::{BitmapClass, BlobOp, ValueClass};
#[cfg(feature = "rocks")]
pub struct Store {
db: rocksdb::OptimisticTransactionDB<rocksdb::MultiThreaded>,
}
pub trait Deserialize: Sized + Sync + Send { pub trait Deserialize: Sized + Sync + Send {
fn deserialize(bytes: &[u8]) -> crate::Result<Self>; fn deserialize(bytes: &[u8]) -> crate::Result<Self>;
} }
@ -103,9 +98,9 @@ pub struct LogKey {
pub change_id: u64, pub change_id: u64,
} }
const BLOB_HASH_LEN: usize = 32; pub const BLOB_HASH_LEN: usize = 32;
const U64_LEN: usize = std::mem::size_of::<u64>(); pub const U64_LEN: usize = std::mem::size_of::<u64>();
const U32_LEN: usize = std::mem::size_of::<u32>(); pub const U32_LEN: usize = std::mem::size_of::<u32>();
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)] #[derive(Clone, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct BlobHash([u8; BLOB_HASH_LEN]); pub struct BlobHash([u8; BLOB_HASH_LEN]);
@ -158,6 +153,7 @@ pub const SUBSPACE_VALUES: u8 = b'v';
pub const SUBSPACE_LOGS: u8 = b'l'; pub const SUBSPACE_LOGS: u8 = b'l';
pub const SUBSPACE_INDEXES: u8 = b'i'; pub const SUBSPACE_INDEXES: u8 = b'i';
pub const SUBSPACE_BLOBS: u8 = b'o'; pub const SUBSPACE_BLOBS: u8 = b'o';
pub const SUBSPACE_BLOB_DATA: u8 = b't';
pub const SUBSPACE_ACLS: u8 = b'a'; pub const SUBSPACE_ACLS: u8 = b'a';
pub const SUBSPACE_COUNTERS: u8 = b'c'; pub const SUBSPACE_COUNTERS: u8 = b'c';
@ -179,6 +175,13 @@ pub enum Store {
pub enum BlobStore { pub enum BlobStore {
Fs(Arc<FsStore>), Fs(Arc<FsStore>),
S3(Arc<S3Store>), S3(Arc<S3Store>),
Sqlite(Arc<SqliteStore>),
FoundationDb(Arc<FdbStore>),
}
#[derive(Clone)]
pub enum FtsStore {
Store(Store),
} }
impl From<SqliteStore> for Store { impl From<SqliteStore> for Store {
@ -204,3 +207,9 @@ impl From<S3Store> for BlobStore {
Self::S3(Arc::new(store)) Self::S3(Arc::new(store))
} }
} }
impl From<Store> for FtsStore {
fn from(store: Store) -> Self {
Self::Store(store)
}
}

View file

@ -24,7 +24,7 @@
use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign}; use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign};
use ahash::HashSet; use ahash::HashSet;
use nlp::tokenizers::space::SpaceTokenizer; use nlp::tokenizers::word::WordTokenizer;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{backend::MAX_TOKEN_LENGTH, BitmapKey, Store}; use crate::{backend::MAX_TOKEN_LENGTH, BitmapKey, Store};
@ -32,8 +32,8 @@ use crate::{backend::MAX_TOKEN_LENGTH, BitmapKey, Store};
use super::{Filter, ResultSet}; use super::{Filter, ResultSet};
struct State { struct State {
op: Filter, pub op: Filter,
bm: Option<RoaringBitmap>, pub bm: Option<RoaringBitmap>,
} }
impl Store { impl Store {
@ -44,8 +44,6 @@ impl Store {
filters: Vec<Filter>, filters: Vec<Filter>,
) -> crate::Result<ResultSet> { ) -> crate::Result<ResultSet> {
let collection = collection.into(); let collection = collection.into();
let mut not_mask = RoaringBitmap::new();
let mut not_fetch = false;
if filters.is_empty() { if filters.is_empty() {
return Ok(ResultSet { return Ok(ResultSet {
account_id, account_id,
@ -61,10 +59,13 @@ impl Store {
let mut stack = Vec::new(); let mut stack = Vec::new();
let mut filters = filters.into_iter().peekable(); let mut filters = filters.into_iter().peekable();
let mut not_mask = RoaringBitmap::new();
let mut not_fetch = false;
while let Some(filter) = filters.next() { while let Some(filter) = filters.next() {
let result = match filter { let mut result = match filter {
Filter::MatchValue { field, op, value } => { Filter::MatchValue { field, op, value } => {
self.range_to_bitmap(account_id, collection, field, value, op) self.range_to_bitmap(account_id, collection, field, &value, op)
.await? .await?
} }
Filter::HasText { Filter::HasText {
@ -74,7 +75,8 @@ impl Store {
} => { } => {
if tokenize { if tokenize {
self.get_bitmaps_intersection( self.get_bitmaps_intersection(
SpaceTokenizer::new(&text, MAX_TOKEN_LENGTH) WordTokenizer::new(&text, MAX_TOKEN_LENGTH)
.map(|token| token.word.into_owned())
.collect::<HashSet<String>>() .collect::<HashSet<String>>()
.into_iter() .into_iter()
.map(|word| { .map(|word| {
@ -114,6 +116,7 @@ impl Store {
} }
}; };
// Only fetch not mask if we need it
if matches!(state.op, Filter::Not) && !not_fetch { if matches!(state.op, Filter::Not) && !not_fetch {
not_mask = self not_mask = self
.get_bitmap(BitmapKey::document_ids(account_id, collection)) .get_bitmap(BitmapKey::document_ids(account_id, collection))
@ -122,8 +125,41 @@ impl Store {
not_fetch = true; not_fetch = true;
} }
state.op.apply(&mut state.bm, result, &not_mask); // Apply logical operation
if let Some(dest) = &mut state.bm {
match state.op {
Filter::And => {
if let Some(result) = result {
dest.bitand_assign(result);
} else {
dest.clear();
}
}
Filter::Or => {
if let Some(result) = result {
dest.bitor_assign(result);
}
}
Filter::Not => {
if let Some(mut result) = result {
result.bitxor_assign(&not_mask);
dest.bitand_assign(result);
}
}
_ => unreachable!(),
}
} else if let Some(ref mut result_) = result {
if let Filter::Not = state.op {
result_.bitxor_assign(&not_mask);
}
state.bm = result;
} else if let Filter::Not = state.op {
state.bm = Some(not_mask.clone());
} else {
state.bm = Some(RoaringBitmap::new());
}
// And short-circuit
if matches!(state.op, Filter::And) && state.bm.as_ref().unwrap().is_empty() { if matches!(state.op, Filter::And) && state.bm.as_ref().unwrap().is_empty() {
while let Some(filter) = filters.peek() { while let Some(filter) = filters.peek() {
if matches!(filter, Filter::End) { if matches!(filter, Filter::End) {
@ -143,49 +179,6 @@ impl Store {
} }
} }
impl Filter {
#[inline(always)]
pub fn apply(
&self,
dest: &mut Option<RoaringBitmap>,
mut src: Option<RoaringBitmap>,
not_mask: &RoaringBitmap,
) {
if let Some(dest) = dest {
match self {
Filter::And => {
if let Some(src) = src {
dest.bitand_assign(src);
} else {
dest.clear();
}
}
Filter::Or => {
if let Some(src) = src {
dest.bitor_assign(src);
}
}
Filter::Not => {
if let Some(mut src) = src {
src.bitxor_assign(not_mask);
dest.bitand_assign(src);
}
}
_ => unreachable!(),
}
} else if let Some(ref mut src_) = src {
if let Filter::Not = self {
src_.bitxor_assign(not_mask);
}
*dest = src;
} else if let Filter::Not = self {
*dest = Some(not_mask.clone());
} else {
*dest = Some(RoaringBitmap::new());
}
}
}
impl From<Filter> for State { impl From<Filter> for State {
fn from(value: Filter) -> Self { fn from(value: Filter) -> Self {
Self { Self {

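One note on the Not handling shared by both evaluators: the complement is taken against every document id in the collection, via XOR with the lazily fetched mask. A small illustrative sketch, assuming the inner result is a subset of the mask:

use roaring::RoaringBitmap;

let not_mask = RoaringBitmap::from_iter([0u32, 1, 2, 3, 4]); // all document ids
let mut result = RoaringBitmap::from_iter([1u32, 3]); // docs matching the inner condition
result ^= &not_mask; // same BitXorAssign used above
assert_eq!(result, RoaringBitmap::from_iter([0u32, 2, 4])); // docs NOT matching
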
View file

@ -130,12 +130,12 @@ impl Store {
let from_key = LogKey { let from_key = LogKey {
account_id, account_id,
collection, collection,
change_id: u64::MAX, change_id: 0,
}; };
let to_key = LogKey { let to_key = LogKey {
account_id, account_id,
collection, collection,
change_id: 0, change_id: u64::MAX,
}; };
let mut last_change_id = None; let mut last_change_id = None;

View file

@ -29,7 +29,7 @@ pub mod sort;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::{ use crate::{
write::{BitmapClass, TagValue}, write::{BitmapClass, BitmapHash, TagValue},
BitmapKey, IterateParams, Key, Serialize, BitmapKey, IterateParams, Key, Serialize,
}; };
@ -144,48 +144,6 @@ impl Filter {
} }
} }
/*pub fn has_text_detect(
field: impl Into<u8>,
text: impl Into<String>,
default_language: Language,
) -> Self {
let (text, language) = Language::detect(text.into(), default_language);
Self::has_text(field, text, language)
}
pub fn has_text(field: impl Into<u8>, text: impl Into<String>, language: Language) -> Self {
let text = text.into();
let op = if !matches!(language, Language::None) {
if (text.starts_with('"') && text.ends_with('"'))
|| (text.starts_with('\'') && text.ends_with('\''))
{
TextMatch::Exact(language)
} else {
TextMatch::Stemmed(language)
}
} else {
TextMatch::Tokenized
};
Filter::HasText {
field: field.into(),
text,
op,
}
}
pub fn has_raw_text(field: impl Into<u8>, text: impl Into<String>) -> Self {
Filter::HasText {
field: field.into(),
text: text.into(),
op: TextMatch::Raw,
}
}
pub fn has_english_text(field: impl Into<u8>, text: impl Into<String>) -> Self {
Self::has_text(field, text, Language::English)
}*/
pub fn has_text(field: impl Into<u8>, text: impl Into<String>) -> Self { pub fn has_text(field: impl Into<u8>, text: impl Into<String>) -> Self {
Filter::HasText { Filter::HasText {
field: field.into(), field: field.into(),
@ -255,14 +213,14 @@ impl BitmapKey<BitmapClass> {
account_id: u32, account_id: u32,
collection: impl Into<u8>, collection: impl Into<u8>,
field: impl Into<u8>, field: impl Into<u8>,
token: impl Into<Vec<u8>>, token: impl AsRef<[u8]>,
) -> Self { ) -> Self {
BitmapKey { BitmapKey {
account_id, account_id,
collection: collection.into(), collection: collection.into(),
class: BitmapClass::Text { class: BitmapClass::Text {
field: field.into(), field: field.into(),
token: token.into(), token: BitmapHash::new(token),
}, },
block_num: 0, block_num: 0,
} }
@ -317,20 +275,3 @@ impl<T: Key> IterateParams<T> {
self self
} }
} }
/*
#[derive(Debug)]
pub struct RawValue<T: Deserialize> {
pub raw: Vec<u8>,
pub inner: T,
}
impl<T: Deserialize> Deserialize for RawValue<T> {
fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
Ok(RawValue {
inner: T::deserialize(bytes)?,
raw: bytes.to_vec(),
})
}
}
*/

View file

@ -160,10 +160,10 @@ impl BatchBuilder {
self self
} }
pub fn set(&mut self, class: impl Into<ValueClass>, value: Vec<u8>) -> &mut Self { pub fn set(&mut self, class: impl Into<ValueClass>, value: impl Into<Vec<u8>>) -> &mut Self {
self.ops.push(Operation::Value { self.ops.push(Operation::Value {
class: class.into(), class: class.into(),
op: ValueOp::Set(value), op: ValueOp::Set(value.into()),
}); });
self self
} }

View file

@ -0,0 +1,158 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use crate::backend::MAX_TOKEN_LENGTH;
use super::{BitmapClass, BitmapHash};
impl BitmapClass {
pub fn word(token: impl AsRef<[u8]>, field: impl Into<u8>) -> Self {
BitmapClass::Text {
field: field.into(),
token: BitmapHash::new(token),
}
}
pub fn stemmed(token: impl AsRef<[u8]>, field: impl Into<u8>) -> Self {
BitmapClass::Text {
field: field.into() | 1 << 6,
token: BitmapHash::new(token),
}
}
pub fn bigram(token: impl AsRef<[u8]>, field: impl Into<u8>) -> Self {
BitmapClass::Text {
field: field.into() | 1 << 7,
token: BitmapHash::new(token),
}
}
}
impl BitmapHash {
pub fn new(item: impl AsRef<[u8]>) -> Self {
Self {
len: std::cmp::min(item.as_ref().len(), MAX_TOKEN_LENGTH) as u8,
hash: hash(item),
}
}
pub fn to_u64(&self) -> u64 {
u64::from_be_bytes(self.hash)
}
}
fn hash(item: impl AsRef<[u8]>) -> [u8; 8] {
let item = item.as_ref();
let mut result = [0u8; 8];
if item.len() <= 8 {
result[..item.len()].copy_from_slice(item);
} else {
result[..4].copy_from_slice(&xxhash_rust::xxh3::xxh3_64(item).to_le_bytes()[..4]);
result[4..8].copy_from_slice(&farmhash::hash64(item).to_le_bytes()[..4]);
}
result
}
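
Illustrative behavior of the hashing above; the assertions are a sketch, assuming MAX_TOKEN_LENGTH is at least 20:

// Tokens of up to 8 bytes are stored verbatim; longer tokens become a
// composite xxh3 + farmhash digest.
let short = BitmapHash::new("cat");
assert_eq!(short.len, 3);
assert_eq!(&short.hash[..3], b"cat");
assert_eq!(short.hash[3..], [0u8; 5]);

let long = BitmapHash::new("internationalization");
assert_eq!(long.len, 20); // length is still recorded, capped at MAX_TOKEN_LENGTH
// long.hash holds 4 bytes of xxh3 followed by 4 bytes of farmhash.
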
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct TokenType {}
impl TokenType {
pub fn word(field: u8) -> u8 {
field
}
pub fn stemmed(field: u8) -> u8 {
1 << 6 | field
}
pub fn bigram(field: u8) -> u8 {
1 << 7 | field
}
}
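
The two flag bits ride on top of the field id, which is therefore assumed to stay below 64. For example:

assert_eq!(TokenType::word(3), 0b0000_0011);
assert_eq!(TokenType::stemmed(3), 0b0100_0011); // bit 6 set
assert_eq!(TokenType::bigram(3), 0b1000_0011);  // bit 7 set
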
/*
const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
0xaf1f2242106c64b3,
0x60ca4cfb4b3ed0ce,
0xc7dbc0bb615e82b3,
0x520ad065378daf88,
);
lazy_static::lazy_static! {
static ref SIPHASHER: siphasher::sip::SipHasher13 =
siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
}
let h1 = xxhash_rust::xxh3::xxh3_64(item).to_le_bytes();
let h2 = farmhash::hash64(item).to_le_bytes();
let h3 = AHASHER.hash_one(item).to_le_bytes();
let mut sh = *SIPHASHER;
sh.write(item.as_ref());
let h4 = sh.finish().to_le_bytes();
result[..2].copy_from_slice(&h1[..2]);
result[2..4].copy_from_slice(&h2[..2]);
result[4..6].copy_from_slice(&h3[..2]);
result[6..8].copy_from_slice(&h4[..2]);
impl KeySerializer {
pub fn hash_text(mut self, item: impl AsRef<[u8]>) -> Self {
let item = item.as_ref();
if item.len() <= 8 {
self.buf.extend_from_slice(item);
} else {
let h1 = xxhash_rust::xxh3::xxh3_64(item).to_le_bytes();
let h2 = farmhash::hash64(item).to_le_bytes();
let h3 = AHASHER.hash_one(item).to_le_bytes();
let mut sh = *SIPHASHER;
sh.write(item.as_ref());
let h4 = sh.finish().to_le_bytes();
match item.len() {
9..=16 => {
self.buf.extend_from_slice(&h1[..2]);
self.buf.extend_from_slice(&h2[..2]);
self.buf.extend_from_slice(&h3[..2]);
self.buf.extend_from_slice(&h4[..2]);
}
17..=32 => {
self.buf.extend_from_slice(&h1[..3]);
self.buf.extend_from_slice(&h2[..3]);
self.buf.extend_from_slice(&h3[..3]);
self.buf.extend_from_slice(&h4[..3]);
}
_ => {
self.buf.extend_from_slice(&h1[..4]);
self.buf.extend_from_slice(&h2[..4]);
self.buf.extend_from_slice(&h3[..4]);
self.buf.extend_from_slice(&h4[..4]);
}
}
}
self
}
}
*/

View file

@ -21,19 +21,19 @@
* for more details. * for more details.
*/ */
use std::{convert::TryInto, hash::Hasher}; use std::convert::TryInto;
use utils::codec::leb128::Leb128_; use utils::codec::leb128::Leb128_;
use crate::{ use crate::{
backend::MAX_TOKEN_MASK, BitmapKey, BlobHash, BlobKey, IndexKey, IndexKeyPrefix, Key, LogKey, BitmapKey, BlobHash, BlobKey, IndexKey, IndexKeyPrefix, Key, LogKey, ValueKey, BLOB_HASH_LEN,
ValueKey, BLOB_HASH_LEN, SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_VALUES, U32_LEN,
SUBSPACE_VALUES, U32_LEN, U64_LEN, U64_LEN,
}; };
use super::{BitmapClass, BlobOp, TagValue, ValueClass}; use super::{BitmapClass, BlobOp, TagValue, ValueClass};
pub struct KeySerializer { pub struct KeySerializer {
buf: Vec<u8>, pub buf: Vec<u8>,
} }
pub trait KeySerialize { pub trait KeySerialize {
@ -241,6 +241,15 @@ impl<T: AsRef<ValueClass> + Sync + Send> Key for ValueKey<T> {
} }
.write(u32::MAX) .write(u32::MAX)
.write(name.as_slice()), .write(name.as_slice()),
ValueClass::TermIndex => if include_subspace {
KeySerializer::new(U32_LEN * 2 + 3).write(crate::SUBSPACE_VALUES)
} else {
KeySerializer::new(U32_LEN * 2 + 2)
}
.write(self.account_id)
.write(self.collection)
.write_leb128(self.document_id)
.write(u8::MAX),
} }
.finalize() .finalize()
} }
@ -277,35 +286,64 @@ impl<T: AsRef<BitmapClass> + Sync + Send> Key for BitmapKey<T> {
fn serialize(&self, include_subspace: bool) -> Vec<u8> { fn serialize(&self, include_subspace: bool) -> Vec<u8> {
const BM_DOCUMENT_IDS: u8 = 0; const BM_DOCUMENT_IDS: u8 = 0;
const BM_TAG: u8 = 1 << 5; const BM_TAG: u8 = 1 << 6;
const BM_TEXT: u8 = 1 << 6; const BM_TEXT: u8 = 1 << 7;
const TAG_ID: u8 = 0; const TAG_ID: u8 = 0;
const TAG_TEXT: u8 = 1 << 0; const TAG_TEXT: u8 = 1 << 0;
const TAG_STATIC: u8 = 1 << 1; const TAG_STATIC: u8 = 1 << 1;
let ks = if include_subspace {
KeySerializer::new(self.len() + 1).write(crate::SUBSPACE_BITMAPS)
} else {
KeySerializer::new(self.len())
}
.write(self.account_id)
.write(self.collection);
match self.class.as_ref() { match self.class.as_ref() {
BitmapClass::DocumentIds => ks.write(BM_DOCUMENT_IDS), BitmapClass::DocumentIds => if include_subspace {
KeySerializer::new(U32_LEN + 3).write(SUBSPACE_BITMAPS)
} else {
KeySerializer::new(U32_LEN + 2)
}
.write(self.account_id)
.write(self.collection)
.write(BM_DOCUMENT_IDS),
BitmapClass::Tag { field, value } => match value { BitmapClass::Tag { field, value } => match value {
TagValue::Id(id) => ks.write(BM_TAG | TAG_ID).write(*field).write_leb128(*id), TagValue::Id(id) => if include_subspace {
TagValue::Text(text) => ks KeySerializer::new((U32_LEN * 2) + 4).write(SUBSPACE_BITMAPS)
.write(BM_TAG | TAG_TEXT) } else {
.write(*field) KeySerializer::new((U32_LEN * 2) + 3)
.write(text.as_slice()), }
TagValue::Static(id) => ks.write(BM_TAG | TAG_STATIC).write(*field).write(*id), .write(self.account_id)
}, .write(self.collection)
BitmapClass::Text { field, token } => ks .write(BM_TAG | TAG_ID)
.write(BM_TEXT | (token.len() & MAX_TOKEN_MASK) as u8)
.write(*field) .write(*field)
.hash_text(token), .write_leb128(*id),
TagValue::Text(text) => if include_subspace {
KeySerializer::new(U32_LEN + 4 + text.len()).write(SUBSPACE_BITMAPS)
} else {
KeySerializer::new(U32_LEN + 3 + text.len())
}
.write(self.account_id)
.write(self.collection)
.write(BM_TAG | TAG_TEXT)
.write(*field)
.write(text.as_slice()),
TagValue::Static(id) => if include_subspace {
KeySerializer::new(U32_LEN + 5).write(SUBSPACE_BITMAPS)
} else {
KeySerializer::new(U32_LEN + 4)
}
.write(self.account_id)
.write(self.collection)
.write(BM_TAG | TAG_STATIC)
.write(*field)
.write(*id),
},
BitmapClass::Text { field, token } => if include_subspace {
KeySerializer::new(U32_LEN + 16 + 3 + 1).write(SUBSPACE_BITMAPS)
} else {
KeySerializer::new(U32_LEN + 16 + 3)
}
.write(self.account_id)
.write(self.collection)
.write(BM_TEXT | token.len)
.write(*field)
.write(token.hash.as_slice()),
} }
.write(self.block_num) .write(self.block_num)
.finalize() .finalize()
@ -349,81 +387,3 @@ impl<T: AsRef<BlobHash> + Sync + Send> Key for BlobKey<T> {
crate::SUBSPACE_BLOBS crate::SUBSPACE_BLOBS
} }
} }
const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
0xaf1f2242106c64b3,
0x60ca4cfb4b3ed0ce,
0xc7dbc0bb615e82b3,
0x520ad065378daf88,
);
lazy_static::lazy_static! {
static ref SIPHASHER: siphasher::sip::SipHasher13 =
siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
}
impl KeySerializer {
fn hash_text(mut self, item: impl AsRef<[u8]>) -> Self {
let item = item.as_ref();
if item.len() <= 8 {
self.buf.extend_from_slice(item);
} else {
let h1 = xxhash_rust::xxh3::xxh3_64(item).to_le_bytes();
let h2 = farmhash::hash64(item).to_le_bytes();
let h3 = AHASHER.hash_one(item).to_le_bytes();
let mut sh = *SIPHASHER;
sh.write(item.as_ref());
let h4 = sh.finish().to_le_bytes();
match item.len() {
9..=16 => {
self.buf.extend_from_slice(&h1[..2]);
self.buf.extend_from_slice(&h2[..2]);
self.buf.extend_from_slice(&h3[..2]);
self.buf.extend_from_slice(&h4[..2]);
}
17..=32 => {
self.buf.extend_from_slice(&h1[..3]);
self.buf.extend_from_slice(&h2[..3]);
self.buf.extend_from_slice(&h3[..3]);
self.buf.extend_from_slice(&h4[..3]);
}
_ => {
self.buf.extend_from_slice(&h1[..4]);
self.buf.extend_from_slice(&h2[..4]);
self.buf.extend_from_slice(&h3[..4]);
self.buf.extend_from_slice(&h4[..4]);
}
}
}
self
}
}
impl<T: AsRef<BitmapClass>> BitmapKey<T> {
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize {
std::mem::size_of::<BitmapKey<BitmapClass>>()
+ match self.class.as_ref() {
BitmapClass::DocumentIds => 0,
BitmapClass::Tag { value, .. } => match value {
TagValue::Id(_) => U32_LEN,
TagValue::Text(v) => v.len(),
TagValue::Static(_) => 1,
},
BitmapClass::Text { token, .. } => token.len(),
}
}
}
impl<T: AsRef<ValueClass>> ValueKey<T> {
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize {
std::mem::size_of::<ValueKey<ValueClass>>()
+ match self.class.as_ref() {
ValueClass::Property(_) => 1,
ValueClass::Acl(_) => U32_LEN,
ValueClass::Named(v) => v.len(),
}
}
}
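
Pulling the pieces together, a text-bitmap key serialized above is assumed to lay out as follows (subspace byte included when requested):

// [SUBSPACE_BITMAPS][account_id: u32][collection: u8]
// [BM_TEXT | token.len: u8][field: u8][token.hash: 8 bytes][block_num]
//
// BM_TEXT is 1 << 7 and BM_TAG is 1 << 6, so token.len is assumed to stay
// below 64 for the class byte to remain unambiguous.
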

View file

@ -23,7 +23,7 @@
use std::{collections::HashSet, hash::Hash, slice::Iter, time::SystemTime}; use std::{collections::HashSet, hash::Hash, slice::Iter, time::SystemTime};
use nlp::tokenizers::space::SpaceTokenizer; use nlp::tokenizers::word::WordTokenizer;
use utils::codec::leb128::{Leb128Iterator, Leb128Vec}; use utils::codec::leb128::{Leb128Iterator, Leb128Vec};
use crate::{ use crate::{
@ -35,6 +35,7 @@ use self::assert::AssertValue;
pub mod assert; pub mod assert;
pub mod batch; pub mod batch;
pub mod blob; pub mod blob;
pub mod hash;
pub mod key; pub mod key;
pub mod log; pub mod log;
@ -92,14 +93,20 @@ pub enum Operation {
}, },
} }
#[derive(Debug, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum BitmapClass { pub enum BitmapClass {
DocumentIds, DocumentIds,
Tag { field: u8, value: TagValue }, Tag { field: u8, value: TagValue },
Text { field: u8, token: Vec<u8> }, Text { field: u8, token: BitmapHash },
} }
#[derive(Debug, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct BitmapHash {
pub hash: [u8; 8],
pub len: u8,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TagValue { pub enum TagValue {
Id(u32), Id(u32),
Text(Vec<u8>), Text(Vec<u8>),
@ -111,6 +118,7 @@ pub enum ValueClass {
Property(u8), Property(u8),
Acl(u32), Acl(u32),
Named(Vec<u8>), Named(Vec<u8>),
TermIndex,
} }
#[derive(Debug, PartialEq, Eq, Hash, Default)] #[derive(Debug, PartialEq, Eq, Hash, Default)]
@ -352,7 +360,7 @@ impl ToBitmaps for &str {
ops.push(Operation::Bitmap { ops.push(Operation::Bitmap {
class: BitmapClass::Text { class: BitmapClass::Text {
field, field,
token: token.into_bytes(), token: BitmapHash::new(token),
}, },
set, set,
}); });
@ -362,8 +370,8 @@ impl ToBitmaps for &str {
impl TokenizeText for &str { impl TokenizeText for &str {
fn tokenize_into(&self, tokens: &mut HashSet<String>) { fn tokenize_into(&self, tokens: &mut HashSet<String>) {
for token in SpaceTokenizer::new(self, MAX_TOKEN_LENGTH) { for token in WordTokenizer::new(self, MAX_TOKEN_LENGTH) {
tokens.insert(token); tokens.insert(token.word.into_owned());
} }
} }
@ -479,6 +487,10 @@ impl BlobHash {
pub fn try_from_hash_slice(value: &[u8]) -> Result<BlobHash, std::array::TryFromSliceError> { pub fn try_from_hash_slice(value: &[u8]) -> Result<BlobHash, std::array::TryFromSliceError> {
value.try_into().map(BlobHash) value.try_into().map(BlobHash)
} }
pub fn as_slice(&self) -> &[u8] {
self.0.as_ref()
}
} }
impl From<&[u8]> for BlobHash { impl From<&[u8]> for BlobHash {
@ -523,6 +535,12 @@ impl AsRef<BlobClass> for BlobClass {
} }
} }
impl From<BlobHash> for Vec<u8> {
fn from(value: BlobHash) -> Self {
value.0.to_vec()
}
}
impl BlobClass { impl BlobClass {
pub fn account_id(&self) -> u32 { pub fn account_id(&self) -> u32 {
match self { match self {

View file

@ -23,6 +23,7 @@ opentelemetry-semantic-conventions = { version = "0.12.0" }
dashmap = "5.4" dashmap = "5.4"
ahash = { version = "0.8" } ahash = { version = "0.8" }
chrono = "0.4" chrono = "0.4"
rand = "0.8.5"
[target.'cfg(unix)'.dependencies] [target.'cfg(unix)'.dependencies]
privdrop = "0.5.3" privdrop = "0.5.3"

View file

@ -30,6 +30,7 @@ pub mod config;
pub mod ipc; pub mod ipc;
pub mod listener; pub mod listener;
pub mod map; pub mod map;
pub mod snowflake;
pub mod suffixlist; pub mod suffixlist;
use opentelemetry::{ use opentelemetry::{

View file

@ -0,0 +1,69 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{
sync::atomic::{AtomicU64, Ordering},
time::{Duration, SystemTime},
};
pub struct SnowflakeIdGenerator {
epoch: SystemTime,
node_id: u64,
sequence: AtomicU64,
}
const SEQUENCE_LEN: u64 = 12;
const NODE_ID_LEN: u64 = 9;
const SEQUENCE_MASK: u64 = (1 << SEQUENCE_LEN) - 1;
const NODE_ID_MASK: u64 = (1 << NODE_ID_LEN) - 1;
impl SnowflakeIdGenerator {
pub fn new() -> Self {
Self::with_node_id(rand::random::<u64>())
}
pub fn with_node_id(node_id: u64) -> Self {
Self {
epoch: SystemTime::UNIX_EPOCH + Duration::from_secs(1632280000), // Sep 2021, ~52 years after UNIX_EPOCH
node_id,
sequence: 0.into(),
}
}
pub fn generate(&self) -> Option<u64> {
let elapsed = self.epoch.elapsed().ok()?.as_millis() as u64;
let sequence = self.sequence.fetch_add(1, Ordering::Relaxed);
(elapsed << (SEQUENCE_LEN + NODE_ID_LEN)
| (self.node_id & NODE_ID_MASK) << SEQUENCE_LEN
| (sequence & SEQUENCE_MASK))
.into()
}
}
impl Default for SnowflakeIdGenerator {
fn default() -> Self {
Self::new()
}
}
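
The generated id packs three fields; a worked example under the constants above:

// [ millis since custom epoch: 43 bits ][ node id: 9 bits ][ sequence: 12 bits ]
//
// e.g. node 5, sequence 7, 1_000 ms after the epoch:
//   1_000 << 21 | 5 << 12 | 7 == 2_097_172_487
//
// 43 bits of milliseconds give roughly 278 years of headroom from the
// September 2021 epoch; the 12-bit sequence disambiguates ids generated in
// the same millisecond (it wraps, so uniqueness assumes fewer than 4096 ids
// per node per millisecond).
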

View file

@ -25,9 +25,11 @@ use std::{fs, io};
use imap_proto::ResponseType; use imap_proto::ResponseType;
use super::{resources_dir, AssertResult, ImapConnection, Type}; use crate::jmap::wait_for_index;
pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) { use super::{resources_dir, AssertResult, IMAPTest, ImapConnection, Type};
pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection, handle: &IMAPTest) {
// Invalid APPEND commands // Invalid APPEND commands
imap.send("APPEND \"All Mail\" {1+}\r\na").await; imap.send("APPEND \"All Mail\" {1+}\r\na").await;
imap.assert_read(Type::Tagged, ResponseType::No) imap.assert_read(Type::Tagged, ResponseType::No)
@ -80,6 +82,8 @@ pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
assert_eq!(code.next(), Some(expected_uid.to_string().as_str())); assert_eq!(code.next(), Some(expected_uid.to_string().as_str()));
expected_uid += 1; expected_uid += 1;
} }
wait_for_index(&handle.jmap).await;
} }
pub async fn assert_append_message( pub async fn assert_append_message(

View file

@ -225,7 +225,7 @@ refresh-token-renew = "2s"
"#; "#;
#[allow(dead_code)] #[allow(dead_code)]
struct IMAPTest { pub struct IMAPTest {
jmap: Arc<JMAP>, jmap: Arc<JMAP>,
imap: Arc<IMAP>, imap: Arc<IMAP>,
temp_dir: TempDir, temp_dir: TempDir,
@ -331,7 +331,7 @@ async fn init_imap_tests(delete_if_exists: bool) -> IMAPTest {
pub async fn imap_tests() { pub async fn imap_tests() {
/*tracing::subscriber::set_global_default( /*tracing::subscriber::set_global_default(
tracing_subscriber::FmtSubscriber::builder() tracing_subscriber::FmtSubscriber::builder()
.with_max_level(tracing::Level::TRACE) .with_max_level(tracing::Level::DEBUG)
.finish(), .finish(),
) )
.unwrap();*/ .unwrap();*/
@ -364,10 +364,10 @@ pub async fn imap_tests() {
} }
mailbox::test(&mut imap, &mut imap_check).await; mailbox::test(&mut imap, &mut imap_check).await;
append::test(&mut imap, &mut imap_check).await; append::test(&mut imap, &mut imap_check, &handle).await;
search::test(&mut imap, &mut imap_check).await; search::test(&mut imap, &mut imap_check).await;
fetch::test(&mut imap, &mut imap_check).await; fetch::test(&mut imap, &mut imap_check).await;
store::test(&mut imap, &mut imap_check).await; store::test(&mut imap, &mut imap_check, &handle).await;
copy_move::test(&mut imap, &mut imap_check).await; copy_move::test(&mut imap, &mut imap_check).await;
thread::test(&mut imap, &mut imap_check).await; thread::test(&mut imap, &mut imap_check).await;
idle::test(&mut imap, &mut imap_check).await; idle::test(&mut imap, &mut imap_check).await;

View file

@ -23,9 +23,11 @@
use imap_proto::ResponseType; use imap_proto::ResponseType;
use super::{AssertResult, ImapConnection, Type}; use crate::jmap::wait_for_index;
pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) { use super::{AssertResult, IMAPTest, ImapConnection, Type};
pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection, handle: &IMAPTest) {
// Select INBOX // Select INBOX
imap.send("SELECT INBOX").await; imap.send("SELECT INBOX").await;
imap.assert_read(Type::Tagged, ResponseType::Ok) imap.assert_read(Type::Tagged, ResponseType::Ok)
@ -73,6 +75,7 @@ pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
.assert_contains("UIDNEXT 11"); .assert_contains("UIDNEXT 11");
// Store using saved searches // Store using saved searches
wait_for_index(&handle.jmap).await;
imap.send("SEARCH RETURN (SAVE) FROM nathaniel").await; imap.send("SEARCH RETURN (SAVE) FROM nathaniel").await;
imap.assert_read(Type::Tagged, ResponseType::Ok).await; imap.assert_read(Type::Tagged, ResponseType::Ok).await;
imap.send("UID STORE $ +FLAGS (\\Answered)").await; imap.send("UID STORE $ +FLAGS (\\Answered)").await;

View file

@ -45,7 +45,7 @@ use crate::{
directory::sql::{ directory::sql::{
add_to_group, create_test_group_with_email, create_test_user_with_email, remove_from_group, add_to_group, create_test_group_with_email, create_test_user_with_email, remove_from_group,
}, },
jmap::{mailbox::destroy_all_mailboxes, test_account_login}, jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, test_account_login},
}; };
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) { pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
@ -777,10 +777,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
admin_client.set_default_account_id(&id.to_string()); admin_client.set_default_account_id(&id.to_string());
destroy_all_mailboxes(admin_client).await; destroy_all_mailboxes(admin_client).await;
} }
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
pub fn assert_forbidden<T: Debug>(result: Result<T, jmap_client::Error>) { pub fn assert_forbidden<T: Debug>(result: Result<T, jmap_client::Error>) {

View file

@ -33,7 +33,7 @@ use jmap_proto::types::id::Id;
use crate::{ use crate::{
directory::sql::{create_test_user_with_email, link_test_address}, directory::sql::{create_test_user_with_email, link_test_address},
jmap::mailbox::destroy_all_mailboxes, jmap::{assert_is_empty, mailbox::destroy_all_mailboxes},
}; };
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) { pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
@ -202,8 +202,5 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
// Destroy test accounts // Destroy test accounts
admin_client.set_default_account_id(&account_id); admin_client.set_default_account_id(&account_id);
destroy_all_mailboxes(admin_client).await; destroy_all_mailboxes(admin_client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }

View file

@ -40,7 +40,10 @@ use reqwest::{header, redirect::Policy};
use serde::de::DeserializeOwned; use serde::de::DeserializeOwned;
use store::ahash::AHashMap; use store::ahash::AHashMap;
use crate::{directory::sql::create_test_user_with_email, jmap::mailbox::destroy_all_mailboxes}; use crate::{
directory::sql::create_test_user_with_email,
jmap::{assert_is_empty, mailbox::destroy_all_mailboxes},
};
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) { pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
println!("Running OAuth tests..."); println!("Running OAuth tests...");
@ -307,10 +310,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
// Destroy test accounts // Destroy test accounts
admin_client.set_default_account_id(john_id); admin_client.set_default_account_id(john_id);
destroy_all_mailboxes(admin_client).await; destroy_all_mailboxes(admin_client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
async fn post_bytes(url: &str, params: &AHashMap<String, String>) -> Bytes { async fn post_bytes(url: &str, params: &AHashMap<String, String>) -> Bytes {

View file

@ -30,7 +30,7 @@ use serde_json::Value;
use crate::{ use crate::{
directory::sql::create_test_user_with_email, directory::sql::create_test_user_with_email,
jmap::{jmap_json_request, mailbox::destroy_all_mailboxes}, jmap::{assert_is_empty, jmap_json_request, mailbox::destroy_all_mailboxes},
}; };
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) { pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
@ -489,8 +489,5 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
// Remove test data // Remove test data
admin_client.set_default_account_id(account_id.to_string()); admin_client.set_default_account_id(account_id.to_string());
destroy_all_mailboxes(admin_client).await; destroy_all_mailboxes(admin_client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }

View file

@ -34,7 +34,7 @@ use tokio::{
use crate::{ use crate::{
directory::sql::{create_test_user_with_email, link_test_address, remove_test_alias}, directory::sql::{create_test_user_with_email, link_test_address, remove_test_alias},
jmap::mailbox::destroy_all_mailboxes, jmap::{assert_is_empty, mailbox::destroy_all_mailboxes},
}; };
pub async fn test(server: Arc<JMAP>, client: &mut Client) { pub async fn test(server: Arc<JMAP>, client: &mut Client) {
@ -248,10 +248,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
client.set_default_account_id(account_id); client.set_default_account_id(account_id);
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
} }
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
pub struct SmtpConnection { pub struct SmtpConnection {

View file

@ -34,6 +34,8 @@ use store::{
write::{log::ChangeLogBuilder, BatchBuilder}, write::{log::ChangeLogBuilder, BatchBuilder},
}; };
use crate::jmap::assert_is_empty;
pub async fn test(server: Arc<JMAP>, client: &mut Client) { pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Email Changes tests..."); println!("Running Email Changes tests...");
@ -315,10 +317,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
assert_eq!(created, vec![2, 3, 11, 12]); assert_eq!(created, vec![2, 3, 11, 12]);
assert_eq!(changes.updated(), Vec::<String>::new()); assert_eq!(changes.updated(), Vec::<String>::new());
assert_eq!(changes.destroyed(), Vec::<String>::new()); assert_eq!(changes.destroyed(), Vec::<String>::new());
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]

View file

@ -27,7 +27,7 @@ use jmap::JMAP;
use jmap_client::{client::Client, mailbox::Role}; use jmap_client::{client::Client, mailbox::Role};
use jmap_proto::types::id::Id; use jmap_proto::types::id::Id;
use crate::jmap::mailbox::destroy_all_mailboxes; use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
pub async fn test(server: Arc<JMAP>, client: &mut Client) { pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Email Copy tests..."); println!("Running Email Copy tests...");
@ -116,8 +116,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
client.set_default_account_id(Id::new(2).to_string()); client.set_default_account_id(Id::new(2).to_string());
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }

View file

@ -31,7 +31,7 @@ use jmap_client::{
use jmap_proto::types::id::Id; use jmap_proto::types::id::Id;
use mail_parser::HeaderName; use mail_parser::HeaderName;
use crate::jmap::{mailbox::destroy_all_mailboxes, replace_blob_ids}; use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, replace_blob_ids};
pub async fn test(server: Arc<JMAP>, client: &mut Client) { pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Email Get tests..."); println!("Running Email Get tests...");
@ -177,11 +177,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
} }
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
assert_is_empty(server).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
pub fn all_headers() -> Vec<email::Property> { pub fn all_headers() -> Vec<email::Property> {

View file

@ -31,7 +31,9 @@ use jmap_client::{
}; };
use jmap_proto::types::id::Id; use jmap_proto::types::id::Id;
use crate::jmap::{email_get::all_headers, mailbox::destroy_all_mailboxes, replace_blob_ids}; use crate::jmap::{
assert_is_empty, email_get::all_headers, mailbox::destroy_all_mailboxes, replace_blob_ids,
};
pub async fn test(server: Arc<JMAP>, client: &mut Client) { pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Email Parse tests..."); println!("Running Email Parse tests...");
@ -243,9 +245,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
} }
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
assert_is_empty(server).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }

View file

@ -24,7 +24,7 @@
use std::{collections::hash_map::Entry, sync::Arc, time::Instant}; use std::{collections::hash_map::Entry, sync::Arc, time::Instant};
use crate::{ use crate::{
jmap::mailbox::destroy_all_mailboxes, jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, wait_for_index},
store::{deflate_artwork_data, query::FIELDS}, store::{deflate_artwork_data, query::FIELDS},
}; };
use jmap::JMAP; use jmap::JMAP;
@ -94,6 +94,9 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client, insert: bool) {
"thread {} found", "thread {} found",
MAX_THREADS MAX_THREADS
); );
// Wait for indexing to complete
wait_for_index(&server).await;
} }
println!("Running JMAP Mail query tests..."); println!("Running JMAP Mail query tests...");
@ -115,10 +118,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client, insert: bool) {
.unwrap(); .unwrap();
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
pub async fn query(client: &mut Client) { pub async fn query(client: &mut Client) {

View file

@ -37,6 +37,7 @@ use store::{
}; };
use crate::jmap::{ use crate::jmap::{
assert_is_empty,
email_changes::{LogAction, ParseState}, email_changes::{LogAction, ParseState},
mailbox::destroy_all_mailboxes, mailbox::destroy_all_mailboxes,
}; };
@ -287,10 +288,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
} }
server.store.write(batch.build_batch()).await.unwrap(); server.store.write(batch.build_batch()).await.unwrap();
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]

View file

@ -23,7 +23,7 @@
use std::{fs, path::PathBuf, sync::Arc}; use std::{fs, path::PathBuf, sync::Arc};
use crate::jmap::mailbox::destroy_all_mailboxes; use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, wait_for_index};
use jmap::{mailbox::INBOX_ID, JMAP}; use jmap::{mailbox::INBOX_ID, JMAP};
use jmap_client::{client::Client, core::query, email::query::Filter}; use jmap_client::{client::Client, core::query, email::query::Filter};
use jmap_proto::types::id::Id; use jmap_proto::types::id::Id;
@ -64,6 +64,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
.take_id(); .take_id();
email_ids.insert(email_name, email_id); email_ids.insert(email_name, email_id);
} }
wait_for_index(&server).await;
// Run tests // Run tests
for (filter, email_name, snippet_subject, snippet_preview) in [ for (filter, email_name, snippet_subject, snippet_preview) in [
@ -179,8 +180,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
// Destroy test data // Destroy test data
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }

View file

@ -23,7 +23,7 @@
use std::{fs, path::PathBuf, sync::Arc}; use std::{fs, path::PathBuf, sync::Arc};
use crate::jmap::mailbox::destroy_all_mailboxes; use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
use jmap::{mailbox::INBOX_ID, JMAP}; use jmap::{mailbox::INBOX_ID, JMAP};
use jmap_client::{ use jmap_client::{
client::Client, client::Client,
@ -46,11 +46,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
update(client, &mailbox_id).await; update(client, &mailbox_id).await;
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
assert_is_empty(server).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
async fn create(client: &mut Client, mailbox_id: &str) { async fn create(client: &mut Client, mailbox_id: &str) {

View file

@ -46,7 +46,7 @@ use tokio::{
use crate::{ use crate::{
directory::sql::create_test_user_with_email, directory::sql::create_test_user_with_email,
jmap::{email_set::assert_email_properties, mailbox::destroy_all_mailboxes}, jmap::{assert_is_empty, email_set::assert_email_properties, mailbox::destroy_all_mailboxes},
}; };
#[derive(Default, Debug, PartialEq, Eq)] #[derive(Default, Debug, PartialEq, Eq)]
@ -471,10 +471,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
client.email_submission_destroy(&id).await.unwrap(); client.email_submission_destroy(&id).await.unwrap();
} }
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
pub fn spawn_mock_smtp_server() -> (mpsc::Receiver<MockMessage>, Arc<Mutex<MockSMTPSettings>>) { pub fn spawn_mock_smtp_server() -> (mpsc::Receiver<MockMessage>, Arc<Mutex<MockSMTPSettings>>) {

View file

@ -25,7 +25,10 @@ use std::{sync::Arc, time::Duration};
use crate::{ use crate::{
directory::sql::create_test_user_with_email, directory::sql::create_test_user_with_email,
jmap::{delivery::SmtpConnection, mailbox::destroy_all_mailboxes, test_account_login}, jmap::{
assert_is_empty, delivery::SmtpConnection, mailbox::destroy_all_mailboxes,
test_account_login,
},
}; };
use futures::StreamExt; use futures::StreamExt;
use jmap::{mailbox::INBOX_ID, JMAP}; use jmap::{mailbox::INBOX_ID, JMAP};
@ -130,10 +133,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
assert_ping(&mut event_rx).await; assert_ping(&mut event_rx).await;
destroy_all_mailboxes(admin_client).await; destroy_all_mailboxes(admin_client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
async fn assert_state( async fn assert_state(

View file

@ -37,6 +37,8 @@ use jmap_proto::types::{id::Id, state::State};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use store::ahash::AHashMap; use store::ahash::AHashMap;
use crate::jmap::assert_is_empty;
pub async fn test(server: Arc<JMAP>, client: &mut Client) { pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Mailbox tests..."); println!("Running Mailbox tests...");
@ -606,10 +608,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
client.set_default_account_id(Id::from(1u64)); client.set_default_account_id(Id::from(1u64));
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
async fn create_test_mailboxes(client: &mut Client) -> AHashMap<String, String> { async fn create_test_mailboxes(client: &mut Client) -> AHashMap<String, String> {

View file

@ -25,7 +25,11 @@ use std::{sync::Arc, time::Duration};
use base64::{engine::general_purpose, Engine}; use base64::{engine::general_purpose, Engine};
use directory::config::ConfigDirectory; use directory::config::ConfigDirectory;
use jmap::{api::JmapSessionManager, services::IPC_CHANNEL_BUFFER, JMAP}; use jmap::{
api::JmapSessionManager,
services::{housekeeper::Event, IPC_CHANNEL_BUFFER},
JMAP,
};
use jmap_client::client::{Client, Credentials}; use jmap_client::client::{Client, Credentials};
use jmap_proto::types::id::Id; use jmap_proto::types::id::Id;
use reqwest::header; use reqwest::header;
@ -222,17 +226,23 @@ refresh-token-renew = "2s"
#[tokio::test] #[tokio::test]
pub async fn jmap_tests() { pub async fn jmap_tests() {
let coco = 1; /*let level = "warn";
tracing::subscriber::set_global_default( tracing::subscriber::set_global_default(
tracing_subscriber::FmtSubscriber::builder() tracing_subscriber::FmtSubscriber::builder()
.with_max_level(tracing::Level::WARN) .with_env_filter(
tracing_subscriber::EnvFilter::builder()
.parse(
format!("smtp={level},imap={level},jmap={level},store={level},utils={level},directory={level}"),
)
.unwrap(),
)
.finish(), .finish(),
) )
.unwrap(); .unwrap();*/
let delete = true; let delete = true;
let mut params = init_jmap_tests(delete).await; let mut params = init_jmap_tests(delete).await;
/*email_query::test(params.server.clone(), &mut params.client, delete).await; email_query::test(params.server.clone(), &mut params.client, delete).await;
email_get::test(params.server.clone(), &mut params.client).await; email_get::test(params.server.clone(), &mut params.client).await;
email_set::test(params.server.clone(), &mut params.client).await; email_set::test(params.server.clone(), &mut params.client).await;
email_parse::test(params.server.clone(), &mut params.client).await; email_parse::test(params.server.clone(), &mut params.client).await;
@ -254,7 +264,7 @@ pub async fn jmap_tests() {
email_submission::test(params.server.clone(), &mut params.client).await; email_submission::test(params.server.clone(), &mut params.client).await;
websocket::test(params.server.clone(), &mut params.client).await; websocket::test(params.server.clone(), &mut params.client).await;
quota::test(params.server.clone(), &mut params.client).await; quota::test(params.server.clone(), &mut params.client).await;
crypto::test(params.server.clone(), &mut params.client).await;*/ crypto::test(params.server.clone(), &mut params.client).await;
blob::test(params.server.clone(), &mut params.client).await; blob::test(params.server.clone(), &mut params.client).await;
if delete { if delete {
@ -285,6 +295,33 @@ struct JMAPTest {
shutdown_tx: watch::Sender<bool>, shutdown_tx: watch::Sender<bool>,
} }
pub async fn wait_for_index(server: &JMAP) {
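// Poll the housekeeper until it reports that no FTS indexing tasks remain.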
loop {
let (tx, rx) = tokio::sync::oneshot::channel();
server
.housekeeper_tx
.send(Event::IndexIsActive(tx))
.await
.unwrap();
if rx.await.unwrap() {
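// A `true` reply means the indexer is still busy; back off briefly and retry.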
tokio::time::sleep(Duration::from_millis(100)).await;
} else {
break;
}
}
}
pub async fn assert_is_empty(server: Arc<JMAP>) {
// Wait for pending FTS index tasks
wait_for_index(&server).await;
// Assert that both the data store and the blob store are empty
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
}
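
With these helpers in place, the shape that the tests in this commit migrate to looks roughly like the sketch below. This is a minimal illustration rather than a test from the suite (the ingestion and assertion steps are elided); wait_for_index, destroy_all_mailboxes and assert_is_empty are the real helpers used throughout the diff.

pub async fn example_test(server: Arc<JMAP>, client: &mut Client) {
    // ... ingest test messages through `client` ...

    // FTS indexing now runs in the background, so drain the housekeeper
    // queue before issuing queries that depend on the full-text index.
    wait_for_index(&server).await;

    // ... run query/snippet assertions ...

    // Tear down and verify that neither store retained any keys.
    destroy_all_mailboxes(client).await;
    assert_is_empty(server).await;
}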
async fn init_jmap_tests(delete_if_exists: bool) -> JMAPTest { async fn init_jmap_tests(delete_if_exists: bool) -> JMAPTest {
// Load and parse config // Load and parse config
let temp_dir = TempDir::new("jmap_tests", delete_if_exists); let temp_dir = TempDir::new("jmap_tests", delete_if_exists);

View file

@ -53,7 +53,7 @@ use utils::listener::SessionData;
use crate::{ use crate::{
add_test_certs, add_test_certs,
directory::sql::create_test_user_with_email, directory::sql::create_test_user_with_email,
jmap::{mailbox::destroy_all_mailboxes, test_account_login}, jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, test_account_login},
}; };
const SERVER: &str = " const SERVER: &str = "
@ -218,11 +218,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
expect_nothing(&mut event_rx).await; expect_nothing(&mut event_rx).await;
destroy_all_mailboxes(admin_client).await; destroy_all_mailboxes(admin_client).await;
assert_is_empty(server).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
#[derive(Clone)] #[derive(Clone)]

View file

@ -26,8 +26,8 @@ use std::sync::Arc;
use crate::{ use crate::{
directory::sql::{add_to_group, create_test_user_with_email, set_test_quota}, directory::sql::{add_to_group, create_test_user_with_email, set_test_quota},
jmap::{ jmap::{
delivery::SmtpConnection, jmap_raw_request, mailbox::destroy_all_mailboxes, assert_is_empty, delivery::SmtpConnection, jmap_raw_request,
test_account_login, mailbox::destroy_all_mailboxes, test_account_login,
}, },
}; };
use jmap::{blob::upload::DISABLE_UPLOAD_QUOTA, mailbox::INBOX_ID, JMAP}; use jmap::{blob::upload::DISABLE_UPLOAD_QUOTA, mailbox::INBOX_ID, JMAP};
@ -320,10 +320,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
admin_client.set_default_account_id(account_id.to_string()); admin_client.set_default_account_id(account_id.to_string());
destroy_all_mailboxes(admin_client).await; destroy_all_mailboxes(admin_client).await;
} }
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
fn assert_over_quota<T: std::fmt::Debug>(result: Result<T, jmap_client::Error>) { fn assert_over_quota<T: std::fmt::Debug>(result: Result<T, jmap_client::Error>) {

View file

@ -40,6 +40,7 @@ use std::{
use crate::{ use crate::{
directory::sql::create_test_user_with_email, directory::sql::create_test_user_with_email,
jmap::{ jmap::{
assert_is_empty,
delivery::SmtpConnection, delivery::SmtpConnection,
email_submission::{assert_message_delivery, spawn_mock_smtp_server, MockMessage}, email_submission::{assert_message_delivery, spawn_mock_smtp_server, MockMessage},
mailbox::destroy_all_mailboxes, mailbox::destroy_all_mailboxes,
@ -486,10 +487,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
client.sieve_script_destroy(&id).await.unwrap(); client.sieve_script_destroy(&id).await.unwrap();
} }
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
fn get_script(name: &str) -> Vec<u8> { fn get_script(name: &str) -> Vec<u8> {

View file

@ -34,6 +34,8 @@ use jmap_client::{
use jmap_proto::types::{collection::Collection, id::Id, property::Property}; use jmap_proto::types::{collection::Collection, id::Id, property::Property};
use store::rand::{self, Rng}; use store::rand::{self, Rng};
use super::assert_is_empty;
const TEST_USER_ID: u32 = 1; const TEST_USER_ID: u32 = 1;
const NUM_PASSES: usize = 1; const NUM_PASSES: usize = 1;
@ -254,11 +256,7 @@ async fn email_tests(server: Arc<JMAP>, client: Arc<Client>) {
} }
destroy_all_mailboxes(&client).await; destroy_all_mailboxes(&client).await;
assert_is_empty(server.clone()).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
} }
@ -331,10 +329,7 @@ async fn mailbox_tests(server: Arc<JMAP>, client: Arc<Client>) {
join_all(futures).await; join_all(futures).await;
destroy_all_mailboxes(&client).await; destroy_all_mailboxes(&client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
async fn create_mailbox(client: &Client, mailbox: &str) -> Vec<String> { async fn create_mailbox(client: &Client, mailbox: &str) -> Vec<String> {

View file

@ -23,7 +23,7 @@
use std::sync::Arc; use std::sync::Arc;
use crate::jmap::mailbox::destroy_all_mailboxes; use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
use jmap::JMAP; use jmap::JMAP;
use jmap_client::{client::Client, mailbox::Role}; use jmap_client::{client::Client, mailbox::Role};
use jmap_proto::types::id::Id; use jmap_proto::types::id::Id;
@ -66,8 +66,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
); );
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }

View file

@ -23,7 +23,7 @@
use std::sync::Arc; use std::sync::Arc;
use crate::jmap::mailbox::destroy_all_mailboxes; use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
use jmap::JMAP; use jmap::JMAP;
use jmap_client::{client::Client, email, mailbox::Role}; use jmap_client::{client::Client, email, mailbox::Role};
use jmap_proto::types::id::Id; use jmap_proto::types::id::Id;
@ -203,10 +203,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
} }
} }
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
fn build_message(message: usize, in_reply_to: Option<usize>, thread_num: usize) -> String { fn build_message(message: usize, in_reply_to: Option<usize>, thread_num: usize) -> String {

View file

@ -30,6 +30,7 @@ use std::{sync::Arc, time::Instant};
use crate::{ use crate::{
directory::sql::create_test_user_with_email, directory::sql::create_test_user_with_email,
jmap::{ jmap::{
assert_is_empty,
delivery::SmtpConnection, delivery::SmtpConnection,
email_submission::{ email_submission::{
assert_message_delivery, expect_nothing, spawn_mock_smtp_server, MockMessage, assert_message_delivery, expect_nothing, spawn_mock_smtp_server, MockMessage,
@ -173,8 +174,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
// Remove test data // Remove test data
client.vacation_response_destroy().await.unwrap(); client.vacation_response_destroy().await.unwrap();
destroy_all_mailboxes(client).await; destroy_all_mailboxes(client).await;
server assert_is_empty(server).await;
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }

View file

@ -40,7 +40,7 @@ use tokio::sync::mpsc;
use crate::{ use crate::{
directory::sql::create_test_user_with_email, directory::sql::create_test_user_with_email,
jmap::{mailbox::destroy_all_mailboxes, test_account_login}, jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, test_account_login},
}; };
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) { pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
@ -125,11 +125,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
admin_client.set_default_account_id(account_id); admin_client.set_default_account_id(account_id);
destroy_all_mailboxes(admin_client).await; destroy_all_mailboxes(admin_client).await;
assert_is_empty(server).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
} }
async fn expect_response( async fn expect_response(

View file

@ -35,37 +35,12 @@ pub async fn test(db: Store) {
test_1(db.clone()).await; test_1(db.clone()).await;
test_2(db.clone()).await; test_2(db.clone()).await;
test_3(db.clone()).await; test_3(db).await;
test_4(db).await;
ID_ASSIGNMENT_EXPIRY.store(60 * 60, std::sync::atomic::Ordering::Relaxed); ID_ASSIGNMENT_EXPIRY.store(60 * 60, std::sync::atomic::Ordering::Relaxed);
} }
async fn test_1(db: Store) { async fn test_1(db: Store) {
// Test change id assignment
let mut handles = Vec::new();
let mut expected_ids = HashSet::new();
// Create 100 change ids concurrently
for id in 0..100 {
handles.push({
let db = db.clone();
tokio::spawn(async move { db.assign_change_id(0).await })
});
expected_ids.insert(id);
}
for handle in handles {
let assigned_id = handle.await.unwrap().unwrap();
assert!(
expected_ids.remove(&assigned_id),
"already assigned or invalid: {assigned_id} "
);
}
db.destroy().await;
}
async fn test_2(db: Store) {
// Test document id assignment // Test document id assignment
for wait_for_expiry in [true, false] { for wait_for_expiry in [true, false] {
let mut handles = Vec::new(); let mut handles = Vec::new();
@ -101,7 +76,7 @@ async fn test_2(db: Store) {
db.destroy().await; db.destroy().await;
} }
async fn test_3(db: Store) { async fn test_2(db: Store) {
// Create document ids and try reassigning // Create document ids and try reassigning
let mut expected_ids = AHashSet::new(); let mut expected_ids = AHashSet::new();
let mut batch = BatchBuilder::new(); let mut batch = BatchBuilder::new();
@ -132,7 +107,7 @@ async fn test_3(db: Store) {
db.destroy().await; db.destroy().await;
} }
async fn test_4(db: Store) { async fn test_3(db: Store) {
// Try reassigning deleted ids // Try reassigning deleted ids
let mut expected_ids = AHashSet::new(); let mut expected_ids = AHashSet::new();
let mut batch = BatchBuilder::new(); let mut batch = BatchBuilder::new();

View file

@ -25,7 +25,7 @@ pub mod assign_id;
pub mod blob; pub mod blob;
pub mod query; pub mod query;
use std::{io::Read, sync::Arc}; use std::io::Read;
use ::store::Store; use ::store::Store;
@ -56,8 +56,8 @@ pub async fn store_tests() {
if insert { if insert {
db.destroy().await; db.destroy().await;
} }
assign_id::test(db.clone()).await; query::test(db.clone(), insert).await;
query::test(db, insert).await; assign_id::test(db).await;
temp_dir.delete(); temp_dir.delete();
} }

View file

@ -22,13 +22,20 @@
*/ */
use std::{ use std::{
fmt::Display,
sync::{Arc, Mutex}, sync::{Arc, Mutex},
time::Instant, time::Instant,
}; };
use jmap_proto::types::keyword::Keyword; use jmap_proto::types::keyword::Keyword;
use nlp::language::Language; use nlp::language::Language;
use store::{ahash::AHashMap, query::sort::Pagination, write::ValueClass}; use store::{
ahash::AHashMap,
fts::{index::FtsDocument, Field, FtsFilter},
query::sort::Pagination,
write::ValueClass,
FtsStore,
};
use store::{ use store::{
query::{Comparator, Filter}, query::{Comparator, Filter},
@ -93,9 +100,34 @@ const FIELDS_OPTIONS: [FieldType; 20] = [
FieldType::Text, // "url", FieldType::Text, // "url",
]; ];
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct FieldId(u8);
impl From<FieldId> for u8 {
fn from(field_id: FieldId) -> Self {
field_id.0
}
}
impl Display for FieldId {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{} ({})", FIELDS[self.0 as usize], self.0)
}
}
impl FieldId {
pub fn new(field_id: u8) -> Field<FieldId> {
Field::Header(Self(field_id))
}
pub fn inner(&self) -> u8 {
self.0
}
}
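
In short, FieldId::new wraps a raw field number into the Field::Header variant that FtsFilter expects, while inner() recovers the raw u8 for the non-FTS store filters. A minimal sketch, assuming for illustration only that "title" is FIELDS[6]:

let title: Field<FieldId> = FieldId::new(6);
let fts_filter = FtsFilter::has_english_text(title.clone(), "water");
let raw_id: u8 = match title {
    Field::Header(id) => id.inner(),
    _ => unreachable!(),
};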
#[allow(clippy::mutex_atomic)] #[allow(clippy::mutex_atomic)]
pub async fn test(db: Store, do_insert: bool) { pub async fn test(db: Store, do_insert: bool) {
println!("Running Store query tests..."); println!("Running Store query tests...");
let fts_store = FtsStore::from(db.clone());
let pool = rayon::ThreadPoolBuilder::new() let pool = rayon::ThreadPoolBuilder::new()
.num_threads(8) .num_threads(8)
@ -116,7 +148,10 @@ pub async fn test(db: Store, do_insert: bool) {
let documents = documents.clone(); let documents = documents.clone();
s.spawn_fifo(move |_| { s.spawn_fifo(move |_| {
/*let mut fts_builder = FtsIndexBuilder::with_default_language(Language::English); let mut fts_builder = FtsDocument::with_default_language(Language::English)
.with_account_id(0)
.with_collection(COLLECTION_ID)
.with_document_id(document_id as u32);
let mut builder = BatchBuilder::new(); let mut builder = BatchBuilder::new();
builder builder
.with_account_id(0) .with_account_id(0)
@ -137,7 +172,7 @@ pub async fn test(db: Store, do_insert: bool) {
FieldType::FullText => { FieldType::FullText => {
if !field.is_empty() { if !field.is_empty() {
fts_builder.index( fts_builder.index(
field_id, FieldId::new(field_id),
field.to_lowercase(), field.to_lowercase(),
Language::English, Language::English,
); );
@ -165,8 +200,10 @@ pub async fn test(db: Store, do_insert: bool) {
} }
} }
builder.custom(fts_builder); documents
documents.lock().unwrap().push(builder.build());*/ .lock()
.unwrap()
.push((builder.build(), fts_builder));
}); });
} }
}); });
@ -180,22 +217,31 @@ pub async fn test(db: Store, do_insert: bool) {
let now = Instant::now(); let now = Instant::now();
let batches = documents.lock().unwrap().drain(..).collect::<Vec<_>>(); let batches = documents.lock().unwrap().drain(..).collect::<Vec<_>>();
let mut chunk = Vec::new(); let mut chunk = Vec::new();
let mut fts_chunk = Vec::new();
for batch in batches { for (batch, fts_batch) in batches {
let chunk_instance = Instant::now(); let chunk_instance = Instant::now();
chunk.push({ chunk.push({
let db = db.clone(); let db = db.clone();
tokio::spawn(async move { db.write(batch).await }) tokio::spawn(async move { db.write(batch).await })
}); });
fts_chunk.push({
let fts_store = fts_store.clone();
tokio::spawn(async move { fts_store.index(fts_batch).await })
});
if chunk.len() == 1000 { if chunk.len() == 1000 {
for handle in chunk { for handle in chunk {
handle.await.unwrap().unwrap(); handle.await.unwrap().unwrap();
} }
for handle in fts_chunk {
handle.await.unwrap().unwrap();
}
println!( println!(
"Chunk insert took {} ms.", "Store insert took {} ms.",
chunk_instance.elapsed().as_millis() chunk_instance.elapsed().as_millis()
); );
chunk = Vec::new(); chunk = Vec::new();
fts_chunk = Vec::new();
} }
} }
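
Stripped of the chunking and task spawning, the dual-write pattern introduced here reduces to the sketch below: a condensed illustration built from the calls visible in this hunk, where document_id, field_id, text and batch stand in for the values produced by the surrounding loop.

// Build the FTS document for one record, mirroring the structured batch.
let mut fts_doc = FtsDocument::with_default_language(Language::English)
    .with_account_id(0)
    .with_collection(COLLECTION_ID)
    .with_document_id(document_id);
fts_doc.index(FieldId::new(field_id), text.to_lowercase(), Language::English);

// The two writes are independent and can be awaited separately:
db.write(batch).await.unwrap();          // structured values and bitmaps
fts_store.index(fts_doc).await.unwrap(); // full-text postings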
@ -209,156 +255,232 @@ pub async fn test(db: Store, do_insert: bool) {
} }
println!("Running filter tests..."); println!("Running filter tests...");
test_filter(db.clone()).await; test_filter(db.clone(), fts_store).await;
println!("Running sort tests..."); println!("Running sort tests...");
test_sort(db).await; test_sort(db).await;
} }
pub async fn test_filter(db: Store) { pub async fn test_filter(db: Store, fts: FtsStore) {
/* let mut fields = AHashMap::default();
let mut fields = AHashMap::default(); let mut fields_u8 = AHashMap::default();
for (field_num, field) in FIELDS.iter().enumerate() { for (field_num, field) in FIELDS.iter().enumerate() {
fields.insert(field.to_string(), field_num as u8); fields.insert(field.to_string(), FieldId::new(field_num as u8));
} fields_u8.insert(field.to_string(), field_num as u8);
}
let tests = [ let tests = [
( (
vec![ vec![
Filter::has_english_text(fields["title"], "water"), Filter::is_in_set(
Filter::eq(fields["year"], 1979u32), fts.query(
], 0,
vec!["p11293"], COLLECTION_ID,
), vec![FtsFilter::has_english_text(
( fields["title"].clone(),
vec![ "water",
Filter::has_english_text(fields["medium"], "gelatin"), )],
Filter::gt(fields["year"], 2000u32), )
Filter::lt(fields["width"], 180u32), .await
Filter::gt(fields["width"], 0u32), .unwrap(),
], ),
vec!["p79426", "p79427", "p79428", "p79429", "p79430"], Filter::eq(fields_u8["year"], 1979u32),
), ],
( vec!["p11293"],
vec![Filter::has_english_text(fields["title"], "'rustic bridge'")], ),
vec!["d05503"], (
), vec![
( Filter::is_in_set(
vec![ fts.query(
Filter::has_english_text(fields["title"], "'rustic'"), 0,
Filter::has_english_text(fields["title"], "study"), COLLECTION_ID,
], vec![FtsFilter::has_english_text(
vec!["d00399", "d05352"], fields["medium"].clone(),
), "gelatin",
( )],
vec![ )
Filter::has_text(fields["artist"], "mauro kunst", Language::None), .await
Filter::is_in_bitmap(fields["artistRole"], Keyword::Other("artist".to_string())), .unwrap(),
Filter::Or, ),
Filter::eq(fields["year"], 1969u32), Filter::gt(fields_u8["year"], 2000u32),
Filter::eq(fields["year"], 1971u32), Filter::lt(fields_u8["width"], 180u32),
Filter::End, Filter::gt(fields_u8["width"], 0u32),
], ],
vec!["p01764", "t05843"], vec!["p79426", "p79427", "p79428", "p79429", "p79430"],
), ),
( (
vec![ vec![Filter::is_in_set(
Filter::Not, fts.query(
Filter::has_english_text(fields["medium"], "oil"), 0,
Filter::End, COLLECTION_ID,
Filter::has_english_text(fields["creditLine"], "bequeath"), vec![FtsFilter::has_english_text(
Filter::Or, fields["title"].clone(),
Filter::And, "'rustic bridge'",
Filter::ge(fields["year"], 1900u32), )],
Filter::lt(fields["year"], 1910u32),
Filter::End,
Filter::And,
Filter::ge(fields["year"], 2000u32),
Filter::lt(fields["year"], 2010u32),
Filter::End,
Filter::End,
],
vec![
"n02478", "n02479", "n03568", "n03658", "n04327", "n04328", "n04721", "n04739",
"n05095", "n05096", "n05145", "n05157", "n05158", "n05159", "n05298", "n05303",
"n06070", "t01181", "t03571", "t05805", "t05806", "t12147", "t12154", "t12155",
],
),
(
vec![
Filter::And,
Filter::has_text(fields["artist"], "warhol", Language::None),
Filter::Not,
Filter::has_english_text(fields["title"], "'campbell'"),
Filter::End,
Filter::Not,
Filter::Or,
Filter::gt(fields["year"], 1980u32),
Filter::And,
Filter::gt(fields["width"], 500u32),
Filter::gt(fields["height"], 500u32),
Filter::End,
Filter::End,
Filter::End,
Filter::eq(fields["acquisitionYear"], 2008u32),
Filter::End,
],
vec!["ar00039", "t12600"],
),
(
vec![
Filter::has_english_text(fields["title"], "study"),
Filter::has_english_text(fields["medium"], "paper"),
Filter::has_english_text(fields["creditLine"], "'purchased'"),
Filter::Not,
Filter::has_english_text(fields["title"], "'anatomical'"),
Filter::has_english_text(fields["title"], "'for'"),
Filter::End,
Filter::gt(fields["year"], 1900u32),
Filter::gt(fields["acquisitionYear"], 2000u32),
],
vec![
"p80042", "p80043", "p80044", "p80045", "p80203", "t11937", "t12172",
],
),
];
for (filter, expected_results) in tests {
//println!("Running test: {:?}", filter);
let docset = db.filter(0, COLLECTION_ID, filter).await.unwrap();
let sorted_docset = db
.sort(
docset,
vec![Comparator::ascending(fields["accession_number"])],
Pagination::new(0, 0, None, 0),
) )
.await .await
.unwrap(); .unwrap(),
)],
assert_eq!( vec!["d05503"],
db.get_values::<String>( ),
sorted_docset (
.ids vec![Filter::is_in_set(
.into_iter() fts.query(
.map(|document_id| ValueKey { 0,
account_id: 0, COLLECTION_ID,
collection: COLLECTION_ID, vec![
document_id: document_id as u32, FtsFilter::has_english_text(fields["title"].clone(), "'rustic'"),
family: 0, FtsFilter::has_english_text(fields["title"].clone(), "study"),
field: fields["accession_number"], ],
})
.collect()
) )
.await .await
.unwrap() .unwrap(),
.into_iter() )],
.flatten() vec!["d00399", "d05352"],
.collect::<Vec<_>>(), ),
expected_results (
); vec![
} Filter::has_text(fields_u8["artist"], "mauro kunst"),
Filter::is_in_bitmap(
fields_u8["artistRole"],
Keyword::Other("artist".to_string()),
),
Filter::Or,
Filter::eq(fields_u8["year"], 1969u32),
Filter::eq(fields_u8["year"], 1971u32),
Filter::End,
],
vec!["p01764", "t05843"],
),
(
vec![
Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![
FtsFilter::Not,
FtsFilter::has_english_text(fields["medium"].clone(), "oil"),
FtsFilter::End,
FtsFilter::has_english_text(fields["creditLine"].clone(), "bequeath"),
],
)
.await
.unwrap(),
),
Filter::Or,
Filter::And,
Filter::ge(fields_u8["year"], 1900u32),
Filter::lt(fields_u8["year"], 1910u32),
Filter::End,
Filter::And,
Filter::ge(fields_u8["year"], 2000u32),
Filter::lt(fields_u8["year"], 2010u32),
Filter::End,
Filter::End,
],
vec![
"n02478", "n02479", "n03568", "n03658", "n04327", "n04328", "n04721", "n04739",
"n05095", "n05096", "n05145", "n05157", "n05158", "n05159", "n05298", "n05303",
"n06070", "t01181", "t03571", "t05805", "t05806", "t12147", "t12154", "t12155",
],
),
(
vec![
Filter::And,
Filter::has_text(fields_u8["artist"], "warhol"),
Filter::Not,
Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![FtsFilter::has_english_text(
fields["title"].clone(),
"'campbell'",
)],
)
.await
.unwrap(),
),
Filter::End,
Filter::Not,
Filter::Or,
Filter::gt(fields_u8["year"], 1980u32),
Filter::And,
Filter::gt(fields_u8["width"], 500u32),
Filter::gt(fields_u8["height"], 500u32),
Filter::End,
Filter::End,
Filter::End,
Filter::eq(fields_u8["acquisitionYear"], 2008u32),
Filter::End,
],
vec!["ar00039", "t12600"],
),
(
vec![
Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![
FtsFilter::has_english_text(fields["title"].clone(), "study"),
FtsFilter::has_english_text(fields["medium"].clone(), "paper"),
FtsFilter::has_english_text(
fields["creditLine"].clone(),
"'purchased'",
),
FtsFilter::Not,
FtsFilter::has_english_text(fields["title"].clone(), "'anatomical'"),
FtsFilter::has_english_text(fields["title"].clone(), "'for'"),
FtsFilter::End,
],
)
.await
.unwrap(),
),
Filter::gt(fields_u8["year"], 1900u32),
Filter::gt(fields_u8["acquisitionYear"], 2000u32),
],
vec![
"p80042", "p80043", "p80044", "p80045", "p80203", "t11937", "t12172",
],
),
];
*/ for (filter, expected_results) in tests {
//println!("Running test: {:?}", filter);
let docset = db.filter(0, COLLECTION_ID, filter).await.unwrap();
let sorted_docset = db
.sort(
docset,
vec![Comparator::ascending(fields_u8["accession_number"])],
Pagination::new(0, 0, None, 0),
)
.await
.unwrap();
assert_eq!(
db.get_values::<String>(
sorted_docset
.ids
.into_iter()
.map(|document_id| ValueKey {
account_id: 0,
collection: COLLECTION_ID,
document_id: document_id as u32,
class: ValueClass::Property(fields_u8["accession_number"])
})
.collect()
)
.await
.unwrap()
.into_iter()
.flatten()
.collect::<Vec<_>>(),
expected_results
);
}
} }
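
The hybrid idiom these rewritten tests exercise: evaluate the full-text conditions against the FTS store first, then splice the resulting document set into a regular store query via Filter::is_in_set. A condensed sketch of the first test vector above (field handles as built in test_filter; unwrap used as in the tests):

let fts_matches = fts
    .query(
        0,
        COLLECTION_ID,
        vec![FtsFilter::has_english_text(fields["title"].clone(), "water")],
    )
    .await
    .unwrap();
let result_set = db
    .filter(
        0,
        COLLECTION_ID,
        vec![
            Filter::is_in_set(fts_matches),
            Filter::eq(fields_u8["year"], 1979u32),
        ],
    )
    .await
    .unwrap();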
pub async fn test_sort(db: Store) { pub async fn test_sort(db: Store) {