FTS storage implementation and background indexing

This commit is contained in:
mdecimus 2023-11-17 14:59:09 +01:00
parent a3e6d152c9
commit bcc05340b2
88 changed files with 3105 additions and 2214 deletions

3
Cargo.lock generated
View file

@ -2306,6 +2306,7 @@ dependencies = [
"chrono",
"jmap_proto",
"mail-parser",
"store",
"tokio",
]
@ -5131,6 +5132,7 @@ dependencies = [
"futures",
"lazy_static",
"lru-cache",
"lz4_flex",
"nlp",
"num_cpus",
"parking_lot",
@ -5923,6 +5925,7 @@ dependencies = [
"opentelemetry-otlp",
"opentelemetry-semantic-conventions",
"privdrop",
"rand 0.8.5",
"rustls 0.21.7",
"rustls-pemfile",
"serde",

View file

@ -6,6 +6,7 @@ resolver = "2"
[dependencies]
jmap_proto = { path = "../jmap-proto" }
store = { path = "../store" }
mail-parser = { git = "https://github.com/stalwartlabs/mail-parser", features = ["full_encoding", "serde_support", "ludicrous_mode"] }
ahash = { version = "0.8" }
chrono = { version = "0.4"}

View file

@ -21,6 +21,8 @@
* for more details.
*/
use store::fts::{FilterItem, FilterType};
use super::{quoted_string, serialize_sequence, Flag, Sequence};
#[derive(Debug, Clone, PartialEq, Eq)]
@ -129,6 +131,38 @@ pub enum Filter {
ThreadId(String),
}
impl FilterItem for Filter {
    /// Classifies this IMAP SEARCH criterion for query planning: logical
    /// operators pass through unchanged, text-matching criteria are routed
    /// to the FTS engine, and everything else is evaluated by the store.
    fn filter_type(&self) -> FilterType {
        match self {
            Filter::And => FilterType::And,
            Filter::Or => FilterType::Or,
            Filter::Not => FilterType::Not,
            Filter::End => FilterType::End,
            Filter::From(_)
            | Filter::To(_)
            | Filter::Cc(_)
            | Filter::Bcc(_)
            | Filter::Subject(_)
            | Filter::Body(_)
            | Filter::Text(_)
            | Filter::Header(_, _) => FilterType::Fts,
            _ => FilterType::Store,
        }
    }
}
/// Converts a logical-operator tag back into its IMAP `Filter` variant.
///
/// Only the operator tags (`And`/`Or`/`Not`/`End`) are convertible; the
/// data-carrying tags (`Fts`/`Store`) cannot be reconstructed from the tag
/// alone and are never expected here.
impl From<FilterType> for Filter {
fn from(value: FilterType) -> Self {
match value {
FilterType::And => Filter::And,
FilterType::Or => Filter::Or,
FilterType::Not => Filter::Not,
FilterType::End => Filter::End,
// Reaching this arm indicates a bug in the filter-grouping logic:
// non-operator types must never be round-tripped through this impl.
_ => unreachable!(),
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ModSeqEntry {
Shared(Flag),

View file

@ -36,6 +36,7 @@ use jmap_proto::types::{collection::Collection, id::Id, keyword::Keyword, proper
use mail_parser::HeaderName;
use nlp::language::Language;
use store::{
fts::{Field, FilterGroup, FtsFilter, IntoFilterGroup},
query::{self, log::Query, sort::Pagination, ResultSet},
roaring::RoaringBitmap,
write::now,
@ -275,8 +276,153 @@ impl SessionData {
// Convert query
let mut include_highest_modseq = false;
for filter in imap_filter {
match filter {
for filter_group in imap_filter.into_filter_group() {
match filter_group {
FilterGroup::Fts(conds) => {
let mut fts_filters = Vec::with_capacity(filters.len());
for cond in conds {
match cond {
search::Filter::Bcc(text) => {
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::Bcc),
text,
Language::None,
));
}
search::Filter::Body(text) => {
fts_filters.push(FtsFilter::has_text_detect(
Field::Body,
text,
self.jmap.config.default_language,
));
}
search::Filter::Cc(text) => {
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::Cc),
text,
Language::None,
));
}
search::Filter::From(text) => {
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::From),
text,
Language::None,
));
}
search::Filter::Header(header, value) => {
match HeaderName::parse(header) {
Some(HeaderName::Other(header_name)) => {
return Err(StatusResponse::no(format!(
"Querying header '{header_name}' is not supported.",
)));
}
Some(header_name) => {
if !value.is_empty() {
if matches!(
header_name,
HeaderName::MessageId
| HeaderName::InReplyTo
| HeaderName::References
| HeaderName::ResentMessageId
) {
fts_filters.push(FtsFilter::has_keyword(
Field::Header(header_name),
value,
));
} else {
fts_filters.push(FtsFilter::has_text(
Field::Header(header_name),
value,
Language::None,
));
}
} else {
fts_filters.push(FtsFilter::has_keyword(
Field::Keyword,
header_name.as_str().to_lowercase(),
));
}
}
None => (),
}
}
search::Filter::Subject(text) => {
fts_filters.push(FtsFilter::has_text_detect(
Field::Header(HeaderName::Subject),
text,
self.jmap.config.default_language,
));
}
search::Filter::Text(text) => {
fts_filters.push(FtsFilter::Or);
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::From),
&text,
Language::None,
));
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::To),
&text,
Language::None,
));
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::Cc),
&text,
Language::None,
));
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::Bcc),
&text,
Language::None,
));
fts_filters.push(FtsFilter::has_text_detect(
Field::Header(HeaderName::Subject),
&text,
self.jmap.config.default_language,
));
fts_filters.push(FtsFilter::has_text_detect(
Field::Body,
&text,
self.jmap.config.default_language,
));
fts_filters.push(FtsFilter::has_text_detect(
Field::Attachment,
text,
self.jmap.config.default_language,
));
fts_filters.push(FtsFilter::End);
}
search::Filter::To(text) => {
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::To),
text,
Language::None,
));
}
search::Filter::And => {
fts_filters.push(FtsFilter::And);
}
search::Filter::Or => {
fts_filters.push(FtsFilter::Or);
}
search::Filter::Not => {
fts_filters.push(FtsFilter::Not);
}
search::Filter::End => {
fts_filters.push(FtsFilter::End);
}
_ => (),
}
}
filters.push(query::Filter::is_in_set(
self.jmap
.fts_filter(mailbox.id.account_id, Collection::Email, fts_filters)
.await?,
));
}
FilterGroup::Store(cond) => match cond {
search::Filter::Sequence(sequence, uid_filter) => {
let mut set = RoaringBitmap::new();
if let (Sequence::SavedSearch, Some(prev_saved_search)) =
@ -312,22 +458,9 @@ impl SessionData {
Keyword::Answered,
));
}
/*search::Filter::Bcc(text) => {
filters.push(query::Filter::has_text(Property::Bcc, text, Language::None));
}
search::Filter::Before(date) => {
filters.push(query::Filter::lt(Property::ReceivedAt, date as u64));
}
search::Filter::Body(text) => {
filters.push(query::Filter::has_text_detect(
Property::TextBody,
text,
self.jmap.config.default_language,
));
}
search::Filter::Cc(text) => {
filters.push(query::Filter::has_text(Property::Cc, text, Language::None));
}
search::Filter::Deleted => {
filters.push(query::Filter::is_in_bitmap(
Property::Keywords,
@ -346,73 +479,6 @@ impl SessionData {
Keyword::Flagged,
));
}
search::Filter::From(text) => {
filters.push(query::Filter::has_text(
Property::From,
text,
Language::None,
));
}
search::Filter::Header(header, value) => match HeaderName::parse(&header) {
Some(HeaderName::Other(_)) | None => {
return Err(StatusResponse::no(format!(
"Querying non-RFC header '{header}' is not allowed.",
)));
}
Some(header_name) => {
let is_id = matches!(
header_name,
HeaderName::MessageId
| HeaderName::InReplyTo
| HeaderName::References
| HeaderName::ResentMessageId
);
let tokens = if !value.is_empty() {
let header_num = header_name.id().to_string();
value
.split_ascii_whitespace()
.filter_map(|token| {
if token.len() < MAX_TOKEN_LENGTH {
if is_id {
format!("{header_num}{token}")
} else {
format!("{header_num}{}", token.to_lowercase())
}
.into()
} else {
None
}
})
.collect::<Vec<_>>()
} else {
vec![]
};
match tokens.len() {
0 => {
filters.push(query::Filter::has_raw_text(
Property::Headers,
header_name.id().to_string(),
));
}
1 => {
filters.push(query::Filter::has_raw_text(
Property::Headers,
tokens.into_iter().next().unwrap(),
));
}
_ => {
filters.push(query::Filter::And);
for token in tokens {
filters.push(query::Filter::has_raw_text(
Property::Headers,
token,
));
}
filters.push(query::Filter::End);
}
}
}
},
search::Filter::Keyword(keyword) => {
filters.push(query::Filter::is_in_bitmap(
Property::Keywords,
@ -455,47 +521,6 @@ impl SessionData {
search::Filter::Smaller(size) => {
filters.push(query::Filter::lt(Property::Size, size));
}
search::Filter::Subject(text) => {
filters.push(query::Filter::has_text_detect(
Property::Subject,
text,
self.jmap.config.default_language,
));
}
search::Filter::Text(text) => {
filters.push(query::Filter::Or);
filters.push(query::Filter::has_text(
Property::From,
&text,
Language::None,
));
filters.push(query::Filter::has_text(Property::To, &text, Language::None));
filters.push(query::Filter::has_text(Property::Cc, &text, Language::None));
filters.push(query::Filter::has_text(
Property::Bcc,
&text,
Language::None,
));
filters.push(query::Filter::has_text_detect(
Property::Subject,
&text,
self.jmap.config.default_language,
));
filters.push(query::Filter::has_text_detect(
Property::TextBody,
&text,
self.jmap.config.default_language,
));
filters.push(query::Filter::has_text_detect(
Property::Attachments,
text,
self.jmap.config.default_language,
));
filters.push(query::Filter::End);
}
search::Filter::To(text) => {
filters.push(query::Filter::has_text(Property::To, text, Language::None));
}*/
search::Filter::Unanswered => {
filters.push(query::Filter::Not);
filters.push(query::Filter::is_in_bitmap(
@ -640,6 +665,7 @@ impl SessionData {
}
}
_ => (),
},
}
}

View file

@ -23,6 +23,8 @@
use std::fmt::Display;
use store::fts::{FilterItem, FilterType, FtsFilter};
use crate::{
error::method::MethodError,
object::{email, mailbox},
@ -785,3 +787,47 @@ impl From<Filter> for store::query::Filter {
}
}
}
/// Converts a logical-operator JMAP `Filter` into the equivalent FTS
/// operator. Note the naming difference: JMAP's `Close` corresponds to
/// the FTS filter language's `End`.
///
/// # Panics
///
/// Panics via `unreachable!` on any data-carrying variant; callers are
/// expected to pre-screen variants with `FilterItem::filter_type()`.
impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<Filter> for FtsFilter<T> {
fn from(value: Filter) -> Self {
match value {
Filter::And => Self::And,
Filter::Or => Self::Or,
Filter::Not => Self::Not,
// JMAP uses `Close` where the FTS filter language uses `End`.
Filter::Close => Self::End,
_ => unreachable!(),
}
}
}
impl FilterItem for Filter {
    /// Tags this JMAP Email query filter for the query planner: logical
    /// operators pass through as-is (JMAP's `Close` maps to `End`),
    /// free-text criteria are grouped for the FTS backend, and all
    /// remaining filters run against the store indexes.
    fn filter_type(&self) -> FilterType {
        match self {
            Filter::And => FilterType::And,
            Filter::Or => FilterType::Or,
            Filter::Not => FilterType::Not,
            Filter::Close => FilterType::End,
            Filter::Text(_)
            | Filter::From(_)
            | Filter::To(_)
            | Filter::Cc(_)
            | Filter::Bcc(_)
            | Filter::Subject(_)
            | Filter::Body(_)
            | Filter::Header(_) => FilterType::Fts,
            _ => FilterType::Store,
        }
    }
}
/// Converts a logical-operator tag back into its JMAP `Filter` variant
/// (`End` maps back to JMAP's `Close`).
///
/// Only operator tags are convertible; the data-carrying tags
/// (`Fts`/`Store`) cannot be rebuilt from the tag alone.
impl From<FilterType> for Filter {
fn from(value: FilterType) -> Self {
match value {
FilterType::And => Filter::And,
FilterType::Or => Filter::Or,
FilterType::Not => Filter::Not,
FilterType::End => Filter::Close,
// Reaching this arm indicates a bug in the filter-grouping logic:
// non-operator types must never be round-tripped through this impl.
_ => unreachable!(),
}
}
}

View file

@ -25,8 +25,8 @@ use std::{borrow::Cow, collections::HashSet};
use store::{
write::{
assert::HashedValue, BatchBuilder, BitmapClass, IntoOperations, Operation, TagValue,
TokenizeText, ValueClass, ValueOp,
assert::HashedValue, BatchBuilder, BitmapClass, BitmapHash, IntoOperations, Operation,
TagValue, TokenizeText, ValueClass, ValueOp,
},
Serialize,
};
@ -238,7 +238,7 @@ fn merge_batch(
batch.ops.push(Operation::Bitmap {
class: BitmapClass::Text {
field,
token: token.into(),
token: BitmapHash::new(token),
},
set,
});
@ -301,7 +301,7 @@ fn merge_batch(
batch.ops.push(Operation::Bitmap {
class: BitmapClass::Text {
field,
token: token.into_bytes(),
token: BitmapHash::new(token),
},
set,
});
@ -480,7 +480,7 @@ fn build_batch(
batch.ops.push(Operation::Bitmap {
class: BitmapClass::Text {
field,
token: token.into_bytes(),
token: BitmapHash::new(token),
},
set,
});
@ -512,7 +512,7 @@ fn build_batch(
batch.ops.push(Operation::Bitmap {
class: BitmapClass::Text {
field,
token: token.into_bytes(),
token: BitmapHash::new(token),
},
set,
});

View file

@ -39,6 +39,9 @@ impl JMAP {
// Delete account data
self.store.purge_account(account_id).await?;
// Remove FTS index
let todo = 1;
// Delete account
let mut batch = BatchBuilder::new();
batch

View file

@ -41,6 +41,9 @@ impl crate::Config {
changes_max_results: settings
.property("jmap.protocol.changes.max-results")?
.unwrap_or(5000),
snippet_max_results: settings
.property("jmap.protocol.search-snippet.max-results")?
.unwrap_or(100),
request_max_size: settings
.property("jmap.protocol.request.max-size")?
.unwrap_or(10000000),

View file

@ -377,6 +377,19 @@ pub async fn parse_jmap_request(
.into_http_response(),
};
}
("db", "purge", &Method::GET) => {
return match jmap.store.purge_bitmaps().await {
Ok(_) => {
JsonResponse::new(Value::String("success".into())).into_http_response()
}
Err(err) => RequestError::blank(
StatusCode::INTERNAL_SERVER_ERROR.as_u16(),
"Purge database failed",
err.to_string(),
)
.into_http_response(),
};
}
(path_1 @ ("queue" | "report"), path_2, &Method::GET) => {
return jmap
.smtp

View file

@ -33,8 +33,9 @@ impl JMAP {
.map(ChangeLogBuilder::with_change_id)
}
pub async fn assign_change_id(&self, account_id: u32) -> Result<u64, MethodError> {
self.store
pub async fn assign_change_id(&self, _: u32) -> Result<u64, MethodError> {
self.generate_snowflake_id()
/*self.store
.assign_change_id(account_id)
.await
.map_err(|err| {
@ -44,6 +45,17 @@ impl JMAP {
error = ?err,
"Failed to assign changeId.");
MethodError::ServerPartialFail
})*/
}
/// Generates a new snowflake id from the server's id generator.
///
/// Used as a change id and as the sequence number for FTS index entries
/// (presumably unique and roughly time-ordered across the cluster —
/// NOTE(review): confirm against the `snowflake_id` generator's contract).
///
/// # Errors
///
/// Logs and returns `MethodError::ServerPartialFail` if the generator
/// fails to produce an id.
pub fn generate_snowflake_id(&self) -> Result<u64, MethodError> {
self.snowflake_id.generate().ok_or_else(|| {
tracing::error!(
event = "error",
context = "change_log",
"Failed to generate snowflake id."
);
MethodError::ServerPartialFail
})
}

View file

@ -53,7 +53,7 @@ use store::{
};
use utils::map::vec_map::VecMap;
use crate::{auth::AccessToken, Bincode, JMAP};
use crate::{auth::AccessToken, services::housekeeper::Event, Bincode, NamedKey, JMAP};
use super::{
index::{EmailIndexBuilder, TrimTextValue, MAX_SORT_FIELD_LENGTH},
@ -291,7 +291,7 @@ impl JMAP {
keywords: Vec<Keyword>,
received_at: Option<UTCDate>,
) -> Result<Result<IngestedEmail, SetError>, MethodError> {
// Obtain term index and metadata
// Obtain metadata
let mut metadata = if let Some(metadata) = self
.get_property::<Bincode<MessageMetadata>>(
from_account_id,
@ -405,6 +405,14 @@ impl JMAP {
.value(Property::MailboxIds, mailboxes, F_VALUE | F_BITMAP)
.value(Property::Keywords, keywords, F_VALUE | F_BITMAP)
.value(Property::Cid, changes.change_id, F_VALUE)
.set(
NamedKey::IndexEmail::<&[u8]> {
account_id,
document_id: message_id,
seq: self.generate_snowflake_id()?,
},
metadata.blob_hash.clone(),
)
.custom(EmailIndexBuilder::set(metadata))
.custom(changes);
@ -417,6 +425,9 @@ impl JMAP {
MethodError::ServerPartialFail
})?;
// Request FTS index
let _ = self.housekeeper_tx.send(Event::IndexStart).await;
Ok(Ok(email))
}
}

View file

@ -32,6 +32,8 @@ use mail_parser::{
};
use nlp::language::Language;
use store::{
backend::MAX_TOKEN_LENGTH,
fts::{index::FtsDocument, Field},
write::{BatchBuilder, BlobOp, IntoOperations, F_BITMAP, F_CLEAR, F_INDEX, F_VALUE},
BlobHash,
};
@ -60,13 +62,13 @@ pub(super) trait IndexMessage {
keywords: Vec<Keyword>,
mailbox_ids: Vec<u32>,
received_at: u64,
) -> store::Result<&mut Self>;
) -> &mut Self;
fn index_headers(&mut self, headers: &[Header<'_>], options: u32);
}
pub(super) trait IndexMessageText<'x> {
fn index_message(&mut self, message: &'x Message<'x>);
pub trait IndexMessageText<'x>: Sized {
fn index_message(self, message: &'x Message<'x>) -> Self;
}
impl IndexMessage for BatchBuilder {
@ -77,7 +79,7 @@ impl IndexMessage for BatchBuilder {
keywords: Vec<Keyword>,
mailbox_ids: Vec<u32>,
received_at: u64,
) -> store::Result<&mut Self> {
) -> &mut Self {
// Index keywords
self.value(Property::Keywords, keywords, F_VALUE | F_BITMAP);
@ -164,7 +166,7 @@ impl IndexMessage for BatchBuilder {
F_VALUE,
);
Ok(self)
self
}
fn index_headers(&mut self, headers: &[Header<'_>], options: u32) {
@ -262,9 +264,8 @@ impl IndexMessage for BatchBuilder {
}
}
/*
impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
fn index_message(&mut self, message: &'x Message<'x>) {
impl<'x> IndexMessageText<'x> for FtsDocument<'x, HeaderName<'x>> {
fn index_message(mut self, message: &'x Message<'x>) -> Self {
let mut language = Language::Unknown;
for (part_id, part) in message.parts.iter().take(MAX_MESSAGE_PARTS).enumerate() {
@ -277,9 +278,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
continue;
}
// Index hasHeader property
self.index_raw_token(Property::Headers, header.name.as_str());
self.index_keyword(Field::Keyword, header.name.as_str().to_ascii_lowercase());
match header.name {
match &header.name {
HeaderName::MessageId
| HeaderName::InReplyTo
| HeaderName::References
@ -287,45 +288,35 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
header.value.visit_text(|id| {
// Index ids without stemming
if id.len() < MAX_TOKEN_LENGTH {
let fix = "true";
self.index_raw_token(Property::MessageId, id.to_string());
self.index_keyword(
Field::Header(header.name.clone()),
id.to_string(),
);
}
});
}
HeaderName::From | HeaderName::To | HeaderName::Cc | HeaderName::Bcc => {
let property = Property::from_header(&header.name);
header.value.visit_addresses(|_, value| {
// Index an address name or email without stemming
self.index_raw(property.clone(), value.to_string());
self.index_tokenized(
Field::Header(header.name.clone()),
value.to_string(),
);
});
}
HeaderName::Subject => {
// Index subject for FTS
self.index(
Property::Subject,
match &header.value {
HeaderValue::Text(text) => text.clone(),
HeaderValue::TextList(list) if !list.is_empty() => {
list.first().unwrap().clone()
if let Some(subject) = header.value.as_text() {
self.index(Field::Header(HeaderName::Subject), subject, language);
}
_ => "".into(),
},
language,
);
}
HeaderName::Comments | HeaderName::Keywords | HeaderName::ListId => {
// Index headers
header.value.visit_text(|text| {
for token in text.split_ascii_whitespace() {
if token.len() < MAX_TOKEN_LENGTH {
let fix = "true";
self.index_raw_token(
Property::Headers,
token.to_lowercase(),
self.index_tokenized(
Field::Header(header.name.clone()),
text.to_string(),
);
}
}
});
}
_ => (),
@ -337,9 +328,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
PartType::Text(text) => {
if message.text_body.contains(&part_id) || message.html_body.contains(&part_id)
{
self.index(Property::TextBody, text.as_ref(), part_language);
self.index(Field::Body, text.as_ref(), part_language);
} else {
self.index(Property::Attachments, text.as_ref(), part_language);
self.index(Field::Attachment, text.as_ref(), part_language);
}
}
PartType::Html(html) => {
@ -347,9 +338,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
if message.text_body.contains(&part_id) || message.html_body.contains(&part_id)
{
self.index(Property::TextBody, text, part_language);
self.index(Field::Body, text, part_language);
} else {
self.index(Property::Attachments, text, part_language);
self.index(Field::Attachment, text, part_language);
}
}
PartType::Message(nested_message) => {
@ -360,21 +351,17 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
if let Some(HeaderValue::Text(subject)) =
nested_message.header(HeaderName::Subject)
{
self.index(
Property::Attachments,
subject.as_ref(),
nested_message_language,
);
self.index(Field::Attachment, subject.as_ref(), nested_message_language);
}
for sub_part in nested_message.parts.iter().take(MAX_MESSAGE_PARTS) {
let language = sub_part.language().unwrap_or(nested_message_language);
match &sub_part.body {
PartType::Text(text) => {
self.index(Property::Attachments, text.as_ref(), language);
self.index(Field::Attachment, text.as_ref(), language);
}
PartType::Html(html) => {
self.index(Property::Attachments, html_to_text(html), language);
self.index(Field::Attachment, html_to_text(html), language);
}
_ => (),
}
@ -383,9 +370,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
_ => {}
}
}
self
}
}
*/
pub struct EmailIndexBuilder<'x> {
inner: Bincode<MessageMetadata<'x>>,

View file

@ -33,6 +33,7 @@ use jmap_proto::{
use mail_parser::{
parsers::fields::thread::thread_name, HeaderName, HeaderValue, Message, PartType,
};
use store::{
ahash::AHashSet,
query::Filter,
@ -46,7 +47,8 @@ use utils::map::vec_map::VecMap;
use crate::{
email::index::{IndexMessage, MAX_ID_LENGTH},
IngestError, JMAP,
services::housekeeper::Event,
IngestError, NamedKey, JMAP,
};
use super::{
@ -237,15 +239,14 @@ impl JMAP {
IngestError::Temporary
})?;
let change_id = self
.store
.assign_change_id(params.account_id)
.await
.map_err(|err| {
.map_err(|_| {
tracing::error!(
event = "error",
context = "email_ingest",
error = ?err,
"Failed to assign changeId.");
"Failed to assign changeId."
);
IngestError::Temporary
})?;
@ -307,17 +308,19 @@ impl JMAP {
params.mailbox_ids,
params.received_at.unwrap_or_else(now),
)
.map_err(|err| {
tracing::error!(
event = "error",
context = "email_ingest",
error = ?err,
"Failed to index message.");
IngestError::Temporary
})?
.value(Property::Cid, change_id, F_VALUE)
.value(Property::ThreadId, thread_id, F_VALUE | F_BITMAP)
.custom(changes);
.custom(changes)
.set(
NamedKey::IndexEmail::<&[u8]> {
account_id: params.account_id,
document_id,
seq: self
.generate_snowflake_id()
.map_err(|_| IngestError::Temporary)?,
},
blob_id.hash.clone(),
);
self.store.write(batch.build()).await.map_err(|err| {
tracing::error!(
event = "error",
@ -327,6 +330,9 @@ impl JMAP {
IngestError::Temporary
})?;
// Request FTS index
let _ = self.housekeeper_tx.send(Event::IndexStart).await;
Ok(IngestedEmail {
id,
change_id,
@ -434,16 +440,12 @@ impl JMAP {
// Delete all but the most common threadId
let mut batch = BatchBuilder::new();
let change_id = self
.store
.assign_change_id(account_id)
.await
.map_err(|err| {
let change_id = self.assign_change_id(account_id).await.map_err(|_| {
tracing::error!(
event = "error",
context = "find_or_merge_thread",
error = ?err,
"Failed to assign changeId for thread merge.");
"Failed to assign changeId for thread merge."
);
IngestError::Temporary
})?;
let mut changes = ChangeLogBuilder::with_change_id(change_id);

View file

@ -27,7 +27,10 @@ use jmap_proto::{
object::email::QueryArguments,
types::{acl::Acl, collection::Collection, keyword::Keyword, property::Property},
};
use mail_parser::HeaderName;
use nlp::language::Language;
use store::{
fts::{Field, FilterGroup, FtsFilter, IntoFilterGroup},
query::{self},
roaring::RoaringBitmap,
write::ValueClass,
@ -45,7 +48,137 @@ impl JMAP {
let account_id = request.account_id.document_id();
let mut filters = Vec::with_capacity(request.filter.len());
for cond in std::mem::take(&mut request.filter) {
for cond_group in std::mem::take(&mut request.filter).into_filter_group() {
match cond_group {
FilterGroup::Fts(conds) => {
let mut fts_filters = Vec::with_capacity(filters.len());
for cond in conds {
match cond {
Filter::Text(text) => {
fts_filters.push(FtsFilter::Or);
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::From),
&text,
Language::None,
));
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::To),
&text,
Language::None,
));
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::Cc),
&text,
Language::None,
));
fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::Bcc),
&text,
Language::None,
));
fts_filters.push(FtsFilter::has_text_detect(
Field::Header(HeaderName::Subject),
&text,
self.config.default_language,
));
fts_filters.push(FtsFilter::has_text_detect(
Field::Body,
&text,
self.config.default_language,
));
fts_filters.push(FtsFilter::has_text_detect(
Field::Attachment,
text,
self.config.default_language,
));
fts_filters.push(FtsFilter::End);
}
Filter::From(text) => fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::From),
text,
Language::None,
)),
Filter::To(text) => fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::To),
text,
Language::None,
)),
Filter::Cc(text) => fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::Cc),
text,
Language::None,
)),
Filter::Bcc(text) => fts_filters.push(FtsFilter::has_text(
Field::Header(HeaderName::Bcc),
text,
Language::None,
)),
Filter::Subject(text) => fts_filters.push(FtsFilter::has_text_detect(
Field::Header(HeaderName::Subject),
text,
self.config.default_language,
)),
Filter::Body(text) => fts_filters.push(FtsFilter::has_text_detect(
Field::Body,
text,
self.config.default_language,
)),
Filter::Header(header) => {
let mut header = header.into_iter();
let header_name = header.next().ok_or_else(|| {
MethodError::InvalidArguments(
"Header name is missing.".to_string(),
)
})?;
match HeaderName::parse(header_name) {
Some(HeaderName::Other(header_name)) => {
return Err(MethodError::InvalidArguments(format!(
"Querying header '{header_name}' is not supported.",
)));
}
Some(header_name) => {
if let Some(header_value) = header.next() {
if matches!(
header_name,
HeaderName::MessageId
| HeaderName::InReplyTo
| HeaderName::References
| HeaderName::ResentMessageId
) {
fts_filters.push(FtsFilter::has_keyword(
Field::Header(header_name),
header_value,
));
} else {
fts_filters.push(FtsFilter::has_text(
Field::Header(header_name),
header_value,
Language::None,
));
}
} else {
fts_filters.push(FtsFilter::has_keyword(
Field::Keyword,
header_name.as_str().to_lowercase(),
));
}
}
None => (),
}
}
Filter::And | Filter::Or | Filter::Not | Filter::Close => {
fts_filters.push(cond.into());
}
other => return Err(MethodError::UnsupportedFilter(other.to_string())),
}
}
filters.push(query::Filter::is_in_set(
self.fts_filter(account_id, Collection::Email, fts_filters)
.await?,
));
}
FilterGroup::Store(cond) => {
match cond {
Filter::InMailbox(mailbox) => filters.push(query::Filter::is_in_bitmap(
Property::MailboxIds,
@ -63,16 +196,28 @@ impl JMAP {
filters.push(query::Filter::End);
filters.push(query::Filter::End);
}
Filter::Before(date) => filters.push(query::Filter::lt(Property::ReceivedAt, date)),
Filter::After(date) => filters.push(query::Filter::gt(Property::ReceivedAt, date)),
Filter::MinSize(size) => filters.push(query::Filter::ge(Property::Size, size)),
Filter::MaxSize(size) => filters.push(query::Filter::lt(Property::Size, size)),
Filter::AllInThreadHaveKeyword(keyword) => filters.push(query::Filter::is_in_set(
Filter::Before(date) => {
filters.push(query::Filter::lt(Property::ReceivedAt, date))
}
Filter::After(date) => {
filters.push(query::Filter::gt(Property::ReceivedAt, date))
}
Filter::MinSize(size) => {
filters.push(query::Filter::ge(Property::Size, size))
}
Filter::MaxSize(size) => {
filters.push(query::Filter::lt(Property::Size, size))
}
Filter::AllInThreadHaveKeyword(keyword) => {
filters.push(query::Filter::is_in_set(
self.thread_keywords(account_id, keyword, true).await?,
)),
Filter::SomeInThreadHaveKeyword(keyword) => filters.push(query::Filter::is_in_set(
))
}
Filter::SomeInThreadHaveKeyword(keyword) => {
filters.push(query::Filter::is_in_set(
self.thread_keywords(account_id, keyword, false).await?,
)),
))
}
Filter::NoneInThreadHaveKeyword(keyword) => {
filters.push(query::Filter::Not);
filters.push(query::Filter::is_in_set(
@ -97,129 +242,7 @@ impl JMAP {
filters.push(query::Filter::End);
}
}
/*Filter::Text(text) => {
filters.push(query::Filter::Or);
filters.push(query::Filter::has_text(
Property::From,
&text,
Language::None,
));
filters.push(query::Filter::has_text(Property::To, &text, Language::None));
filters.push(query::Filter::has_text(Property::Cc, &text, Language::None));
filters.push(query::Filter::has_text(
Property::Bcc,
&text,
Language::None,
));
filters.push(query::Filter::has_text_detect(
Property::Subject,
&text,
self.config.default_language,
));
filters.push(query::Filter::has_text_detect(
Property::TextBody,
&text,
self.config.default_language,
));
filters.push(query::Filter::has_text_detect(
Property::Attachments,
text,
self.config.default_language,
));
filters.push(query::Filter::End);
}
Filter::From(text) => filters.push(query::Filter::has_text(
Property::From,
text,
Language::None,
)),
Filter::To(text) => {
filters.push(query::Filter::has_text(Property::To, text, Language::None))
}
Filter::Cc(text) => {
filters.push(query::Filter::has_text(Property::Cc, text, Language::None))
}
Filter::Bcc(text) => {
filters.push(query::Filter::has_text(Property::Bcc, text, Language::None))
}
Filter::Subject(text) => filters.push(query::Filter::has_text_detect(
Property::Subject,
text,
self.config.default_language,
)),
Filter::Body(text) => filters.push(query::Filter::has_text_detect(
Property::TextBody,
text,
self.config.default_language,
)),
Filter::Header(header) => {
let mut header = header.into_iter();
let header_name = header.next().ok_or_else(|| {
MethodError::InvalidArguments("Header name is missing.".to_string())
})?;
match HeaderName::parse(&header_name) {
Some(HeaderName::Other(_)) | None => {
return Err(MethodError::InvalidArguments(format!(
"Querying non-RFC header '{header_name}' is not allowed.",
)));
}
Some(header_name) => {
let is_id = matches!(
header_name,
HeaderName::MessageId
| HeaderName::InReplyTo
| HeaderName::References
| HeaderName::ResentMessageId
);
let tokens = if let Some(header_value) = header.next() {
let header_num = header_name.id().to_string();
header_value
.split_ascii_whitespace()
.filter_map(|token| {
if token.len() < MAX_TOKEN_LENGTH {
if is_id {
format!("{header_num}{token}")
} else {
format!("{header_num}{}", token.to_lowercase())
}
.into()
} else {
None
}
})
.collect::<Vec<_>>()
} else {
vec![]
};
match tokens.len() {
0 => {
filters.push(query::Filter::has_raw_text(
Property::Headers,
header_name.id().to_string(),
));
}
1 => {
filters.push(query::Filter::has_raw_text(
Property::Headers,
tokens.into_iter().next().unwrap(),
));
}
_ => {
filters.push(query::Filter::And);
for token in tokens {
filters.push(query::Filter::has_raw_text(
Property::Headers,
token,
));
}
filters.push(query::Filter::End);
}
}
}
}
}
*/
// Non-standard
Filter::Id(ids) => {
let mut set = RoaringBitmap::new();
@ -228,8 +251,12 @@ impl JMAP {
}
filters.push(query::Filter::is_in_set(set));
}
Filter::SentBefore(date) => filters.push(query::Filter::lt(Property::SentAt, date)),
Filter::SentAfter(date) => filters.push(query::Filter::gt(Property::SentAt, date)),
Filter::SentBefore(date) => {
filters.push(query::Filter::lt(Property::SentAt, date))
}
Filter::SentAfter(date) => {
filters.push(query::Filter::gt(Property::SentAt, date))
}
Filter::InThread(id) => filters.push(query::Filter::is_in_bitmap(
Property::ThreadId,
id.document_id(),
@ -241,6 +268,8 @@ impl JMAP {
other => return Err(MethodError::UnsupportedFilter(other.to_string())),
}
}
}
}
let mut result_set = self.filter(account_id, Collection::Email, filters).await?;
if access_token.is_shared(account_id) {

View file

@ -59,7 +59,9 @@ use store::{
Serialize,
};
use crate::{auth::AccessToken, Bincode, IngestError, JMAP};
use crate::{
auth::AccessToken, services::housekeeper::Event, Bincode, IngestError, NamedKey, JMAP,
};
use super::{
headers::{BuildHeader, ValueToHeader},
@ -1208,6 +1210,16 @@ impl JMAP {
.delete_document(thread_id);
}
// Remove message from FTS index
batch.set(
NamedKey::IndexEmail::<&[u8]> {
account_id,
document_id,
seq: self.generate_snowflake_id()?,
},
vec![],
);
// Commit batch
match self.store.write(batch.build()).await {
Ok(_) => (),
@ -1226,6 +1238,9 @@ impl JMAP {
}
}
// Request FTS index
let _ = self.housekeeper_tx.send(Event::IndexStart).await;
Ok(Ok(changes))
}
}

View file

@ -27,15 +27,15 @@ use jmap_proto::{
query::Filter,
search_snippet::{GetSearchSnippetRequest, GetSearchSnippetResponse, SearchSnippet},
},
types::{acl::Acl, collection::Collection},
types::{acl::Acl, collection::Collection, property::Property},
};
use mail_parser::{decoders::html::html_to_text, MessageParser, PartType};
use nlp::language::{stemmer::Stemmer, Language};
use store::BlobHash;
use mail_parser::{decoders::html::html_to_text, GetHeader, HeaderName, PartType};
use nlp::language::{search_snippet::generate_snippet, stemmer::Stemmer, Language};
use store::backend::MAX_TOKEN_LENGTH;
use crate::{auth::AccessToken, JMAP};
use crate::{auth::AccessToken, Bincode, JMAP};
use super::index::MAX_MESSAGE_PARTS;
use super::metadata::{MessageMetadata, MetadataPartType};
impl JMAP {
pub async fn email_search_snippet(
@ -45,37 +45,33 @@ impl JMAP {
) -> Result<GetSearchSnippetResponse, MethodError> {
let mut filter_stack = vec![];
let mut include_term = true;
//let mut terms = vec![];
let mut match_phrase = false;
let mut terms = vec![];
let mut is_exact = false;
let mut language = self.config.default_language;
for cond in request.filter {
match cond {
Filter::Text(text) | Filter::Subject(text) | Filter::Body(text) => {
/*if include_term {
let (text, language) = Language::detect(text, self.config.default_language);
if include_term {
let (text, language_) =
Language::detect(text, self.config.default_language);
language = language_;
if (text.starts_with('"') && text.ends_with('"'))
|| (text.starts_with('\'') && text.ends_with('\''))
{
terms.push(
language
.tokenize_text(&text, MAX_TOKEN_LENGTH)
.map(|token| (token.word.into_owned(), None))
.collect::<Vec<_>>(),
);
match_phrase = true;
} else {
terms.push(
Stemmer::new(&text, language, MAX_TOKEN_LENGTH)
.map(|token| {
(
token.word.into_owned(),
token.stemmed_word.map(|w| w.into_owned()),
)
})
.collect::<Vec<_>>(),
);
for token in language.tokenize_text(&text, MAX_TOKEN_LENGTH) {
terms.push(token.word.into_owned());
}
is_exact = true;
} else {
for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH) {
terms.push(token.word.into_owned());
if let Some(stemmed_word) = token.stemmed_word {
terms.push(stemmed_word.into_owned());
}
}
}
}
}*/
}
Filter::And | Filter::Or => {
filter_stack.push(cond);
@ -103,11 +99,10 @@ impl JMAP {
not_found: vec![],
};
if email_ids.len() > self.config.get_max_objects {
if email_ids.len() > self.config.snippet_max_results {
return Err(MethodError::RequestTooLarge);
}
/*
for email_id in email_ids {
let document_id = email_id.document_id();
let mut snippet = SearchSnippet {
@ -122,131 +117,94 @@ impl JMAP {
response.list.push(snippet);
continue;
}
// Obtain the term index and raw message
let (term_index, raw_message) = if let (Some(term_index), Some(raw_message)) = (
self.get_term_index::<TermIndex>(account_id, Collection::Email, document_id)
.await?,
self.get_blob(
&BlobHash::LinkedMaildir {
let metadata = match self
.get_property::<Bincode<MessageMetadata>>(
account_id,
Collection::Email,
document_id,
},
0..u32::MAX,
&Property::BodyStructure,
)
.await?,
) {
(term_index, raw_message)
} else {
response.not_found.push(email_id);
continue;
};
// Parse message
let message = if let Some(message) = MessageParser::new().parse(&raw_message) {
message
} else {
response.not_found.push(email_id);
continue;
};
// Build the match terms
let mut match_terms = Vec::new();
for term in &terms {
for (word, stemmed_word) in term {
match_terms.push(term_index.get_match_term(word, stemmed_word.as_deref()));
}
}
'outer: for term_group in term_index
.match_terms(&match_terms, None, match_phrase, true, true)
.map_err(|err| match err {
term_index::Error::InvalidArgument => {
MethodError::UnsupportedFilter("Too many search terms.".to_string())
}
err => {
tracing::error!(
account_id = account_id,
document_id = document_id,
reason = ?err,
"Failed to generate search snippet.");
MethodError::UnsupportedFilter(
"Failed to generate search snippet.".to_string(),
)
}
})?
.unwrap_or_default()
.await?
{
if term_group.part_id == 0 {
// Generate subject snippet
snippet.subject =
generate_snippet(&term_group.terms, message.subject().unwrap_or_default());
Some(metadata) => metadata.inner,
None => {
response.not_found.push(email_id);
continue;
}
};
// Add subject snippet
if let Some(subject) = metadata
.contents
.root_part()
.headers
.header_value(&HeaderName::Subject)
.and_then(|v| v.as_text())
.and_then(|v| generate_snippet(v, &terms, language, is_exact))
{
snippet.subject = subject.into();
}
// Check if the snippet can be generated from the preview
/*if let Some(body) = generate_snippet(&metadata.preview, &terms) {
snippet.preview = body.into();
} else {*/
// Download message
let raw_message =
if let Some(raw_message) = self.get_blob(&metadata.blob_hash, 0..u32::MAX).await? {
raw_message
} else {
let mut part_num = 1;
for part in &message.parts {
tracing::warn!(event = "not-found",
account_id = account_id,
collection = ?Collection::Email,
document_id = email_id.document_id(),
blob_id = ?metadata.blob_hash,
"Blob not found");
response.not_found.push(email_id);
continue;
};
// Find a matching part
'outer: for part in &metadata.contents.parts {
match &part.body {
PartType::Text(text) => {
if part_num == term_group.part_id {
snippet.preview = generate_snippet(&term_group.terms, text);
MetadataPartType::Text | MetadataPartType::Html => {
let text = match part.decode_contents(&raw_message) {
PartType::Text(text) => text,
PartType::Html(html) => html_to_text(&html).into(),
_ => unreachable!(),
};
if let Some(body) = generate_snippet(&text, &terms, language, is_exact) {
snippet.preview = body.into();
break;
}
}
MetadataPartType::Message(message) => {
for part in &message.parts {
if let MetadataPartType::Text | MetadataPartType::Html = part.body {
let text = match part.decode_contents(&raw_message) {
PartType::Text(text) => text,
PartType::Html(html) => html_to_text(&html).into(),
_ => unreachable!(),
};
if let Some(body) =
generate_snippet(&text, &terms, language, is_exact)
{
snippet.preview = body.into();
break 'outer;
} else {
part_num += 1;
}
}
PartType::Html(html) => {
if part_num == term_group.part_id {
snippet.preview =
generate_snippet(&term_group.terms, &html_to_text(html));
break 'outer;
} else {
part_num += 1;
}
}
PartType::Message(message) => {
if let Some(subject) = message.subject() {
if part_num == term_group.part_id {
snippet.preview =
generate_snippet(&term_group.terms, subject);
break 'outer;
} else {
part_num += 1;
}
}
for sub_part in message.parts.iter().take(MAX_MESSAGE_PARTS) {
match &sub_part.body {
PartType::Text(text) => {
if part_num == term_group.part_id {
snippet.preview =
generate_snippet(&term_group.terms, text);
break 'outer;
} else {
part_num += 1;
}
}
PartType::Html(html) => {
if part_num == term_group.part_id {
snippet.preview = generate_snippet(
&term_group.terms,
&html_to_text(html),
);
break 'outer;
} else {
part_num += 1;
}
}
_ => (),
}
}
}
_ => (),
}
}
}
}
//}
response.list.push(snippet);
}
*/
Ok(response)
}
}

View file

@ -21,7 +21,7 @@
* for more details.
*/
use std::{collections::hash_map::RandomState, sync::Arc, time::Duration};
use std::{collections::hash_map::RandomState, fmt::Display, sync::Arc, time::Duration};
use ::sieve::{Compiler, Runtime};
use api::session::BaseCapabilities;
@ -49,17 +49,23 @@ use services::{
use smtp::core::SMTP;
use store::{
backend::{fs::FsStore, sqlite::SqliteStore},
fts::FtsFilter,
parking_lot::Mutex,
query::{sort::Pagination, Comparator, Filter, ResultSet, SortedResultSet},
roaring::RoaringBitmap,
write::{key::KeySerializer, BatchBuilder, BitmapClass, TagValue, ToBitmaps, ValueClass},
BitmapKey, BlobStore, Deserialize, Key, Serialize, Store, ValueKey, SUBSPACE_VALUES,
write::{
key::{DeserializeBigEndian, KeySerializer},
BatchBuilder, BitmapClass, TagValue, ToBitmaps, ValueClass,
},
BitmapKey, BlobStore, Deserialize, FtsStore, Key, Serialize, Store, ValueKey, SUBSPACE_VALUES,
U32_LEN, U64_LEN,
};
use tokio::sync::mpsc;
use utils::{
config::Rate,
ipc::DeliveryEvent,
map::ttl_dashmap::{TtlDashMap, TtlMap},
snowflake::SnowflakeIdGenerator,
UnwrapFailure,
};
@ -85,11 +91,13 @@ pub const LONG_SLUMBER: Duration = Duration::from_secs(60 * 60 * 24);
pub struct JMAP {
pub store: Store,
pub blob_store: BlobStore,
pub fts_store: FtsStore,
pub config: Config,
pub directory: Arc<dyn Directory>,
pub sessions: TtlDashMap<String, u32>,
pub access_tokens: TtlDashMap<u32, Arc<AccessToken>>,
pub snowflake_id: SnowflakeIdGenerator,
pub rate_limit_auth: DashMap<u32, Arc<Mutex<AuthenticatedLimiter>>>,
pub rate_limit_unauth: DashMap<RemoteAddress, Arc<Mutex<AnonymousLimiter>>>,
@ -108,6 +116,7 @@ pub struct Config {
pub default_language: Language,
pub query_max_results: usize,
pub changes_max_results: usize,
pub snippet_max_results: usize,
pub request_max_size: usize,
pub request_max_calls: usize,
@ -187,6 +196,11 @@ impl JMAP {
.property::<u64>("global.shared-map.shard")?
.unwrap_or(32)
.next_power_of_two() as usize;
let store = Store::SQLite(Arc::new(
SqliteStore::open(config)
.await
.failed("Unable to open database"),
));
let jmap_server = Arc::new(JMAP {
directory: directory_config
@ -197,11 +211,12 @@ impl JMAP {
config.value_require("jmap.directory")?
))
.clone(),
store: Store::SQLite(Arc::new(
SqliteStore::open(config)
.await
.failed("Unable to open database"),
)),
snowflake_id: config
.property::<u64>("global.node-id")?
.map(SnowflakeIdGenerator::with_node_id)
.unwrap_or_else(SnowflakeIdGenerator::new),
fts_store: FtsStore::Store(store.clone()),
store,
blob_store: BlobStore::Fs(Arc::new(
FsStore::open(config)
.await
@ -618,7 +633,28 @@ impl JMAP {
.await
.map_err(|err| {
tracing::error!(event = "error",
context = "mailbox_set",
context = "filter",
account_id = account_id,
collection = ?collection,
error = ?err,
"Failed to execute filter.");
MethodError::ServerPartialFail
})
}
pub async fn fts_filter<T: Into<u8> + Display + Clone + std::fmt::Debug>(
&self,
account_id: u32,
collection: Collection,
filters: Vec<FtsFilter<T>>,
) -> Result<RoaringBitmap, MethodError> {
self.fts_store
.query(account_id, collection, filters)
.await
.map_err(|err| {
tracing::error!(event = "error",
context = "fts-filter",
account_id = account_id,
collection = ?collection,
error = ?err,
@ -805,6 +841,11 @@ pub enum NamedKey<T: AsRef<[u8]>> {
Name(T),
Id(u32),
Quota(u32),
IndexEmail {
account_id: u32,
document_id: u32,
seq: u64,
},
}
impl<T: AsRef<[u8]>> From<&NamedKey<T>> for ValueClass {
@ -817,21 +858,44 @@ impl<T: AsRef<[u8]>> From<&NamedKey<T>> for ValueClass {
.finalize(),
),
NamedKey::Id(id) => ValueClass::Named(
KeySerializer::new(std::mem::size_of::<u32>())
KeySerializer::new(std::mem::size_of::<u32>() + 1)
.write(1u8)
.write_leb128(*id)
.finalize(),
),
NamedKey::Quota(id) => ValueClass::Named(
KeySerializer::new(std::mem::size_of::<u32>())
KeySerializer::new(std::mem::size_of::<u32>() + 1)
.write(2u8)
.write_leb128(*id)
.finalize(),
),
NamedKey::IndexEmail {
account_id,
document_id,
seq,
} => ValueClass::Named(
KeySerializer::new(std::mem::size_of::<u32>() * 4 + 1)
.write(3u8)
.write(*seq)
.write(*account_id)
.write(*document_id)
.finalize(),
),
}
}
}
impl<T: AsRef<[u8]>> NamedKey<T> {
    /// Decodes a serialized `IndexEmail` queue key back into its parts.
    ///
    /// The trailing fixed-width fields are read relative to the end of the
    /// key: a big-endian `seq` (u64) followed by big-endian `account_id`
    /// and `document_id` (u32 each).
    // NOTE(review): the offset arithmetic underflows (and panics) for keys
    // shorter than U64_LEN + 2 * U32_LEN bytes — assumes input comes from
    // our own serializer; confirm callers never pass foreign keys.
    pub fn deserialize_index_email(bytes: &[u8]) -> store::Result<Self> {
        let len = bytes.len();
        let seq = bytes.deserialize_be_u64(len - U64_LEN - (U32_LEN * 2))?;
        let account_id = bytes.deserialize_be_u32(len - U32_LEN * 2)?;
        let document_id = bytes.deserialize_be_u32(len - U32_LEN)?;
        Ok(NamedKey::IndexEmail {
            account_id,
            document_id,
            seq,
        })
    }
}
impl<T: AsRef<[u8]>> From<NamedKey<T>> for ValueClass {
fn from(key: NamedKey<T>) -> Self {
(&key).into()

View file

@ -36,43 +36,73 @@ use super::IPC_CHANNEL_BUFFER;
pub enum Event {
PurgeDb,
PurgeBlobs,
PurgeSessions,
IndexStart,
IndexDone,
#[cfg(feature = "test_mode")]
IndexIsActive(tokio::sync::oneshot::Sender<bool>),
Exit,
}
const TASK_PURGE_DB: usize = 0;
const TASK_PURGE_BLOBS: usize = 1;
const TASK_PURGE_SESSIONS: usize = 2;
const TASK_PURGE_SESSIONS: usize = 1;
pub fn spawn_housekeeper(core: Arc<JMAP>, settings: &Config, mut rx: mpsc::Receiver<Event>) {
let purge_db_at = settings
.property_or_static::<SimpleCron>("jmap.purge.schedule.db", "0 3 *")
.failed("Initialize housekeeper");
let purge_blobs_at = settings
.property_or_static::<SimpleCron>("jmap.purge.schedule.blobs", "30 3 *")
.failed("Initialize housekeeper");
let purge_cache = settings
.property_or_static::<SimpleCron>("jmap.purge.schedule.sessions", "15 * *")
.failed("Initialize housekeeper");
tokio::spawn(async move {
tracing::debug!("Housekeeper task started.");
let mut index_busy = true;
let mut index_pending = false;
// Index any queued messages
let core_ = core.clone();
tokio::spawn(async move {
core_.fts_index_queued().await;
});
loop {
let time_to_next = [
purge_db_at.time_to_next(),
purge_blobs_at.time_to_next(),
purge_cache.time_to_next(),
];
let mut tasks_to_run = [false, false, false];
let time_to_next = [purge_db_at.time_to_next(), purge_cache.time_to_next()];
let mut tasks_to_run = [false, false];
let start_time = Instant::now();
match tokio::time::timeout(time_to_next.iter().min().copied().unwrap(), rx.recv()).await
{
Ok(Some(event)) => match event {
Event::PurgeDb => tasks_to_run[TASK_PURGE_DB] = true,
Event::PurgeBlobs => tasks_to_run[TASK_PURGE_BLOBS] = true,
Event::PurgeSessions => tasks_to_run[TASK_PURGE_SESSIONS] = true,
Event::IndexStart => {
if !index_busy {
index_busy = true;
let core = core.clone();
tokio::spawn(async move {
core.fts_index_queued().await;
});
} else {
index_pending = true;
}
}
Event::IndexDone => {
if index_pending {
index_pending = false;
let core = core.clone();
tokio::spawn(async move {
core.fts_index_queued().await;
});
} else {
index_busy = false;
}
}
#[cfg(feature = "test_mode")]
Event::IndexIsActive(tx) => {
tx.send(index_busy).ok();
}
Event::Exit => {
tracing::debug!("Housekeeper task exiting.");
return;
@ -104,13 +134,12 @@ pub fn spawn_housekeeper(core: Arc<JMAP>, settings: &Config, mut rx: mpsc::Recei
tokio::spawn(async move {
match task_id {
TASK_PURGE_DB => {
tracing::info!("Purging database.");
tracing::info!("Purging database...");
if let Err(err) = core.store.purge_bitmaps().await {
tracing::error!("Error while purging bitmaps: {}", err);
}
}
TASK_PURGE_BLOBS => {
tracing::info!("Purging temporary blobs.",);
tracing::info!("Purging blobs...",);
if let Err(err) =
core.store.blob_hash_purge(core.blob_store.clone()).await
{

View file

@ -0,0 +1,224 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use jmap_proto::types::{collection::Collection, property::Property};
use store::{
fts::index::FtsDocument,
write::{BatchBuilder, ValueClass},
IterateParams, ValueKey,
};
use crate::{
email::{index::IndexMessageText, metadata::MessageMetadata},
Bincode, NamedKey, JMAP,
};
use super::housekeeper::Event;
impl JMAP {
    /// Drains the queue of messages pending full-text indexing.
    ///
    /// Scans every `NamedKey::IndexEmail` entry in the store. Entries queued
    /// with a non-empty value (the message's blob hash at enqueue time) are
    /// indexed into the FTS store, provided the hash still matches the
    /// current metadata; entries queued with an empty value have their
    /// document removed from the FTS index instead. Successfully handled
    /// entries are cleared from the queue; failed ones are left in place
    /// (`continue`) so they can be retried on a later run. Finishes by
    /// notifying the housekeeper with `Event::IndexDone`.
    pub async fn fts_index_queued(&self) {
        // Lower bound of the IndexEmail key range (all fields zeroed).
        let from_key = ValueKey::<ValueClass> {
            account_id: 0,
            collection: 0,
            document_id: 0,
            class: NamedKey::IndexEmail::<&[u8]> {
                account_id: 0,
                document_id: 0,
                seq: 0,
            }
            .into(),
        };
        // Upper bound of the IndexEmail key range (all fields maxed).
        let to_key = ValueKey::<ValueClass> {
            account_id: u32::MAX,
            collection: u8::MAX,
            document_id: u32::MAX,
            class: NamedKey::IndexEmail::<&[u8]> {
                account_id: u32::MAX,
                document_id: u32::MAX,
                seq: u64::MAX,
            }
            .into(),
        };

        // Retrieve entries pending to be indexed
        // TODO: Support indexing from multiple nodes
        let mut entries = Vec::new();
        let _ = self
            .store
            .iterate(
                IterateParams::new(from_key, to_key).ascending(),
                |key, value| {
                    entries.push((
                        NamedKey::<Vec<u8>>::deserialize_index_email(key)?,
                        value.to_vec(),
                    ));
                    Ok(true)
                },
            )
            .await
            .map_err(|err| {
                tracing::error!(
                    context = "fts_index_queued",
                    event = "error",
                    reason = ?err,
                    "Failed to iterate over index emails"
                );
            });

        // Index entries
        for (key, blob_hash) in entries {
            if let NamedKey::IndexEmail {
                account_id,
                document_id,
                ..
            } = &key
            {
                if !blob_hash.is_empty() {
                    // A non-empty value is the blob hash captured when the
                    // message was queued for indexing.
                    match self
                        .get_property::<Bincode<MessageMetadata>>(
                            *account_id,
                            Collection::Email,
                            *document_id,
                            Property::BodyStructure,
                        )
                        .await
                    {
                        // Only index when the stored metadata still points at
                        // the same blob; otherwise the message changed since
                        // it was queued (handled by the `_` arm below).
                        Ok(Some(metadata))
                            if metadata.inner.blob_hash.as_slice() == blob_hash.as_slice() =>
                        {
                            // Obtain raw message
                            let raw_message = if let Ok(Some(raw_message)) =
                                self.get_blob(&metadata.inner.blob_hash, 0..u32::MAX).await
                            {
                                raw_message
                            } else {
                                tracing::warn!(
                                    context = "fts_index_queued",
                                    event = "error",
                                    account_id = *account_id,
                                    document_id = *document_id,
                                    blob_hash = ?metadata.inner.blob_hash,
                                    "Message blob not found"
                                );
                                // Leave the entry queued for retry.
                                continue;
                            };
                            let message = metadata.inner.contents.into_message(&raw_message);

                            // Index message
                            let document =
                                FtsDocument::with_default_language(self.config.default_language)
                                    .with_account_id(*account_id)
                                    .with_collection(Collection::Email)
                                    .with_document_id(*document_id)
                                    .index_message(&message);
                            if let Err(err) = self.fts_store.index(document).await {
                                tracing::error!(
                                    context = "fts_index_queued",
                                    event = "error",
                                    account_id = *account_id,
                                    document_id = *document_id,
                                    reason = ?err,
                                    "Failed to index email in FTS index"
                                );
                                // Leave the entry queued for retry.
                                continue;
                            }
                            tracing::debug!(
                                context = "fts_index_queued",
                                event = "index",
                                account_id = *account_id,
                                document_id = *document_id,
                                "Indexed document in FTS index"
                            );
                        }
                        Err(err) => {
                            tracing::error!(
                                context = "fts_index_queued",
                                event = "error",
                                account_id = *account_id,
                                document_id = *document_id,
                                reason = ?err,
                                "Failed to retrieve email metadata"
                            );
                            // Storage error: stop processing the queue.
                            break;
                        }
                        _ => {
                            // The message was probably deleted or overwritten
                            tracing::debug!(
                                context = "fts_index_queued",
                                event = "error",
                                account_id = *account_id,
                                document_id = *document_id,
                                "Email metadata not found"
                            );
                        }
                    }
                } else {
                    // An empty value marks the document for removal from the
                    // FTS index.
                    if let Err(err) = self
                        .fts_store
                        .remove(*account_id, Collection::Email.into(), *document_id)
                        .await
                    {
                        tracing::error!(
                            context = "fts_index_queued",
                            event = "error",
                            account_id = *account_id,
                            document_id = *document_id,
                            reason = ?err,
                            "Failed to remove document from FTS index"
                        );
                        // Leave the entry queued for retry.
                        continue;
                    }
                    tracing::debug!(
                        context = "fts_index_queued",
                        event = "delete",
                        account_id = *account_id,
                        document_id = *document_id,
                        "Deleted document from FTS index"
                    );
                }
            }

            // Remove entry from queue
            if let Err(err) = self
                .store
                .write(BatchBuilder::new().clear(key).build_batch())
                .await
            {
                tracing::error!(
                    context = "fts_index_queued",
                    event = "error",
                    reason = ?err,
                    "Failed to remove index email from queue"
                );
                break;
            }
        }
        // Signal the housekeeper so it can dispatch a pending re-run or mark
        // the indexer idle.
        if let Err(err) = self.housekeeper_tx.send(Event::IndexDone).await {
            tracing::warn!("Failed to send index done event to housekeeper: {}", err);
        }
    }
}

View file

@ -23,6 +23,7 @@
pub mod delivery;
pub mod housekeeper;
pub mod index;
pub mod ingest;
pub mod state;

View file

@ -22,6 +22,7 @@
*/
pub mod detect;
pub mod search_snippet;
pub mod stemmer;
pub mod stopwords;

View file

@ -21,7 +21,7 @@
* for more details.
*/
use super::term_index::Term;
use super::Language;
fn escape_char(c: char, string: &mut String) {
match c {
@ -45,9 +45,53 @@ fn escape_char_len(c: char) -> usize {
}
}
pub fn generate_snippet(terms: &[Term], text: &str) -> Option<String> {
/// Byte span of a matched token within the source text, used to wrap the
/// match in `<mark>` tags when assembling the snippet.
pub struct Term {
    // Byte offset of the token's first character in the text.
    offset: usize,
    // Byte length of the token.
    len: usize,
}
pub fn generate_snippet(
text: &str,
needles: &[impl AsRef<str>],
language: Language,
is_exact: bool,
) -> Option<String> {
let mut terms = Vec::new();
if is_exact {
let tokens = language.tokenize_text(text, 200).collect::<Vec<_>>();
for tokens in tokens.windows(needles.len()) {
if needles
.iter()
.zip(tokens)
.all(|(needle, token)| needle.as_ref() == token.word.as_ref())
{
for token in tokens {
terms.push(Term {
offset: token.from,
len: token.to - token.from,
});
}
}
}
} else {
for token in language.tokenize_text(text, 200) {
if needles.iter().any(|needle| {
let needle = needle.as_ref();
needle == token.word.as_ref() || needle.len() > 2 && token.word.contains(needle)
}) {
terms.push(Term {
offset: token.from,
len: token.to - token.from,
});
}
}
}
if terms.is_empty() {
return None;
}
let mut snippet = String::with_capacity(text.len());
let start_offset = terms.get(0)?.offset as usize;
let start_offset = terms.get(0)?.offset;
if start_offset > 0 {
let mut word_count = 0;
@ -92,25 +136,22 @@ pub fn generate_snippet(terms: &[Term], text: &str) -> Option<String> {
let mut terms = terms.iter().peekable();
'outer: while let Some(term) = terms.next() {
if snippet.len() + ("<mark>".len() * 2) + term.len as usize + 1 > 255 {
if snippet.len() + ("<mark>".len() * 2) + term.len + 1 > 255 {
break;
}
snippet.push_str("<mark>");
snippet.push_str(text.get(term.offset as usize..term.offset as usize + term.len as usize)?);
snippet.push_str(text.get(term.offset..term.offset + term.len)?);
snippet.push_str("</mark>");
let next_offset = if let Some(next_term) = terms.peek() {
next_term.offset as usize
next_term.offset
} else {
text.len()
};
let mut last_is_space = false;
for char in text
.get(term.offset as usize + term.len as usize..next_offset)?
.chars()
{
for char in text.get(term.offset + term.len..next_offset)?.chars() {
if !char.is_whitespace() {
last_is_space = false;
} else {
@ -133,15 +174,7 @@ pub fn generate_snippet(terms: &[Term], text: &str) -> Option<String> {
#[cfg(test)]
mod tests {
use nlp::language::Language;
use crate::{
fts::term_index::{TermIndex, TermIndexBuilder},
Deserialize, Serialize,
};
use super::*;
use crate::language::{search_snippet::generate_snippet, Language};
#[test]
fn search_snippets() {
@ -236,39 +269,18 @@ mod tests {
];
for (parts, tests) in inputs {
let mut builder = TermIndexBuilder::new();
for (needles, snippets) in tests {
let mut results = Vec::new();
for (field_num, part) in parts.iter().enumerate() {
let mut terms = Vec::new();
for token in Language::English.tokenize_text(part, 40) {
terms.push(builder.add_token(token));
for part in &parts {
if let Some(matched) =
generate_snippet(part, &needles, Language::English, false)
{
results.push(matched);
}
builder.add_terms(field_num as u8, 0, terms);
}
let compressed_term_index = builder.serialize();
let term_index = TermIndex::deserialize(&compressed_term_index[..]).unwrap();
for (match_words, snippets) in tests {
let mut match_terms = Vec::new();
for word in &match_words {
match_terms.push(term_index.get_match_term(word, None));
}
let term_groups = term_index
.match_terms(&match_terms, None, false, true, true)
.unwrap()
.unwrap();
assert_eq!(term_groups.len(), snippets.len());
for (term_group, snippet) in term_groups.iter().zip(snippets.iter()) {
assert_eq!(
snippet,
&generate_snippet(&term_group.terms, parts[term_group.field_id as usize])
.unwrap()
);
}
assert_eq!(snippets, results);
}
}
}

View file

@ -141,6 +141,7 @@ pub static STEMMER_MAP: &[Option<Algorithm>] = &[
None, // Tagalog = 67,
None, // Armenian = 68,
None, // Unknown = 69,
None, // None = 70,
];
#[cfg(test)]

View file

@ -93,6 +93,7 @@ pub static STOP_WORDS: &[Option<&Set<&'static str>>] = &[
None, // Tagalog = 67,
None, // Armenian = 68,
None, // Unknown = 69,
None, // None = 70,
];
static ARABIC: Set<&'static str> = phf_set! {

View file

@ -30,6 +30,7 @@ num_cpus = { version = "1.15.0", optional = true }
blake3 = "1.3.3"
tracing = "0.1"
async-trait = "0.1.68"
lz4_flex = { version = "0.11" }
[dev-dependencies]
tokio = { version = "1.23", features = ["full"] }

View file

@ -0,0 +1,44 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::ops::Range;
use super::FdbStore;
impl FdbStore {
    // NOTE(review): blob storage for the FoundationDB backend is stubbed
    // out; every method below panics via `todo!()` if invoked.

    /// Reads the blob stored under `key`, restricted to `range` — unimplemented.
    pub(crate) async fn get_blob(
        &self,
        key: &[u8],
        range: Range<u32>,
    ) -> crate::Result<Option<Vec<u8>>> {
        todo!()
    }

    /// Writes `data` under `key` — unimplemented.
    pub(crate) async fn put_blob(&self, key: &[u8], data: &[u8]) -> crate::Result<()> {
        todo!()
    }

    /// Deletes the blob stored under `key` — unimplemented.
    pub(crate) async fn delete_blob(&self, key: &[u8]) -> crate::Result<bool> {
        todo!()
    }
}

View file

@ -28,10 +28,7 @@ use futures::StreamExt;
use rand::Rng;
use std::time::Instant;
use crate::{
write::{key::KeySerializer, now},
BitmapKey, IndexKey, SUBSPACE_VALUES,
};
use crate::{write::now, BitmapKey, IndexKey};
use super::{
bitmap::{next_available_index, BITS_PER_BLOCK},
@ -183,36 +180,4 @@ impl FdbStore {
}
}
}
pub(crate) async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
let start = Instant::now();
let counter = KeySerializer::new(U32_LEN + 2)
.write(SUBSPACE_VALUES)
.write(account_id)
.finalize();
loop {
// Read id
let trx = self.db.create_trx()?;
let id = if let Some(bytes) = trx.get(&counter, false).await? {
u64::deserialize(&bytes)? + 1
} else {
0
};
trx.set(&counter, &id.serialize());
match trx.commit().await {
Ok(_) => {
return Ok(id);
}
Err(err) => {
if start.elapsed() < MAX_COMMIT_TIME {
err.on_error().await?;
} else {
return Err(FdbError::from(err).into());
}
}
}
}
}
}

View file

@ -26,6 +26,7 @@ use foundationdb::{api::NetworkAutoStop, Database, FdbError};
use crate::Error;
pub mod bitmap;
pub mod blob;
pub mod id_assign;
pub mod main;
pub mod purge;

View file

@ -95,7 +95,7 @@ impl FdbStore {
account_id: u32,
collection: u8,
field: u8,
value: Vec<u8>,
value: &[u8],
op: query::Operator,
) -> crate::Result<Option<RoaringBitmap>> {
let k1 =
@ -116,27 +116,23 @@ impl FdbStore {
let (begin, end) = match op {
Operator::LowerThan => (
KeySelector::first_greater_or_equal(k1.finalize()),
KeySelector::first_greater_or_equal(k2.write(&value[..]).write(0u32).finalize()),
KeySelector::first_greater_or_equal(k2.write(value).write(0u32).finalize()),
),
Operator::LowerEqualThan => (
KeySelector::first_greater_or_equal(k1.finalize()),
KeySelector::first_greater_or_equal(
k2.write(&value[..]).write(u32::MAX).finalize(),
),
KeySelector::first_greater_or_equal(k2.write(value).write(u32::MAX).finalize()),
),
Operator::GreaterThan => (
KeySelector::first_greater_than(k1.write(&value[..]).write(u32::MAX).finalize()),
KeySelector::first_greater_than(k1.write(value).write(u32::MAX).finalize()),
KeySelector::first_greater_or_equal(k2.finalize()),
),
Operator::GreaterEqualThan => (
KeySelector::first_greater_or_equal(k1.write(&value[..]).write(0u32).finalize()),
KeySelector::first_greater_or_equal(k1.write(value).write(0u32).finalize()),
KeySelector::first_greater_or_equal(k2.finalize()),
),
Operator::Equal => (
KeySelector::first_greater_or_equal(k1.write(&value[..]).write(0u32).finalize()),
KeySelector::first_greater_or_equal(
k2.write(&value[..]).write(u32::MAX).finalize(),
),
KeySelector::first_greater_or_equal(k1.write(value).write(0u32).finalize()),
KeySelector::first_greater_or_equal(k2.write(value).write(u32::MAX).finalize()),
),
};
let key_len = begin.key().len();

View file

@ -52,9 +52,7 @@ impl FsStore {
)))
}
}
}
impl FsStore {
pub(crate) async fn get_blob(
&self,
key: &[u8],
@ -113,9 +111,7 @@ impl FsStore {
Ok(false)
}
}
}
impl FsStore {
fn build_path(&self, key: &[u8]) -> PathBuf {
let mut path = self.path.clone();

View file

@ -30,8 +30,8 @@ pub mod s3;
#[cfg(feature = "sqlite")]
pub mod sqlite;
pub(crate) const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 2) as usize;
pub(crate) const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1;
pub const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 1) as usize;
pub const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1;
#[cfg(feature = "test_mode")]
pub static ID_ASSIGNMENT_EXPIRY: std::sync::atomic::AtomicU64 =

View file

@ -140,3 +140,8 @@ impl From<rocksdb::Error> for crate::Error {
Self::InternalError(format!("RocksDB error: {}", value))
}
}
/// RocksDB-backed store handle, compiled only with the `rocks` feature.
// NOTE(review): this shares its name with the `Store` enum used elsewhere —
// confirm the intended module path to avoid ambiguity.
#[cfg(feature = "rocks")]
pub struct Store {
    // Multi-threaded optimistic-transaction RocksDB database handle.
    db: rocksdb::OptimisticTransactionDB<rocksdb::MultiThreaded>,
}

View file

@ -0,0 +1,83 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::ops::Range;
use rusqlite::OptionalExtension;
use super::SqliteStore;
impl SqliteStore {
    /// Retrieves the blob stored under `key`, limited to the requested byte
    /// `range`. Returns `Ok(None)` when no row exists for the key.
    pub(crate) async fn get_blob(
        &self,
        key: &[u8],
        range: Range<u32>,
    ) -> crate::Result<Option<Vec<u8>>> {
        let conn = self.conn_pool.get()?;
        self.spawn_worker(move || {
            let mut result = conn.prepare_cached("SELECT v FROM t WHERE k = ?")?;
            result
                .query_row([&key], |row| {
                    Ok({
                        let bytes = row.get_ref(0)?.as_bytes()?;
                        if range.start == 0 && range.end == u32::MAX {
                            // Full-range request: return the blob as-is.
                            bytes.to_vec()
                        } else {
                            // Partial read; an out-of-bounds start yields an
                            // empty vector rather than an error.
                            bytes
                                .get(
                                    range.start as usize
                                        ..std::cmp::min(bytes.len(), range.end as usize),
                                )
                                .unwrap_or_default()
                                .to_vec()
                        }
                    })
                })
                .optional()
                .map_err(Into::into)
        })
        .await
    }

    /// Stores `data` under `key`, overwriting any existing blob.
    pub(crate) async fn put_blob(&self, key: &[u8], data: &[u8]) -> crate::Result<()> {
        let conn = self.conn_pool.get()?;
        self.spawn_worker(move || {
            conn.prepare_cached("INSERT OR REPLACE INTO t (k, v) VALUES (?, ?)")?
                .execute([key, data])
                .map_err(|e| crate::Error::InternalError(format!("Failed to insert blob: {}", e)))
                .map(|_| ())
        })
        .await
    }

    /// Deletes the blob stored under `key`, returning whether a row was
    /// actually removed.
    pub(crate) async fn delete_blob(&self, key: &[u8]) -> crate::Result<bool> {
        let conn = self.conn_pool.get()?;
        self.spawn_worker(move || {
            conn.prepare_cached("DELETE FROM t WHERE k = ?")?
                .execute([key])
                .map_err(|e| crate::Error::InternalError(format!("Failed to delete blob: {}", e)))
                // Fix: previously returned `true` unconditionally; `execute`
                // reports the affected row count, so report whether the blob
                // actually existed.
                .map(|rows| rows > 0)
        })
        .await
    }
}

View file

@ -23,7 +23,7 @@
use roaring::RoaringBitmap;
use crate::{write::key::DeserializeBigEndian, BitmapKey, IterateParams, LogKey, U64_LEN};
use crate::BitmapKey;
use super::SqliteStore;
@ -46,15 +46,13 @@ impl IdCacheKey {
pub struct IdAssigner {
pub freed_document_ids: Option<RoaringBitmap>,
pub next_document_id: u32,
pub next_change_id: u64,
}
impl IdAssigner {
pub fn new(used_ids: Option<RoaringBitmap>, next_change_id: u64) -> Self {
pub fn new(used_ids: Option<RoaringBitmap>) -> Self {
let mut assigner = IdAssigner {
freed_document_ids: None,
next_document_id: 0,
next_change_id,
};
if let Some(used_ids) = used_ids {
if let Some(max) = used_ids.max() {
@ -85,28 +83,9 @@ impl IdAssigner {
id
}
}
pub fn assign_change_id(&mut self) -> u64 {
let id = self.next_change_id;
self.next_change_id += 1;
id
}
}
impl SqliteStore {
pub(crate) async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
let collection = u8::MAX;
let key = IdCacheKey::new(account_id, collection);
for _ in 0..2 {
if let Some(assigner) = self.id_assigner.lock().get_mut(&key) {
return Ok(assigner.assign_change_id());
}
self.build_id_assigner(key).await?;
}
unreachable!()
}
pub(crate) async fn assign_document_id(
&self,
account_id: u32,
@ -128,56 +107,16 @@ impl SqliteStore {
let used_ids = self
.get_bitmap(BitmapKey::document_ids(key.account_id, key.collection))
.await?;
let next_change_id = self
.get_last_change_id(key.account_id, key.collection)
.await?
.map(|id| id + 1)
.unwrap_or(0);
let id_assigner = self.id_assigner.clone();
let mut id_assigner = id_assigner.lock();
// Make sure id assigner was not added by another thread
if id_assigner.get_mut(&key).is_none() {
id_assigner.insert(key, IdAssigner::new(used_ids, next_change_id));
id_assigner.insert(key, IdAssigner::new(used_ids));
}
Ok(())
}
async fn get_last_change_id(
&self,
account_id: u32,
collection: impl Into<u8> + Sync + Send,
) -> crate::Result<Option<u64>> {
let collection = collection.into();
let from_key = LogKey {
account_id,
collection,
change_id: u64::MAX,
};
let to_key = LogKey {
account_id,
collection,
change_id: 0,
};
let mut last_change_id = None;
self.iterate(
IterateParams::new(from_key, to_key)
.descending()
.no_values()
.only_first(),
|key, _| {
last_change_id = key.deserialize_be_u64(key.len() - U64_LEN)?.into();
Ok(false)
},
)
.await?;
Ok(last_change_id)
}
}
#[cfg(test)]
@ -188,7 +127,7 @@ mod tests {
#[test]
fn id_assigner() {
let mut assigner = IdAssigner::new(None, 0);
let mut assigner = IdAssigner::new(None);
assert_eq!(assigner.assign_document_id(), 0);
assert_eq!(assigner.assign_document_id(), 1);
assert_eq!(assigner.assign_document_id(), 2);
@ -197,7 +136,6 @@ mod tests {
RoaringBitmap::from_sorted_iter([0, 2, 4, 6])
.unwrap()
.into(),
0,
);
assert_eq!(assigner.assign_document_id(), 1);
assert_eq!(assigner.assign_document_id(), 3);

View file

@ -30,8 +30,8 @@ use tokio::sync::oneshot;
use utils::{config::Config, UnwrapFailure};
use crate::{
SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_COUNTERS, SUBSPACE_INDEXES,
SUBSPACE_LOGS, SUBSPACE_VALUES,
SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_BLOB_DATA, SUBSPACE_COUNTERS,
SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_VALUES,
};
use super::{pool::SqliteConnectionManager, SqliteStore};
@ -78,7 +78,12 @@ impl SqliteStore {
pub(super) fn create_tables(&self) -> crate::Result<()> {
let conn = self.conn_pool.get()?;
for table in [SUBSPACE_VALUES, SUBSPACE_LOGS, SUBSPACE_ACLS] {
for table in [
SUBSPACE_VALUES,
SUBSPACE_LOGS,
SUBSPACE_ACLS,
SUBSPACE_BLOB_DATA,
] {
let table = char::from(table);
conn.execute(
&format!(

View file

@ -34,6 +34,7 @@ use self::{
pool::SqliteConnectionManager,
};
pub mod blob;
pub mod id_assign;
pub mod main;
pub mod pool;

View file

@ -32,7 +32,6 @@ impl SqliteStore {
pub(crate) async fn purge_bitmaps(&self) -> crate::Result<()> {
let conn = self.conn_pool.get()?;
self.spawn_worker(move || {
//Todo
conn.prepare_cached(concat!(
"DELETE FROM b WHERE ",
"a = 0 AND ",

View file

@ -110,7 +110,7 @@ impl SqliteStore {
account_id: u32,
collection: u8,
field: u8,
value: Vec<u8>,
value: &[u8],
op: query::Operator,
) -> crate::Result<Option<RoaringBitmap>> {
let conn = self.conn_pool.get()?;
@ -132,27 +132,27 @@ impl SqliteStore {
Operator::LowerThan => (
("SELECT k FROM i WHERE k >= ? AND k < ?"),
(k1.finalize()),
(k2.write(&value[..]).write(0u32).finalize()),
(k2.write(value).write(0u32).finalize()),
),
Operator::LowerEqualThan => (
("SELECT k FROM i WHERE k >= ? AND k <= ?"),
(k1.finalize()),
(k2.write(&value[..]).write(u32::MAX).finalize()),
(k2.write(value).write(u32::MAX).finalize()),
),
Operator::GreaterThan => (
("SELECT k FROM i WHERE k > ? AND k <= ?"),
(k1.write(&value[..]).write(u32::MAX).finalize()),
(k1.write(value).write(u32::MAX).finalize()),
(k2.finalize()),
),
Operator::GreaterEqualThan => (
("SELECT k FROM i WHERE k >= ? AND k <= ?"),
(k1.write(&value[..]).write(0u32).finalize()),
(k1.write(value).write(0u32).finalize()),
(k2.finalize()),
),
Operator::Equal => (
("SELECT k FROM i WHERE k >= ? AND k <= ?"),
(k1.write(&value[..]).write(0u32).finalize()),
(k2.write(&value[..]).write(u32::MAX).finalize()),
(k1.write(value).write(0u32).finalize()),
(k2.write(value).write(u32::MAX).finalize()),
),
};
@ -314,7 +314,7 @@ impl SqliteStore {
// Values
let mut has_errors = false;
for table in [crate::SUBSPACE_VALUES, crate::SUBSPACE_ACLS, crate::SUBSPACE_COUNTERS] {
for table in [crate::SUBSPACE_VALUES, crate::SUBSPACE_ACLS, crate::SUBSPACE_COUNTERS, crate::SUBSPACE_BLOB_DATA] {
let table = char::from(table);
let mut query = conn.prepare_cached(&format!("SELECT k, v FROM {table}")).unwrap();
let mut rows = query.query([]).unwrap();
@ -370,7 +370,7 @@ impl SqliteStore {
// Bitmaps
let mut query = conn
.prepare_cached("SELECT z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p FROM b")
.prepare_cached(&format!("SELECT z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p FROM {}", char::from(crate::SUBSPACE_BITMAPS)))
.unwrap();
let mut rows = query.query([]).unwrap();

View file

@ -274,8 +274,8 @@ impl SqliteStore {
#[cfg(feature = "test_mode")]
pub(crate) async fn destroy(&self) {
use crate::{
SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_COUNTERS, SUBSPACE_INDEXES,
SUBSPACE_LOGS, SUBSPACE_VALUES,
SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_BLOB_DATA, SUBSPACE_COUNTERS,
SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_VALUES,
};
let conn = self.conn_pool.get().unwrap();
@ -287,6 +287,7 @@ impl SqliteStore {
SUBSPACE_BLOBS,
SUBSPACE_ACLS,
SUBSPACE_COUNTERS,
SUBSPACE_BLOB_DATA,
] {
conn.execute(&format!("DROP TABLE {}", char::from(table)), [])
.unwrap();

View file

@ -21,23 +21,27 @@
* for more details.
*/
use std::ops::{BitAndAssign, Range};
use std::{
fmt::Display,
ops::{BitAndAssign, Range},
};
use roaring::RoaringBitmap;
use crate::{
fts::{index::FtsDocument, FtsFilter},
query,
write::{Batch, BitmapClass, ValueClass},
BitmapKey, BlobStore, Deserialize, IterateParams, Key, Store, ValueKey,
BitmapKey, BlobStore, Deserialize, FtsStore, IterateParams, Key, Store, ValueKey,
};
impl Store {
pub async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
/*pub async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
match self {
Self::SQLite(store) => store.assign_change_id(account_id).await,
Self::FoundationDb(store) => store.assign_change_id(account_id).await,
}
}
}*/
pub async fn assign_document_id(
&self,
@ -110,7 +114,7 @@ impl Store {
account_id: u32,
collection: u8,
field: u8,
value: Vec<u8>,
value: &[u8],
op: query::Operator,
) -> crate::Result<Option<RoaringBitmap>> {
match self {
@ -149,7 +153,7 @@ impl Store {
}
}
pub(crate) async fn iterate<T: Key>(
pub async fn iterate<T: Key>(
&self,
params: IterateParams<T>,
cb: impl for<'x> FnMut(&'x [u8], &'x [u8]) -> crate::Result<bool> + Sync + Send,
@ -190,6 +194,27 @@ impl Store {
}
}
    /// Reads `range` bytes of the blob stored under `key`, dispatching to
    /// the active backend.
    pub async fn get_blob(&self, key: &[u8], range: Range<u32>) -> crate::Result<Option<Vec<u8>>> {
        match self {
            Self::SQLite(store) => store.get_blob(key, range).await,
            Self::FoundationDb(store) => store.get_blob(key, range).await,
        }
    }

    /// Stores `data` as a blob under `key` in the active backend.
    pub async fn put_blob(&self, key: &[u8], data: &[u8]) -> crate::Result<()> {
        match self {
            Self::SQLite(store) => store.put_blob(key, data).await,
            Self::FoundationDb(store) => store.put_blob(key, data).await,
        }
    }

    /// Deletes the blob stored under `key`; presumably the returned bool
    /// indicates whether a blob existed — confirm against backend impls.
    pub async fn delete_blob(&self, key: &[u8]) -> crate::Result<bool> {
        match self {
            Self::SQLite(store) => store.delete_blob(key).await,
            Self::FoundationDb(store) => store.delete_blob(key).await,
        }
    }
#[cfg(feature = "test_mode")]
pub async fn destroy(&self) {
match self {
@ -269,6 +294,8 @@ impl BlobStore {
match self {
Self::Fs(store) => store.get_blob(key, range).await,
Self::S3(store) => store.get_blob(key, range).await,
Self::Sqlite(store) => store.get_blob(key, range).await,
Self::FoundationDb(store) => store.get_blob(key, range).await,
}
}
@ -276,6 +303,8 @@ impl BlobStore {
match self {
Self::Fs(store) => store.put_blob(key, data).await,
Self::S3(store) => store.put_blob(key, data).await,
Self::Sqlite(store) => store.put_blob(key, data).await,
Self::FoundationDb(store) => store.put_blob(key, data).await,
}
}
@ -283,6 +312,47 @@ impl BlobStore {
match self {
Self::Fs(store) => store.delete_blob(key).await,
Self::S3(store) => store.delete_blob(key).await,
Self::Sqlite(store) => store.delete_blob(key).await,
Self::FoundationDb(store) => store.delete_blob(key).await,
}
}
}
impl FtsStore {
    /// Indexes a document in the configured full-text backend.
    pub async fn index<T: Into<u8> + Display + Clone + std::fmt::Debug>(
        &self,
        document: FtsDocument<'_, T>,
    ) -> crate::Result<()> {
        match self {
            FtsStore::Store(store) => store.fts_index(document).await,
        }
    }

    /// Runs a full-text query and returns the matching document ids.
    pub async fn query<T: Into<u8> + Display + Clone + std::fmt::Debug>(
        &self,
        account_id: u32,
        collection: impl Into<u8>,
        filters: Vec<FtsFilter<T>>,
    ) -> crate::Result<RoaringBitmap> {
        match self {
            FtsStore::Store(store) => store.fts_query(account_id, collection, filters).await,
        }
    }

    /// Removes one document from the full-text index; returns `false`
    /// when the document had no stored term index.
    pub async fn remove(
        &self,
        account_id: u32,
        collection: u8,
        document_id: u32,
    ) -> crate::Result<bool> {
        match self {
            FtsStore::Store(store) => store.fts_remove(account_id, collection, document_id).await,
        }
    }

    /// Removes all full-text data for an account (a no-op for the
    /// internal-store backend, whose term indexes live with the documents).
    pub async fn remove_all(&self, account_id: u32) -> crate::Result<()> {
        match self {
            FtsStore::Store(store) => store.fts_remove_all(account_id).await,
        }
    }
}

View file

@ -1,257 +0,0 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{
borrow::Cow,
f64::consts::LN_2,
hash::{Hash, Hasher},
};
use nlp::{language::stemmer::StemmedToken, tokenizers::Token};
use roaring::RoaringBitmap;
use utils::codec::leb128::{Leb128Reader, Leb128Vec};
use crate::{Deserialize, Error, Serialize};
/// A bloom filter with target false-positive rate `P`, backed by a
/// roaring bitmap.
pub struct BloomFilter {
    // Number of bits in the filter; 0 marks an empty/unsized filter.
    m: u64,
    // The bit array.
    b: RoaringBitmap,
}

/// Seven hash values derived from a single item (see `BloomHash::hash`).
#[derive(Debug)]
pub struct BloomHash {
    pub h: [u64; 7],
}

/// Hashes for a word plus, optionally, its stemmed form.
#[derive(Debug)]
pub struct BloomHashGroup {
    pub h1: BloomHash,
    pub h2: Option<BloomHash>,
}

// Fixed seeds/keys keep hash values stable across processes and restarts.
const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
    0xaf1f2242106c64b3,
    0x60ca4cfb4b3ed0ce,
    0xc7dbc0bb615e82b3,
    0x520ad065378daf88,
);

lazy_static::lazy_static! {
    static ref SIPHASHER: siphasher::sip::SipHasher13 =
        siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
}

// Target false-positive probability used when sizing new filters.
const P: f64 = 0.01;
impl BloomFilter {
    /// Creates a filter sized for `items` entries at false-positive rate
    /// `P`, with a floor of 10240 bits; `items == 0` yields an empty filter.
    pub fn new(items: usize) -> Self {
        let m = match items {
            0 => 0,
            n => Self::estimate_m(n, P).max(10240),
        };
        Self {
            m,
            b: RoaringBitmap::new(),
        }
    }

    /// Rebuilds a filter from previously serialized parameters.
    fn from_params(m: u64, b: RoaringBitmap) -> Self {
        Self { m, b }
    }

    /// Bit count for `n` items at false-positive rate `p`, rounded up to
    /// a whole number of bytes.
    fn estimate_m(n: usize, p: f64) -> u64 {
        let bits = (n as f64) * f64::ln(p) / (-8.0 * LN_2.powi(2));
        (bits.ceil() as u64) * 8
    }

    /// Optimal number of hash functions for `m` bits and `n` items.
    #[allow(dead_code)]
    fn estimate_k(m: u64, n: usize) -> u32 {
        (((m as f64) / (n as f64) * f64::ln(2.0f64)).ceil() as u32).max(1)
    }

    /// Sets the seven bit positions derived from `hash`.
    pub fn insert(&mut self, hash: &BloomHash) {
        for &h in hash.h.iter() {
            self.b.insert((h % self.m) as u32);
        }
    }

    /// Tests whether all seven bit positions for `hash` are set.
    pub fn contains(&self, hash: &BloomHash) -> bool {
        hash.h.iter().all(|&h| self.b.contains((h % self.m) as u32))
    }

    /// True when every bit of `self` is also set in `other`.
    pub fn is_subset(&self, other: &Self) -> bool {
        self.b.is_subset(&other.b)
    }

    /// True for unsized filters or filters with no bits set.
    pub fn is_empty(&self) -> bool {
        self.m == 0 || self.b.is_empty()
    }
}
/// Hashing abstraction for types that can produce a bloom hash.
pub trait BloomHasher {
    fn hash<T: Hash + AsRef<[u8]> + ?Sized>(item: &T) -> Self;
}

impl BloomHash {
    /// Derives seven hash values from four independent hash functions
    /// (xxh3, farmhash, ahash, siphash13) plus three xor combinations of
    /// adjacent pairs.
    pub fn hash<T: Hash + AsRef<[u8]> + ?Sized>(item: &T) -> Self {
        let h1 = xxhash_rust::xxh3::xxh3_64(item.as_ref());
        let h2 = farmhash::hash64(item.as_ref());
        let h3 = AHASHER.hash_one(item);
        // SipHasher13 is Copy; clone the seeded hasher and feed the item.
        let mut sh = *SIPHASHER;
        sh.write(item.as_ref());
        let h4 = sh.finish();

        Self {
            h: [h1, h2, h3, h4, h1 ^ h2, h2 ^ h3, h3 ^ h4],
        }
    }
}
/// Produces a compact 6-16 byte hash key for a token by concatenating
/// little-endian prefixes of four independent 64-bit hashes; longer
/// tokens keep more bytes from each hash to limit collisions.
pub fn hash_token(item: &str) -> Vec<u8> {
    let h1 = xxhash_rust::xxh3::xxh3_64(item.as_ref()).to_le_bytes();
    let h2 = farmhash::hash64(item.as_ref()).to_le_bytes();
    let h3 = AHASHER.hash_one(item).to_le_bytes();
    let mut sh = *SIPHASHER;
    sh.write(item.as_ref());
    let h4 = sh.finish().to_le_bytes();

    // (prefix length of h1/h2, prefix length of h3/h4) per token length.
    let (n12, n34) = match item.len() {
        0..=8 => (2, 1),
        9..=16 => (2, 2),
        17..=32 => (3, 3),
        _ => (4, 4),
    };

    let mut hash = Vec::with_capacity(2 * (n12 + n34));
    hash.extend_from_slice(&h1[..n12]);
    hash.extend_from_slice(&h2[..n12]);
    hash.extend_from_slice(&h3[..n34]);
    hash.extend_from_slice(&h4[..n34]);
    hash
}
// Convenience conversions so any stringish value can seed a BloomHash.
impl From<&str> for BloomHash {
    fn from(s: &str) -> Self {
        Self::hash(&s)
    }
}

impl From<String> for BloomHash {
    fn from(s: String) -> Self {
        Self::hash(&s)
    }
}

impl From<&String> for BloomHash {
    fn from(s: &String) -> Self {
        Self::hash(&s)
    }
}

impl From<Cow<'_, str>> for BloomHash {
    fn from(s: Cow<'_, str>) -> Self {
        Self::hash(s.as_ref())
    }
}

// A plain token hashes only the word; no stemmed variant.
impl From<Token<Cow<'_, str>>> for BloomHashGroup {
    fn from(t: Token<Cow<'_, str>>) -> Self {
        Self {
            h1: BloomHash::hash(t.word.as_ref()),
            h2: None,
        }
    }
}

// A stemmed token hashes the word plus, when present, the stemmed form
// (suffixed with '_' to keep the two hash spaces distinct).
impl From<StemmedToken<'_>> for BloomHashGroup {
    fn from(t: StemmedToken<'_>) -> Self {
        Self {
            h1: BloomHash::hash(t.word.as_ref()),
            h2: t.stemmed_word.map(|w| BloomHash::hash(&format!("{w}_"))),
        }
    }
}

impl From<Cow<'_, str>> for BloomHashGroup {
    fn from(t: Cow<'_, str>) -> Self {
        Self {
            h1: BloomHash::hash(t.as_ref()),
            h2: None,
        }
    }
}
impl Serialize for BloomFilter {
    /// Encodes the filter as a LEB128-encoded `m` followed by the
    /// serialized roaring bitmap.
    fn serialize(self) -> Vec<u8> {
        let mut buf = Vec::with_capacity(U64_LEN + self.b.serialized_size());
        buf.push_leb128(self.m);
        // Serialization into a Vec cannot fail; the result is ignored.
        let _ = self.b.serialize_into(&mut buf);
        buf
    }
}

impl Deserialize for BloomFilter {
    /// Decodes a filter produced by `Serialize::serialize` above:
    /// LEB128 `m`, then the bitmap bytes.
    fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
        let (m, pos) = bytes.read_leb128().ok_or_else(|| {
            Error::InternalError(
                "Failed to read 'm' value while deserializing bloom filter.".to_string(),
            )
        })?;
        RoaringBitmap::deserialize_unchecked_from(bytes.get(pos..).ok_or_else(|| {
            Error::InternalError(
                "Failed to read bitmap while deserializing bloom filter.".to_string(),
            )
        })?)
        .map_err(|err| Error::InternalError(format!("Failed to deserialize bloom filter: {err}.")))
        .map(|b| Self::from_params(m, b))
    }
}

View file

@ -1,250 +0,0 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{borrow::Cow, collections::HashSet, fmt::Display};
use ahash::AHashSet;
use nlp::{
language::{
detect::{LanguageDetector, MIN_LANGUAGE_SCORE},
stemmer::Stemmer,
Language,
},
tokenizers::{space::SpaceTokenizer, Token},
};
use utils::map::vec_map::VecMap;
use crate::{
query::RawValue,
write::{BatchBuilder, IntoOperations, Operation, ValueClass},
Serialize, HASH_EXACT, HASH_STEMMED,
};
use super::term_index::{TermIndexBuilder, TokenIndex};
// Tokens longer than this are truncated by the tokenizers (63 bytes).
pub const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 2) as usize;
// NOTE(review): this is MAX_TOKEN_LENGTH - 1 (62 = 0b111110), which is
// not an all-ones bit mask — confirm `len & MAX_TOKEN_MASK` is intended.
pub const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1;

/// A single piece of text queued for indexing.
struct Text<'x, T: Into<u8> + Display> {
    field: T,
    text: Cow<'x, str>,
    language: Type,
}

/// How a queued text part is processed.
enum Type {
    // Stemmed full-text in the given language.
    Stem(Language),
    // Split on whitespace, no stemming.
    Tokenize,
    // Indexed verbatim as a single token.
    Static,
}

/// Collects text parts to be converted into full-text index operations.
pub struct FtsIndexBuilder<'x, T: Into<u8> + Display> {
    parts: Vec<Text<'x, T>>,
    default_language: Language,
}
impl<'x, T: Into<u8> + Display> FtsIndexBuilder<'x, T> {
pub fn with_default_language(default_language: Language) -> FtsIndexBuilder<'x, T> {
FtsIndexBuilder {
parts: vec![],
default_language,
}
}
pub fn index(&mut self, field: T, text: impl Into<Cow<'x, str>>, language: Language) {
self.parts.push(Text {
field,
text: text.into(),
language: Type::Stem(language),
});
}
pub fn index_raw(&mut self, field: T, text: impl Into<Cow<'x, str>>) {
self.parts.push(Text {
field,
text: text.into(),
language: Type::Tokenize,
});
}
pub fn index_raw_token(&mut self, field: T, text: impl Into<Cow<'x, str>>) {
self.parts.push(Text {
field,
text: text.into(),
language: Type::Static,
});
}
}
impl<'x, T: Into<u8> + Display> IntoOperations for FtsIndexBuilder<'x, T> {
    /// Converts the queued text parts into batch operations: hashed token
    /// bitmaps plus a serialized term index for later un-indexing.
    fn build(self, batch: &mut BatchBuilder) {
        let mut detect = LanguageDetector::new();
        // field id -> unique raw tokens for the non-stemmed parts.
        let mut tokens: VecMap<u8, AHashSet<String>> = VecMap::new();
        let mut parts = Vec::new();

        for text in self.parts {
            match text.language {
                Type::Stem(language) => {
                    // Resolve unknown languages via detection now; parts
                    // still unknown fall back to the most frequent
                    // detected language below.
                    let language = if language == Language::Unknown {
                        detect.detect(&text.text, MIN_LANGUAGE_SCORE)
                    } else {
                        language
                    };
                    parts.push((text.field, language, text.text));
                }
                Type::Tokenize => {
                    let tokens = tokens.get_mut_or_insert(text.field.into());
                    for token in SpaceTokenizer::new(text.text.as_ref(), MAX_TOKEN_LENGTH) {
                        tokens.insert(token);
                    }
                }
                Type::Static => {
                    // The entire value is indexed as a single token.
                    tokens
                        .get_mut_or_insert(text.field.into())
                        .insert(text.text.into_owned());
                }
            }
        }

        let default_language = detect
            .most_frequent_language()
            .unwrap_or(self.default_language);
        let mut term_index = TermIndexBuilder::new();
        // Set deduplicates identical hash operations across parts/fields.
        let mut ops = AHashSet::new();

        for (part_id, (field, language, text)) in parts.into_iter().enumerate() {
            let language = if language != Language::Unknown {
                language
            } else {
                default_language
            };
            let mut terms = Vec::new();
            let field: u8 = field.into();

            for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH).collect::<Vec<_>>() {
                ops.insert(Operation::hash(&token.word, HASH_EXACT, field, true));
                if let Some(stemmed_word) = &token.stemmed_word {
                    ops.insert(Operation::hash(stemmed_word, HASH_STEMMED, field, true));
                }
                terms.push(term_index.add_stemmed_token(token));
            }

            if !terms.is_empty() {
                term_index.add_terms(field, part_id as u32, terms);
            }
        }

        for (field, tokens) in tokens {
            let mut terms = Vec::with_capacity(tokens.len());
            for token in tokens {
                ops.insert(Operation::hash(&token, HASH_EXACT, field, true));
                // Raw tokens carry no positions; from/to are zeroed.
                terms.push(term_index.add_token(Token {
                    word: token.into(),
                    from: 0,
                    to: 0,
                }));
            }
            term_index.add_terms(field, 0, terms);
        }

        for op in ops {
            batch.ops.push(op);
        }

        // Store the serialized term index under the reserved field/family.
        batch.ops.push(Operation::Value {
            class: ValueClass::Property {
                field: u8::MAX,
                family: u8::MAX,
            },
            set: term_index.serialize().into(),
        });
    }
}
impl TokenIndex {
    /// Replays the stored token index as bitmap hash operations, either
    /// setting (`set = true`) or clearing (`set = false`) each token bit.
    fn build_index(self, batch: &mut BatchBuilder, set: bool) {
        // Set deduplicates repeated word/field combinations.
        let mut ops = AHashSet::with_capacity(self.tokens.len() * 2);
        for term in self.terms {
            for (term_ids, is_exact) in [(term.exact_terms, true), (term.stemmed_terms, false)] {
                for term_id in term_ids {
                    if let Some(word) = self.tokens.get(term_id as usize) {
                        ops.insert(Operation::hash(
                            word,
                            if is_exact { HASH_EXACT } else { HASH_STEMMED },
                            term.field_id,
                            set,
                        ));
                    }
                }
            }
        }
        for op in ops {
            batch.ops.push(op);
        }
    }
}
impl IntoOperations for TokenIndex {
    /// Un-indexing path: clears all token bitmaps and removes the stored
    /// term index value.
    fn build(self, batch: &mut BatchBuilder) {
        self.build_index(batch, false);
        batch.ops.push(Operation::Value {
            class: ValueClass::Property {
                field: u8::MAX,
                family: u8::MAX,
            },
            set: None,
        });
    }
}

impl IntoOperations for RawValue<TokenIndex> {
    /// Re-indexing path: sets all token bitmaps and writes back the raw
    /// serialized term index.
    fn build(self, batch: &mut BatchBuilder) {
        self.inner.build_index(batch, true);
        batch.ops.push(Operation::Value {
            class: ValueClass::Property {
                field: u8::MAX,
                family: u8::MAX,
            },
            set: self.raw.into(),
        });
    }
}
/// Splits text into the set of its unique whitespace-delimited tokens.
pub trait ToTokens {
    fn to_tokens(&self) -> HashSet<String>;
}

impl ToTokens for &str {
    fn to_tokens(&self) -> HashSet<String> {
        // Collecting into a set deduplicates repeated tokens.
        SpaceTokenizer::new(self, MAX_TOKEN_LENGTH).collect()
    }
}

impl ToTokens for &String {
    fn to_tokens(&self) -> HashSet<String> {
        self.as_str().to_tokens()
    }
}

View file

@ -0,0 +1,372 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{borrow::Cow, fmt::Display};
use ahash::{AHashMap, AHashSet};
use nlp::{
language::{
detect::{LanguageDetector, MIN_LANGUAGE_SCORE},
stemmer::Stemmer,
Language,
},
tokenizers::word::WordTokenizer,
};
use crate::{
backend::MAX_TOKEN_LENGTH,
write::{
hash::TokenType, key::KeySerializer, BatchBuilder, BitmapClass, BitmapHash, Operation,
ValueClass,
},
Deserialize, Error, Store, ValueKey, U64_LEN,
};
use super::Field;
/// A single piece of text queued for indexing.
#[derive(Debug)]
struct Text<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> {
    field: Field<T>,
    text: Cow<'x, str>,
    typ: Type,
}

/// How a queued text part is tokenized.
#[derive(Debug)]
enum Type {
    // Stemmed full-text in the given language.
    Text(Language),
    // Word tokenization without stemming.
    Tokenize,
    // Indexed verbatim as a single token.
    Keyword,
}

/// A document's text parts, pending submission to the full-text index.
#[derive(Debug)]
pub struct FtsDocument<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> {
    parts: Vec<Text<'x, T>>,
    default_language: Language,
    account_id: u32,
    collection: u8,
    document_id: u32,
}
impl<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> FtsDocument<'x, T> {
pub fn with_default_language(default_language: Language) -> FtsDocument<'x, T> {
FtsDocument {
parts: vec![],
default_language,
account_id: 0,
document_id: 0,
collection: 0,
}
}
pub fn with_account_id(mut self, account_id: u32) -> Self {
self.account_id = account_id;
self
}
pub fn with_document_id(mut self, document_id: u32) -> Self {
self.document_id = document_id;
self
}
pub fn with_collection(mut self, collection: impl Into<u8>) -> Self {
self.collection = collection.into();
self
}
pub fn index(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>, language: Language) {
self.parts.push(Text {
field,
text: text.into(),
typ: Type::Text(language),
});
}
pub fn index_tokenized(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>) {
self.parts.push(Text {
field,
text: text.into(),
typ: Type::Tokenize,
});
}
pub fn index_keyword(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>) {
self.parts.push(Text {
field,
text: text.into(),
typ: Type::Keyword,
});
}
}
impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<Field<T>> for u8 {
    /// Maps a field to its numeric id: fixed ids 0-2 for the built-in
    /// fields, header fields offset past that reserved range.
    fn from(value: Field<T>) -> Self {
        match value {
            Field::Header(header) => 3 + header.into(),
            Field::Body => 0,
            Field::Attachment => 1,
            Field::Keyword => 2,
        }
    }
}
impl Store {
    /// Tokenizes and indexes an `FtsDocument` in the full-text index.
    ///
    /// Every token (plus stemmed variants and consecutive-word bigrams
    /// for stemmable text) is hashed into bitmap operations. A compressed
    /// term index is stored first so the document can later be
    /// un-indexed, then the bitmap updates are written in bounded batches.
    pub async fn fts_index<T: Into<u8> + Display + Clone + std::fmt::Debug>(
        &self,
        document: FtsDocument<'_, T>,
    ) -> crate::Result<()> {
        let mut detect = LanguageDetector::new();
        // Token hash -> set of field/type tags that contain the token.
        let mut tokens: AHashMap<BitmapHash, AHashSet<u8>> = AHashMap::new();
        let mut parts = Vec::new();
        for text in document.parts {
            match text.typ {
                Type::Text(language) => {
                    // Detect unknown languages now; still-unknown parts
                    // fall back to the document's most frequent detected
                    // language below.
                    let language = if language == Language::Unknown {
                        detect.detect(&text.text, MIN_LANGUAGE_SCORE)
                    } else {
                        language
                    };
                    parts.push((text.field, language, text.text));
                }
                Type::Tokenize => {
                    // Plain word tokenization; no stemming, no bigrams.
                    let field = u8::from(text.field);
                    for token in WordTokenizer::new(text.text.as_ref(), MAX_TOKEN_LENGTH) {
                        tokens
                            .entry(BitmapHash::new(token.word.as_ref()))
                            .or_default()
                            .insert(TokenType::word(field));
                    }
                }
                Type::Keyword => {
                    // The whole value is indexed as a single token.
                    let field = u8::from(text.field);
                    tokens
                        .entry(BitmapHash::new(text.text.as_ref()))
                        .or_default()
                        .insert(TokenType::word(field));
                }
            }
        }

        let default_language = detect
            .most_frequent_language()
            .unwrap_or(document.default_language);

        for (field, language, text) in parts.into_iter() {
            let language = if language != Language::Unknown {
                language
            } else {
                default_language
            };
            let field: u8 = field.into();

            let mut last_token = Cow::Borrowed("");
            for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH) {
                if !last_token.is_empty() {
                    // Index consecutive word pairs for phrase matching.
                    tokens
                        .entry(BitmapHash::new(&format!("{} {}", last_token, token.word)))
                        .or_default()
                        .insert(TokenType::bigram(field));
                }

                tokens
                    .entry(BitmapHash::new(token.word.as_ref()))
                    .or_default()
                    .insert(TokenType::word(field));

                if let Some(stemmed_word) = token.stemmed_word {
                    tokens
                        .entry(BitmapHash::new(stemmed_word.as_ref()))
                        .or_default()
                        .insert(TokenType::stemmed(field));
                }

                last_token = token.word;
            }
        }

        if tokens.is_empty() {
            return Ok(());
        }

        // Serialize tokens
        // Layout per token: 8-byte hash, token length, field count, fields.
        let mut serializer = KeySerializer::new(tokens.len() * U64_LEN * 2);
        let mut keys = Vec::with_capacity(tokens.len());

        for (hash, fields) in tokens.into_iter() {
            serializer = serializer
                .write(hash.hash.as_slice())
                .write(hash.len)
                .write(fields.len() as u8);
            for field in fields.into_iter() {
                serializer = serializer.write(field);
                keys.push(Operation::Bitmap {
                    class: BitmapClass::Text { field, token: hash },
                    set: true,
                });
            }
        }

        // Write term index
        // Written before the bitmaps so un-indexing data always exists.
        let mut batch = BatchBuilder::new();
        batch
            .with_account_id(document.account_id)
            .with_collection(document.collection)
            .update_document(document.document_id)
            .set(
                ValueClass::TermIndex,
                lz4_flex::compress_prepend_size(&serializer.finalize()),
            );
        self.write(batch.build()).await?;

        let mut batch = BatchBuilder::new();
        batch
            .with_account_id(document.account_id)
            .with_collection(document.collection)
            .update_document(document.document_id);

        for (pos, key) in keys.into_iter().enumerate() {
            // Flush every 1024 operations to keep batch sizes bounded.
            if pos > 0 && pos & 1023 == 0 {
                self.write(batch.build()).await?;
                batch = BatchBuilder::new();
                batch
                    .with_account_id(document.account_id)
                    .with_collection(document.collection)
                    .update_document(document.document_id);
            }
            batch.ops.push(key);
        }

        if !batch.is_empty() {
            self.write(batch.build()).await?;
        }

        Ok(())
    }

    /// Removes a document's tokens from the full-text index using its
    /// stored term index; returns `false` when no term index exists.
    pub async fn fts_remove(
        &self,
        account_id: u32,
        collection: u8,
        document_id: u32,
    ) -> crate::Result<bool> {
        // Obtain term index
        let term_index = if let Some(term_index) = self
            .get_value::<TermIndex>(ValueKey {
                account_id,
                collection,
                document_id,
                class: ValueClass::TermIndex,
            })
            .await?
        {
            term_index
        } else {
            return Ok(false);
        };

        // Remove keys
        let mut batch = BatchBuilder::new();
        batch
            .with_account_id(account_id)
            .with_collection(collection)
            .update_document(document_id);

        for (pos, key) in term_index.ops.into_iter().enumerate() {
            // Flush every 1024 operations to keep batch sizes bounded.
            if pos > 0 && pos & 1023 == 0 {
                self.write(batch.build()).await?;
                batch = BatchBuilder::new();
                batch
                    .with_account_id(account_id)
                    .with_collection(collection)
                    .update_document(document_id);
            }
            batch.ops.push(key);
        }

        if !batch.is_empty() {
            self.write(batch.build()).await?;
        }

        // Remove term index
        let mut batch = BatchBuilder::new();
        batch
            .with_account_id(account_id)
            .with_collection(collection)
            .update_document(document_id)
            .clear(ValueClass::TermIndex);
        self.write(batch.build()).await?;

        Ok(true)
    }

    /// Account-wide removal is a no-op for this backend: term indexes are
    /// stored in the same key range as the documents themselves.
    pub async fn fts_remove_all(&self, _: u32) -> crate::Result<()> {
        // No-op
        // Term indexes are stored in the same key range as the document
        Ok(())
    }
}
// The deserialized term index: the bitmap-clear operations needed to
// un-index a document.
struct TermIndex {
    ops: Vec<Operation>,
}
impl Deserialize for TermIndex {
fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
let bytes = lz4_flex::decompress_size_prepended(bytes)
.map_err(|_| Error::InternalError("Failed to decompress term index".to_string()))?;
let mut ops = Vec::new();
let mut bytes = bytes.iter().peekable();
while bytes.peek().is_some() {
let mut hash = BitmapHash {
hash: [0; 8],
len: 0,
};
for byte in hash.hash.iter_mut() {
*byte = *bytes.next().ok_or(Error::InternalError(
"Unexpected EOF reading term index".to_string(),
))?;
}
hash.len = *bytes.next().ok_or(Error::InternalError(
"Unexpected EOF reading term index".to_string(),
))?;
let num_fields = *bytes.next().ok_or(Error::InternalError(
"Unexpected EOF reading term index".to_string(),
))?;
for _ in 0..num_fields {
let field = *bytes.next().ok_or(Error::InternalError(
"Unexpected EOF reading term index".to_string(),
))?;
ops.push(Operation::Bitmap {
class: BitmapClass::Text { field, token: hash },
set: false,
});
}
}
Ok(Self { ops })
}
}

View file

@ -21,55 +21,188 @@
* for more details.
*/
use crate::{
write::{BitmapFamily, Operation},
BitmapKey, Serialize, BM_HASH,
};
use std::fmt::Display;
use self::{bloom::hash_token, builder::MAX_TOKEN_MASK};
use nlp::language::Language;
pub mod bloom;
pub mod builder;
pub mod index;
pub mod query;
pub mod search_snippet;
pub mod term_index;
impl BitmapKey<Vec<u8>> {
pub fn hash(word: &str, account_id: u32, collection: u8, family: u8, field: u8) -> Self {
BitmapKey {
account_id,
collection,
family: BM_HASH | family | (word.len() & MAX_TOKEN_MASK) as u8,
field,
block_num: 0,
key: hash_token(word),
}
/// Logical location of indexed text within a document.
#[derive(Clone, Debug)]
pub enum Field<T: Into<u8> + Display + Clone + std::fmt::Debug> {
    // A specific header, identified by the caller-supplied type.
    Header(T),
    Body,
    Attachment,
    Keyword,
}
pub fn value(
account_id: u32,
collection: impl Into<u8>,
field: impl Into<u8>,
value: impl BitmapFamily + Serialize,
/// A single full-text search criterion or logical connective.
#[derive(Debug)]
pub enum FtsFilter<T: Into<u8> + Display + Clone + std::fmt::Debug> {
    // Exact phrase match.
    Exact {
        field: Field<T>,
        text: String,
        language: Language,
    },
    // Fuzzy match on word/stemmed-word tokens.
    Contains {
        field: Field<T>,
        text: String,
        language: Language,
    },
    // Verbatim single-token match.
    Keyword {
        field: Field<T>,
        text: String,
    },
    And,
    Or,
    Not,
    // Closes the most recent And/Or/Not group.
    End,
}
impl<T: Into<u8> + Display + Clone + std::fmt::Debug> FtsFilter<T> {
pub fn has_text_detect(
field: Field<T>,
text: impl Into<String>,
default_language: Language,
) -> Self {
BitmapKey {
account_id,
collection: collection.into(),
family: value.family(),
field: field.into(),
block_num: 0,
key: value.serialize(),
let (text, language) = Language::detect(text.into(), default_language);
Self::has_text(field, text, language)
}
pub fn has_text(field: Field<T>, text: impl Into<String>, language: Language) -> Self {
let text = text.into();
if !matches!(language, Language::None) && (text.starts_with('"') && text.ends_with('"'))
|| (text.starts_with('\'') && text.ends_with('\''))
{
FtsFilter::Exact {
field,
text,
language,
}
} else {
FtsFilter::Contains {
field,
text,
language,
}
}
}
impl Operation {
pub fn hash(word: &str, family: u8, field: u8, set: bool) -> Self {
Operation::Bitmap {
family: BM_HASH | family | (word.len() & MAX_TOKEN_MASK) as u8,
pub fn has_keyword(field: Field<T>, text: impl Into<String>) -> Self {
FtsFilter::Keyword {
field,
key: hash_token(word),
set,
text: text.into(),
}
}
pub fn has_english_text(field: Field<T>, text: impl Into<String>) -> Self {
Self::has_text(field, text, Language::English)
}
}
/// Classifies a filter item for routing between backends.
#[derive(Clone, Copy)]
pub enum FilterType {
    And,
    Or,
    Not,
    End,
    // Evaluated by the regular store.
    Store,
    // Evaluated by the full-text store.
    Fts,
}

/// A run of full-text filters, or a single store filter.
pub enum FilterGroup<T: FilterItem> {
    Fts(Vec<T>),
    Store(T),
}

/// Implemented by filter enums that can report their routing type.
pub trait FilterItem: Clone {
    fn filter_type(&self) -> FilterType;
}

/// Splits a flat filter list into backend-specific groups.
pub trait IntoFilterGroup<T: FilterItem + From<FilterType>> {
    fn into_filter_group(self) -> Vec<FilterGroup<T>>;
}
impl<T: FilterItem + From<FilterType>> IntoFilterGroup<T> for Vec<T> {
    /// Partitions a flat filter list into runs of FTS filters and
    /// individual store filters, so each backend evaluates its own part.
    fn into_filter_group(self) -> Vec<FilterGroup<T>> {
        let mut filter = Vec::with_capacity(self.len());
        let mut iter = self.into_iter();
        // A pending logical operator to replay in front of an FTS run.
        let mut logical_op = None;
        while let Some(item) = iter.next() {
            if matches!(item.filter_type(), FilterType::Fts) {
                let mut store_item = None;
                // Nesting depth of logical groups opened inside the run.
                let mut depth = 0;
                let mut fts = Vec::with_capacity(5);

                // Add the logical operator if there is one
                let in_logical_op = if let Some(op) = logical_op.take() {
                    fts.push(op);
                    true
                } else {
                    false
                };
                fts.push(item);

                // Consume items that belong to the FTS run; the first
                // store-typed item ends the run and is kept for later.
                for item in iter.by_ref() {
                    match item.filter_type() {
                        FilterType::And | FilterType::Or | FilterType::Not => {
                            depth += 1;
                            fts.push(item);
                        }
                        FilterType::End if depth > 0 => {
                            depth -= 1;
                            fts.push(item);
                        }
                        FilterType::Fts => {
                            fts.push(item);
                        }
                        _ => {
                            store_item = Some(item);
                            break;
                        }
                    }
                }
                // Balance the operator replayed above with a closing End.
                if in_logical_op {
                    fts.push(T::from(FilterType::End));
                }

                if depth > 0 {
                    // The run ended with unclosed logical groups: move the
                    // trailing items back to the store side.
                    // NOTE(review): items are popped in reverse order here;
                    // confirm downstream evaluation tolerates the ordering.
                    let mut store = Vec::with_capacity(depth * 2);
                    while depth > 0 {
                        let item = fts.pop().unwrap();
                        if matches!(
                            item.filter_type(),
                            FilterType::And | FilterType::Or | FilterType::Not
                        ) {
                            depth -= 1;
                        }
                        store.push(FilterGroup::Store(item));
                    }
                    filter.push(FilterGroup::Fts(fts));
                    filter.extend(store);
                } else {
                    filter.push(FilterGroup::Fts(fts));
                }

                if let Some(item) = store_item {
                    filter.push(FilterGroup::Store(item));
                }
            } else {
                match item.filter_type() {
                    FilterType::And | FilterType::Or => {
                        // Remember the operator in case the next item
                        // starts an FTS run.
                        logical_op = Some(item.clone());
                    }
                    FilterType::Not => {
                        // A NOT before an FTS run is replayed as AND; the
                        // store side keeps the original NOT below.
                        logical_op = Some(T::from(FilterType::And));
                    }
                    _ => {}
                }
                filter.push(FilterGroup::Store(item));
            }
        }
        filter
    }
}

View file

@ -21,138 +21,210 @@
* for more details.
*/
use std::ops::BitOrAssign;
use nlp::language::{stemmer::Stemmer, Language};
use roaring::RoaringBitmap;
use crate::{fts::builder::MAX_TOKEN_LENGTH, BitmapKey, ValueKey, HASH_EXACT, HASH_STEMMED};
use super::term_index::TermIndex;
#[async_trait::async_trait]
pub trait StoreFts: StoreRead {
async fn fts_query(
&mut self,
account_id: u32,
collection: u8,
field: u8,
text: &str,
language: Language,
match_phrase: bool,
) -> crate::Result<Option<RoaringBitmap>> {
if match_phrase {
let mut phrase = Vec::new();
let mut bit_keys = Vec::new();
for token in language.tokenize_text(text, MAX_TOKEN_LENGTH) {
let key = BitmapKey::hash(
token.word.as_ref(),
account_id,
collection,
HASH_EXACT,
field,
);
if !bit_keys.contains(&key) {
bit_keys.push(key);
}
phrase.push(token.word);
}
let bitmaps = match self.get_bitmaps_intersection(bit_keys).await? {
Some(b) if !b.is_empty() => b,
_ => return Ok(None),
use std::{
fmt::Display,
ops::{BitAndAssign, BitOrAssign, BitXorAssign},
};
match phrase.len() {
0 => return Ok(None),
1 => return Ok(Some(bitmaps)),
_ => (),
use nlp::language::stemmer::Stemmer;
use roaring::RoaringBitmap;
use crate::{backend::MAX_TOKEN_LENGTH, fts::FtsFilter, write::BitmapClass, BitmapKey, Store};
struct State<T: Into<u8> + Display + Clone + std::fmt::Debug> {
pub op: FtsFilter<T>,
pub bm: Option<RoaringBitmap>,
}
let mut results = RoaringBitmap::new();
for document_id in bitmaps {
if let Some(term_index) = self
.get_value::<TermIndex>(ValueKey::term_index(
impl Store {
pub async fn fts_query<T: Into<u8> + Display + Clone + std::fmt::Debug>(
&self,
account_id: u32,
collection: impl Into<u8>,
filters: Vec<FtsFilter<T>>,
) -> crate::Result<RoaringBitmap> {
let collection = collection.into();
let mut not_mask = RoaringBitmap::new();
let mut not_fetch = false;
let mut state: State<T> = FtsFilter::And.into();
let mut stack = Vec::new();
let mut filters = filters.into_iter().peekable();
while let Some(filter) = filters.next() {
let mut result = match filter {
FtsFilter::Exact {
field,
text,
language,
} => {
let field: u8 = field.clone().into();
let tokens = language
.tokenize_text(text.as_ref(), MAX_TOKEN_LENGTH)
.map(|t| t.word)
.collect::<Vec<_>>();
let keys = if tokens.len() > 1 {
tokens
.windows(2)
.map(|bg| BitmapKey {
account_id,
collection,
document_id,
))
.await?
{
if term_index
.match_terms(
&phrase
.iter()
.map(|w| term_index.get_match_term(w, None))
.collect::<Vec<_>>(),
field.into(),
true,
false,
false,
)
.map_err(|e| {
crate::Error::InternalError(format!(
"TermIndex match_terms failed for {account_id}/{collection}/{document_id}: {e:?}"
))
})?
.is_some()
{
results.insert(document_id);
}
class: BitmapClass::bigram(format!("{} {}", bg[0], bg[1]), field),
block_num: 0,
})
.collect::<Vec<_>>()
} else {
tracing::debug!(
event = "error",
context = "fts_query",
account_id = account_id,
collection = collection,
document_id = document_id,
"Document is missing a term index",
);
}
}
tokens
.into_iter()
.map(|word| BitmapKey {
account_id,
collection,
class: BitmapClass::word(word.as_ref(), field),
block_num: 0,
})
.collect::<Vec<_>>()
};
if !results.is_empty() {
Ok(Some(results))
} else {
Ok(None)
self.get_bitmaps_intersection(keys).await?
}
} else {
let mut bitmaps = RoaringBitmap::new();
FtsFilter::Contains {
field,
text,
language,
} => {
let mut result = RoaringBitmap::new();
let field: u8 = field.clone().into();
for token in Stemmer::new(text, language, MAX_TOKEN_LENGTH) {
let token1 =
BitmapKey::hash(&token.word, account_id, collection, HASH_EXACT, field);
let token2 = if let Some(stemmed_word) = token.stemmed_word {
BitmapKey::hash(&stemmed_word, account_id, collection, HASH_STEMMED, field)
for token in Stemmer::new(text.as_ref(), language, MAX_TOKEN_LENGTH) {
let token1 = BitmapKey {
account_id,
collection,
class: BitmapClass::word(token.word.as_ref(), field),
block_num: 0,
};
let token2 = BitmapKey {
account_id,
collection,
class: BitmapClass::stemmed(
if let Some(stemmed_word) = token.stemmed_word {
stemmed_word
} else {
let mut token2 = token1.clone();
token2.family &= !HASH_EXACT;
token2.family |= HASH_STEMMED;
token2
token.word
}
.as_ref(),
field,
),
block_num: 0,
};
match self.get_bitmaps_union(vec![token1, token2]).await? {
Some(b) if !b.is_empty() => {
if !bitmaps.is_empty() {
bitmaps &= b;
if bitmaps.is_empty() {
return Ok(None);
if !result.is_empty() {
result &= b;
if result.is_empty() {
break;
}
} else {
bitmaps = b;
result = b;
}
}
_ => break,
}
}
if !result.is_empty() {
Some(result)
} else {
None
}
}
FtsFilter::Keyword { field, text } => {
self.get_bitmap(BitmapKey {
account_id,
collection,
class: BitmapClass::word(text, field),
block_num: 0,
})
.await?
}
op @ (FtsFilter::And | FtsFilter::Or | FtsFilter::Not) => {
stack.push(state);
state = op.into();
continue;
}
FtsFilter::End => {
if let Some(prev_state) = stack.pop() {
let bm = state.bm;
state = prev_state;
bm
} else {
break;
}
}
_ => return Ok(None),
};
// Only fetch not mask if we need it
if matches!(state.op, FtsFilter::Not) && !not_fetch {
not_mask = self
.get_bitmap(BitmapKey::document_ids(account_id, collection))
.await?
.unwrap_or_else(RoaringBitmap::new);
not_fetch = true;
}
Ok(Some(bitmaps))
// Apply logical operation
if let Some(dest) = &mut state.bm {
match state.op {
FtsFilter::And => {
if let Some(result) = result {
dest.bitand_assign(result);
} else {
dest.clear();
}
}
FtsFilter::Or => {
if let Some(result) = result {
dest.bitor_assign(result);
}
}
FtsFilter::Not => {
if let Some(mut result) = result {
result.bitxor_assign(&not_mask);
dest.bitand_assign(result);
}
}
_ => unreachable!(),
}
} else if let Some(ref mut result_) = result {
if let FtsFilter::Not = state.op {
result_.bitxor_assign(&not_mask);
}
state.bm = result;
} else if let FtsFilter::Not = state.op {
state.bm = Some(not_mask.clone());
} else {
state.bm = Some(RoaringBitmap::new());
}
// And short circuit
if matches!(state.op, FtsFilter::And) && state.bm.as_ref().unwrap().is_empty() {
while let Some(filter) = filters.peek() {
if matches!(filter, FtsFilter::End) {
break;
} else {
filters.next();
}
}
}
}
async fn get_bitmaps_union<T: AsRef<[u8]> + Sync + Send>(
Ok(state.bm.unwrap_or_default())
}
async fn get_bitmaps_union(
&self,
keys: Vec<BitmapKey<T>>,
keys: Vec<BitmapKey<BitmapClass>>,
) -> crate::Result<Option<RoaringBitmap>> {
let mut bm = RoaringBitmap::new();
@ -165,3 +237,12 @@ pub trait StoreFts: StoreRead {
Ok(if !bm.is_empty() { Some(bm) } else { None })
}
}
impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<FtsFilter<T>> for State<T> {
fn from(value: FtsFilter<T>) -> Self {
Self {
op: value,
bm: None,
}
}
}

View file

@ -23,7 +23,7 @@
use std::{borrow::Cow, convert::TryInto};
use crate::{Deserialize, Serialize};
use crate::{Deserialize, Serialize, U32_LEN, U64_LEN};
use ahash::{AHashMap, AHashSet};
use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};

View file

@ -24,8 +24,8 @@
use std::{fmt::Display, sync::Arc};
pub mod backend;
//pub mod fts;
pub mod dispatch;
pub mod fts;
pub mod query;
pub mod write;
@ -37,11 +37,6 @@ pub use rand;
pub use roaring;
use write::{BitmapClass, BlobOp, ValueClass};
#[cfg(feature = "rocks")]
pub struct Store {
db: rocksdb::OptimisticTransactionDB<rocksdb::MultiThreaded>,
}
pub trait Deserialize: Sized + Sync + Send {
fn deserialize(bytes: &[u8]) -> crate::Result<Self>;
}
@ -103,9 +98,9 @@ pub struct LogKey {
pub change_id: u64,
}
const BLOB_HASH_LEN: usize = 32;
const U64_LEN: usize = std::mem::size_of::<u64>();
const U32_LEN: usize = std::mem::size_of::<u32>();
pub const BLOB_HASH_LEN: usize = 32;
pub const U64_LEN: usize = std::mem::size_of::<u64>();
pub const U32_LEN: usize = std::mem::size_of::<u32>();
#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
pub struct BlobHash([u8; BLOB_HASH_LEN]);
@ -158,6 +153,7 @@ pub const SUBSPACE_VALUES: u8 = b'v';
pub const SUBSPACE_LOGS: u8 = b'l';
pub const SUBSPACE_INDEXES: u8 = b'i';
pub const SUBSPACE_BLOBS: u8 = b'o';
pub const SUBSPACE_BLOB_DATA: u8 = b't';
pub const SUBSPACE_ACLS: u8 = b'a';
pub const SUBSPACE_COUNTERS: u8 = b'c';
@ -179,6 +175,13 @@ pub enum Store {
pub enum BlobStore {
Fs(Arc<FsStore>),
S3(Arc<S3Store>),
Sqlite(Arc<SqliteStore>),
FoundationDb(Arc<FdbStore>),
}
#[derive(Clone)]
pub enum FtsStore {
Store(Store),
}
impl From<SqliteStore> for Store {
@ -204,3 +207,9 @@ impl From<S3Store> for BlobStore {
Self::S3(Arc::new(store))
}
}
impl From<Store> for FtsStore {
fn from(store: Store) -> Self {
Self::Store(store)
}
}

View file

@ -24,7 +24,7 @@
use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign};
use ahash::HashSet;
use nlp::tokenizers::space::SpaceTokenizer;
use nlp::tokenizers::word::WordTokenizer;
use roaring::RoaringBitmap;
use crate::{backend::MAX_TOKEN_LENGTH, BitmapKey, Store};
@ -32,8 +32,8 @@ use crate::{backend::MAX_TOKEN_LENGTH, BitmapKey, Store};
use super::{Filter, ResultSet};
struct State {
op: Filter,
bm: Option<RoaringBitmap>,
pub op: Filter,
pub bm: Option<RoaringBitmap>,
}
impl Store {
@ -44,8 +44,6 @@ impl Store {
filters: Vec<Filter>,
) -> crate::Result<ResultSet> {
let collection = collection.into();
let mut not_mask = RoaringBitmap::new();
let mut not_fetch = false;
if filters.is_empty() {
return Ok(ResultSet {
account_id,
@ -61,10 +59,13 @@ impl Store {
let mut stack = Vec::new();
let mut filters = filters.into_iter().peekable();
let mut not_mask = RoaringBitmap::new();
let mut not_fetch = false;
while let Some(filter) = filters.next() {
let result = match filter {
let mut result = match filter {
Filter::MatchValue { field, op, value } => {
self.range_to_bitmap(account_id, collection, field, value, op)
self.range_to_bitmap(account_id, collection, field, &value, op)
.await?
}
Filter::HasText {
@ -74,7 +75,8 @@ impl Store {
} => {
if tokenize {
self.get_bitmaps_intersection(
SpaceTokenizer::new(&text, MAX_TOKEN_LENGTH)
WordTokenizer::new(&text, MAX_TOKEN_LENGTH)
.map(|token| token.word.into_owned())
.collect::<HashSet<String>>()
.into_iter()
.map(|word| {
@ -114,6 +116,7 @@ impl Store {
}
};
// Only fetch not mask if we need it
if matches!(state.op, Filter::Not) && !not_fetch {
not_mask = self
.get_bitmap(BitmapKey::document_ids(account_id, collection))
@ -122,8 +125,41 @@ impl Store {
not_fetch = true;
}
state.op.apply(&mut state.bm, result, &not_mask);
// Apply logical operation
if let Some(dest) = &mut state.bm {
match state.op {
Filter::And => {
if let Some(result) = result {
dest.bitand_assign(result);
} else {
dest.clear();
}
}
Filter::Or => {
if let Some(result) = result {
dest.bitor_assign(result);
}
}
Filter::Not => {
if let Some(mut result) = result {
result.bitxor_assign(&not_mask);
dest.bitand_assign(result);
}
}
_ => unreachable!(),
}
} else if let Some(ref mut result_) = result {
if let Filter::Not = state.op {
result_.bitxor_assign(&not_mask);
}
state.bm = result;
} else if let Filter::Not = state.op {
state.bm = Some(not_mask.clone());
} else {
state.bm = Some(RoaringBitmap::new());
}
// And short-circuit
if matches!(state.op, Filter::And) && state.bm.as_ref().unwrap().is_empty() {
while let Some(filter) = filters.peek() {
if matches!(filter, Filter::End) {
@ -143,49 +179,6 @@ impl Store {
}
}
impl Filter {
#[inline(always)]
pub fn apply(
&self,
dest: &mut Option<RoaringBitmap>,
mut src: Option<RoaringBitmap>,
not_mask: &RoaringBitmap,
) {
if let Some(dest) = dest {
match self {
Filter::And => {
if let Some(src) = src {
dest.bitand_assign(src);
} else {
dest.clear();
}
}
Filter::Or => {
if let Some(src) = src {
dest.bitor_assign(src);
}
}
Filter::Not => {
if let Some(mut src) = src {
src.bitxor_assign(not_mask);
dest.bitand_assign(src);
}
}
_ => unreachable!(),
}
} else if let Some(ref mut src_) = src {
if let Filter::Not = self {
src_.bitxor_assign(not_mask);
}
*dest = src;
} else if let Filter::Not = self {
*dest = Some(not_mask.clone());
} else {
*dest = Some(RoaringBitmap::new());
}
}
}
impl From<Filter> for State {
fn from(value: Filter) -> Self {
Self {

View file

@ -130,12 +130,12 @@ impl Store {
let from_key = LogKey {
account_id,
collection,
change_id: u64::MAX,
change_id: 0,
};
let to_key = LogKey {
account_id,
collection,
change_id: 0,
change_id: u64::MAX,
};
let mut last_change_id = None;

View file

@ -29,7 +29,7 @@ pub mod sort;
use roaring::RoaringBitmap;
use crate::{
write::{BitmapClass, TagValue},
write::{BitmapClass, BitmapHash, TagValue},
BitmapKey, IterateParams, Key, Serialize,
};
@ -144,48 +144,6 @@ impl Filter {
}
}
/*pub fn has_text_detect(
field: impl Into<u8>,
text: impl Into<String>,
default_language: Language,
) -> Self {
let (text, language) = Language::detect(text.into(), default_language);
Self::has_text(field, text, language)
}
pub fn has_text(field: impl Into<u8>, text: impl Into<String>, language: Language) -> Self {
let text = text.into();
let op = if !matches!(language, Language::None) {
if (text.starts_with('"') && text.ends_with('"'))
|| (text.starts_with('\'') && text.ends_with('\''))
{
TextMatch::Exact(language)
} else {
TextMatch::Stemmed(language)
}
} else {
TextMatch::Tokenized
};
Filter::HasText {
field: field.into(),
text,
op,
}
}
pub fn has_raw_text(field: impl Into<u8>, text: impl Into<String>) -> Self {
Filter::HasText {
field: field.into(),
text: text.into(),
op: TextMatch::Raw,
}
}
pub fn has_english_text(field: impl Into<u8>, text: impl Into<String>) -> Self {
Self::has_text(field, text, Language::English)
}*/
pub fn has_text(field: impl Into<u8>, text: impl Into<String>) -> Self {
Filter::HasText {
field: field.into(),
@ -255,14 +213,14 @@ impl BitmapKey<BitmapClass> {
account_id: u32,
collection: impl Into<u8>,
field: impl Into<u8>,
token: impl Into<Vec<u8>>,
token: impl AsRef<[u8]>,
) -> Self {
BitmapKey {
account_id,
collection: collection.into(),
class: BitmapClass::Text {
field: field.into(),
token: token.into(),
token: BitmapHash::new(token),
},
block_num: 0,
}
@ -317,20 +275,3 @@ impl<T: Key> IterateParams<T> {
self
}
}
/*
#[derive(Debug)]
pub struct RawValue<T: Deserialize> {
pub raw: Vec<u8>,
pub inner: T,
}
impl<T: Deserialize> Deserialize for RawValue<T> {
fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
Ok(RawValue {
inner: T::deserialize(bytes)?,
raw: bytes.to_vec(),
})
}
}
*/

View file

@ -160,10 +160,10 @@ impl BatchBuilder {
self
}
pub fn set(&mut self, class: impl Into<ValueClass>, value: Vec<u8>) -> &mut Self {
pub fn set(&mut self, class: impl Into<ValueClass>, value: impl Into<Vec<u8>>) -> &mut Self {
self.ops.push(Operation::Value {
class: class.into(),
op: ValueOp::Set(value),
op: ValueOp::Set(value.into()),
});
self
}

View file

@ -0,0 +1,158 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of the Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use crate::backend::MAX_TOKEN_LENGTH;
use super::{BitmapClass, BitmapHash};
impl BitmapClass {
pub fn word(token: impl AsRef<[u8]>, field: impl Into<u8>) -> Self {
BitmapClass::Text {
field: field.into(),
token: BitmapHash::new(token),
}
}
pub fn stemmed(token: impl AsRef<[u8]>, field: impl Into<u8>) -> Self {
BitmapClass::Text {
field: field.into() | 1 << 6,
token: BitmapHash::new(token),
}
}
pub fn bigram(token: impl AsRef<[u8]>, field: impl Into<u8>) -> Self {
BitmapClass::Text {
field: field.into() | 1 << 7,
token: BitmapHash::new(token),
}
}
}
impl BitmapHash {
    /// Hashes `item` into a fixed 8-byte digest, recording the token
    /// length capped at MAX_TOKEN_LENGTH.
    pub fn new(item: impl AsRef<[u8]>) -> Self {
        let len = item.as_ref().len().min(MAX_TOKEN_LENGTH) as u8;
        Self {
            hash: hash(item),
            len,
        }
    }

    /// Interprets the 8-byte digest as a big-endian u64.
    pub fn to_u64(&self) -> u64 {
        u64::from_be_bytes(self.hash)
    }
}
/// Folds an arbitrary byte string into 8 bytes: short inputs (<= 8
/// bytes) are stored verbatim in the low positions; longer inputs are
/// reduced with two independent hash functions (xxh3 and farmhash),
/// 4 bytes from each, to keep collisions unlikely.
fn hash(item: impl AsRef<[u8]>) -> [u8; 8] {
    let bytes = item.as_ref();
    let mut out = [0u8; 8];
    match bytes.len() {
        0..=8 => out[..bytes.len()].copy_from_slice(bytes),
        _ => {
            out[..4].copy_from_slice(&xxhash_rust::xxh3::xxh3_64(bytes).to_le_bytes()[..4]);
            out[4..].copy_from_slice(&farmhash::hash64(bytes).to_le_bytes()[..4]);
        }
    }
    out
}
/// Namespace for the field-tag encodings shared by the text bitmap
/// classes: the two high bits of the serialized field byte select the
/// token kind (exact word, stemmed word, or bigram).
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub struct TokenType {}

impl TokenType {
    /// Field tag for an exact word token (no flag bits set).
    /// `const` so tags can be used in constant contexts.
    pub const fn word(field: u8) -> u8 {
        field
    }

    /// Field tag for a stemmed word token (bit 6 set).
    pub const fn stemmed(field: u8) -> u8 {
        1 << 6 | field
    }

    /// Field tag for a word-pair (bigram) token (bit 7 set).
    pub const fn bigram(field: u8) -> u8 {
        1 << 7 | field
    }
}
/*
const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
0xaf1f2242106c64b3,
0x60ca4cfb4b3ed0ce,
0xc7dbc0bb615e82b3,
0x520ad065378daf88,
);
lazy_static::lazy_static! {
static ref SIPHASHER: siphasher::sip::SipHasher13 =
siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
}
let h1 = xxhash_rust::xxh3::xxh3_64(item).to_le_bytes();
let h2 = farmhash::hash64(item).to_le_bytes();
let h3 = AHASHER.hash_one(item).to_le_bytes();
let mut sh = *SIPHASHER;
sh.write(item.as_ref());
let h4 = sh.finish().to_le_bytes();
result[..2].copy_from_slice(&h1[..2]);
result[2..4].copy_from_slice(&h2[..2]);
result[4..6].copy_from_slice(&h3[..2]);
result[6..8].copy_from_slice(&h4[..2]);
impl KeySerializer {
pub fn hash_text(mut self, item: impl AsRef<[u8]>) -> Self {
let item = item.as_ref();
if item.len() <= 8 {
self.buf.extend_from_slice(item);
} else {
let h1 = xxhash_rust::xxh3::xxh3_64(item).to_le_bytes();
let h2 = farmhash::hash64(item).to_le_bytes();
let h3 = AHASHER.hash_one(item).to_le_bytes();
let mut sh = *SIPHASHER;
sh.write(item.as_ref());
let h4 = sh.finish().to_le_bytes();
match item.len() {
9..=16 => {
self.buf.extend_from_slice(&h1[..2]);
self.buf.extend_from_slice(&h2[..2]);
self.buf.extend_from_slice(&h3[..2]);
self.buf.extend_from_slice(&h4[..2]);
}
17..=32 => {
self.buf.extend_from_slice(&h1[..3]);
self.buf.extend_from_slice(&h2[..3]);
self.buf.extend_from_slice(&h3[..3]);
self.buf.extend_from_slice(&h4[..3]);
}
_ => {
self.buf.extend_from_slice(&h1[..4]);
self.buf.extend_from_slice(&h2[..4]);
self.buf.extend_from_slice(&h3[..4]);
self.buf.extend_from_slice(&h4[..4]);
}
}
}
self
}
}
*/

View file

@ -21,19 +21,19 @@
* for more details.
*/
use std::{convert::TryInto, hash::Hasher};
use std::convert::TryInto;
use utils::codec::leb128::Leb128_;
use crate::{
backend::MAX_TOKEN_MASK, BitmapKey, BlobHash, BlobKey, IndexKey, IndexKeyPrefix, Key, LogKey,
ValueKey, BLOB_HASH_LEN, SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_INDEXES, SUBSPACE_LOGS,
SUBSPACE_VALUES, U32_LEN, U64_LEN,
BitmapKey, BlobHash, BlobKey, IndexKey, IndexKeyPrefix, Key, LogKey, ValueKey, BLOB_HASH_LEN,
SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_VALUES, U32_LEN,
U64_LEN,
};
use super::{BitmapClass, BlobOp, TagValue, ValueClass};
pub struct KeySerializer {
buf: Vec<u8>,
pub buf: Vec<u8>,
}
pub trait KeySerialize {
@ -241,6 +241,15 @@ impl<T: AsRef<ValueClass> + Sync + Send> Key for ValueKey<T> {
}
.write(u32::MAX)
.write(name.as_slice()),
ValueClass::TermIndex => if include_subspace {
KeySerializer::new(U32_LEN * 2 + 3).write(crate::SUBSPACE_VALUES)
} else {
KeySerializer::new(U32_LEN * 2 + 2)
}
.write(self.account_id)
.write(self.collection)
.write_leb128(self.document_id)
.write(u8::MAX),
}
.finalize()
}
@ -277,35 +286,64 @@ impl<T: AsRef<BitmapClass> + Sync + Send> Key for BitmapKey<T> {
fn serialize(&self, include_subspace: bool) -> Vec<u8> {
const BM_DOCUMENT_IDS: u8 = 0;
const BM_TAG: u8 = 1 << 5;
const BM_TEXT: u8 = 1 << 6;
const BM_TAG: u8 = 1 << 6;
const BM_TEXT: u8 = 1 << 7;
const TAG_ID: u8 = 0;
const TAG_TEXT: u8 = 1 << 0;
const TAG_STATIC: u8 = 1 << 1;
let ks = if include_subspace {
KeySerializer::new(self.len() + 1).write(crate::SUBSPACE_BITMAPS)
match self.class.as_ref() {
BitmapClass::DocumentIds => if include_subspace {
KeySerializer::new(U32_LEN + 3).write(SUBSPACE_BITMAPS)
} else {
KeySerializer::new(self.len())
KeySerializer::new(U32_LEN + 2)
}
.write(self.account_id)
.write(self.collection);
match self.class.as_ref() {
BitmapClass::DocumentIds => ks.write(BM_DOCUMENT_IDS),
.write(self.collection)
.write(BM_DOCUMENT_IDS),
BitmapClass::Tag { field, value } => match value {
TagValue::Id(id) => ks.write(BM_TAG | TAG_ID).write(*field).write_leb128(*id),
TagValue::Text(text) => ks
TagValue::Id(id) => if include_subspace {
KeySerializer::new((U32_LEN * 2) + 4).write(SUBSPACE_BITMAPS)
} else {
KeySerializer::new((U32_LEN * 2) + 3)
}
.write(self.account_id)
.write(self.collection)
.write(BM_TAG | TAG_ID)
.write(*field)
.write_leb128(*id),
TagValue::Text(text) => if include_subspace {
KeySerializer::new(U32_LEN + 4 + text.len()).write(SUBSPACE_BITMAPS)
} else {
KeySerializer::new(U32_LEN + 3 + text.len())
}
.write(self.account_id)
.write(self.collection)
.write(BM_TAG | TAG_TEXT)
.write(*field)
.write(text.as_slice()),
TagValue::Static(id) => ks.write(BM_TAG | TAG_STATIC).write(*field).write(*id),
},
BitmapClass::Text { field, token } => ks
.write(BM_TEXT | (token.len() & MAX_TOKEN_MASK) as u8)
TagValue::Static(id) => if include_subspace {
KeySerializer::new(U32_LEN + 5).write(SUBSPACE_BITMAPS)
} else {
KeySerializer::new(U32_LEN + 4)
}
.write(self.account_id)
.write(self.collection)
.write(BM_TAG | TAG_STATIC)
.write(*field)
.hash_text(token),
.write(*id),
},
BitmapClass::Text { field, token } => if include_subspace {
KeySerializer::new(U32_LEN + 16 + 3 + 1).write(SUBSPACE_BITMAPS)
} else {
KeySerializer::new(U32_LEN + 16 + 3)
}
.write(self.account_id)
.write(self.collection)
.write(BM_TEXT | token.len)
.write(*field)
.write(token.hash.as_slice()),
}
.write(self.block_num)
.finalize()
@ -349,81 +387,3 @@ impl<T: AsRef<BlobHash> + Sync + Send> Key for BlobKey<T> {
crate::SUBSPACE_BLOBS
}
}
const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
0xaf1f2242106c64b3,
0x60ca4cfb4b3ed0ce,
0xc7dbc0bb615e82b3,
0x520ad065378daf88,
);
lazy_static::lazy_static! {
static ref SIPHASHER: siphasher::sip::SipHasher13 =
siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
}
impl KeySerializer {
fn hash_text(mut self, item: impl AsRef<[u8]>) -> Self {
let item = item.as_ref();
if item.len() <= 8 {
self.buf.extend_from_slice(item);
} else {
let h1 = xxhash_rust::xxh3::xxh3_64(item).to_le_bytes();
let h2 = farmhash::hash64(item).to_le_bytes();
let h3 = AHASHER.hash_one(item).to_le_bytes();
let mut sh = *SIPHASHER;
sh.write(item.as_ref());
let h4 = sh.finish().to_le_bytes();
match item.len() {
9..=16 => {
self.buf.extend_from_slice(&h1[..2]);
self.buf.extend_from_slice(&h2[..2]);
self.buf.extend_from_slice(&h3[..2]);
self.buf.extend_from_slice(&h4[..2]);
}
17..=32 => {
self.buf.extend_from_slice(&h1[..3]);
self.buf.extend_from_slice(&h2[..3]);
self.buf.extend_from_slice(&h3[..3]);
self.buf.extend_from_slice(&h4[..3]);
}
_ => {
self.buf.extend_from_slice(&h1[..4]);
self.buf.extend_from_slice(&h2[..4]);
self.buf.extend_from_slice(&h3[..4]);
self.buf.extend_from_slice(&h4[..4]);
}
}
}
self
}
}
impl<T: AsRef<BitmapClass>> BitmapKey<T> {
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize {
std::mem::size_of::<BitmapKey<BitmapClass>>()
+ match self.class.as_ref() {
BitmapClass::DocumentIds => 0,
BitmapClass::Tag { value, .. } => match value {
TagValue::Id(_) => U32_LEN,
TagValue::Text(v) => v.len(),
TagValue::Static(_) => 1,
},
BitmapClass::Text { token, .. } => token.len(),
}
}
}
impl<T: AsRef<ValueClass>> ValueKey<T> {
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize {
std::mem::size_of::<ValueKey<ValueClass>>()
+ match self.class.as_ref() {
ValueClass::Property(_) => 1,
ValueClass::Acl(_) => U32_LEN,
ValueClass::Named(v) => v.len(),
}
}
}

View file

@ -23,7 +23,7 @@
use std::{collections::HashSet, hash::Hash, slice::Iter, time::SystemTime};
use nlp::tokenizers::space::SpaceTokenizer;
use nlp::tokenizers::word::WordTokenizer;
use utils::codec::leb128::{Leb128Iterator, Leb128Vec};
use crate::{
@ -35,6 +35,7 @@ use self::assert::AssertValue;
pub mod assert;
pub mod batch;
pub mod blob;
pub mod hash;
pub mod key;
pub mod log;
@ -92,14 +93,20 @@ pub enum Operation {
},
}
#[derive(Debug, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum BitmapClass {
DocumentIds,
Tag { field: u8, value: TagValue },
Text { field: u8, token: Vec<u8> },
Text { field: u8, token: BitmapHash },
}
#[derive(Debug, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct BitmapHash {
pub hash: [u8; 8],
pub len: u8,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TagValue {
Id(u32),
Text(Vec<u8>),
@ -111,6 +118,7 @@ pub enum ValueClass {
Property(u8),
Acl(u32),
Named(Vec<u8>),
TermIndex,
}
#[derive(Debug, PartialEq, Eq, Hash, Default)]
@ -352,7 +360,7 @@ impl ToBitmaps for &str {
ops.push(Operation::Bitmap {
class: BitmapClass::Text {
field,
token: token.into_bytes(),
token: BitmapHash::new(token),
},
set,
});
@ -362,8 +370,8 @@ impl ToBitmaps for &str {
impl TokenizeText for &str {
fn tokenize_into(&self, tokens: &mut HashSet<String>) {
for token in SpaceTokenizer::new(self, MAX_TOKEN_LENGTH) {
tokens.insert(token);
for token in WordTokenizer::new(self, MAX_TOKEN_LENGTH) {
tokens.insert(token.word.into_owned());
}
}
@ -479,6 +487,10 @@ impl BlobHash {
pub fn try_from_hash_slice(value: &[u8]) -> Result<BlobHash, std::array::TryFromSliceError> {
value.try_into().map(BlobHash)
}
pub fn as_slice(&self) -> &[u8] {
self.0.as_ref()
}
}
impl From<&[u8]> for BlobHash {
@ -523,6 +535,12 @@ impl AsRef<BlobClass> for BlobClass {
}
}
impl From<BlobHash> for Vec<u8> {
fn from(value: BlobHash) -> Self {
value.0.to_vec()
}
}
impl BlobClass {
pub fn account_id(&self) -> u32 {
match self {

View file

@ -23,6 +23,7 @@ opentelemetry-semantic-conventions = { version = "0.12.0" }
dashmap = "5.4"
ahash = { version = "0.8" }
chrono = "0.4"
rand = "0.8.5"
[target.'cfg(unix)'.dependencies]
privdrop = "0.5.3"

View file

@ -30,6 +30,7 @@ pub mod config;
pub mod ipc;
pub mod listener;
pub mod map;
pub mod snowflake;
pub mod suffixlist;
use opentelemetry::{

View file

@ -0,0 +1,69 @@
/*
* Copyright (c) 2023 Stalwart Labs Ltd.
*
* This file is part of Stalwart Mail Server.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
* in the LICENSE file at the top-level directory of this distribution.
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* You can be released from the requirements of the AGPLv3 license by
* purchasing a commercial license. Please contact licensing@stalw.art
* for more details.
*/
use std::{
sync::atomic::{AtomicU64, Ordering},
time::{Duration, SystemTime},
};
/// Generates roughly time-ordered 64-bit unique ids (Snowflake scheme):
/// a millisecond timestamp relative to a custom epoch, a node id, and a
/// per-process sequence counter packed into a single u64.
pub struct SnowflakeIdGenerator {
    // Custom epoch that timestamps are measured against.
    epoch: SystemTime,
    // Identifier of this node; only the low NODE_ID_LEN bits are used.
    node_id: u64,
    // Monotonic counter; only the low SEQUENCE_LEN bits are used.
    sequence: AtomicU64,
}
// Bit layout of a generated id, least to most significant:
// [ sequence (12 bits) | node id (9 bits) | milliseconds since epoch ].
const SEQUENCE_LEN: u64 = 12;
const NODE_ID_LEN: u64 = 9;
// Masks that truncate the sequence counter and node id to their fields.
const SEQUENCE_MASK: u64 = (1 << SEQUENCE_LEN) - 1;
const NODE_ID_MASK: u64 = (1 << NODE_ID_LEN) - 1;
impl SnowflakeIdGenerator {
    /// Creates a generator with a randomly chosen node id.
    pub fn new() -> Self {
        Self::with_node_id(rand::random::<u64>())
    }

    /// Creates a generator for the given node id. The custom epoch is
    /// 1632280000 seconds past UNIX_EPOCH (September 2021), keeping
    /// timestamps small for decades to come.
    pub fn with_node_id(node_id: u64) -> Self {
        Self {
            epoch: SystemTime::UNIX_EPOCH + Duration::from_secs(1632280000),
            node_id,
            sequence: 0.into(),
        }
    }

    /// Produces the next id as [timestamp | node id | sequence].
    /// Returns `None` when the system clock reads earlier than the
    /// custom epoch (elapsed() fails).
    pub fn generate(&self) -> Option<u64> {
        let millis = self.epoch.elapsed().ok()?.as_millis() as u64;
        let seq = self.sequence.fetch_add(1, Ordering::Relaxed);
        Some(
            millis << (SEQUENCE_LEN + NODE_ID_LEN)
                | (self.node_id & NODE_ID_MASK) << SEQUENCE_LEN
                | (seq & SEQUENCE_MASK),
        )
    }
}
impl Default for SnowflakeIdGenerator {
    // Same as `new()`: a generator with a random node id.
    fn default() -> Self {
        Self::new()
    }
}

View file

@ -25,9 +25,11 @@ use std::{fs, io};
use imap_proto::ResponseType;
use super::{resources_dir, AssertResult, ImapConnection, Type};
use crate::jmap::wait_for_index;
pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
use super::{resources_dir, AssertResult, IMAPTest, ImapConnection, Type};
pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection, handle: &IMAPTest) {
// Invalid APPEND commands
imap.send("APPEND \"All Mail\" {1+}\r\na").await;
imap.assert_read(Type::Tagged, ResponseType::No)
@ -80,6 +82,8 @@ pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
assert_eq!(code.next(), Some(expected_uid.to_string().as_str()));
expected_uid += 1;
}
wait_for_index(&handle.jmap).await;
}
pub async fn assert_append_message(

View file

@ -225,7 +225,7 @@ refresh-token-renew = "2s"
"#;
#[allow(dead_code)]
struct IMAPTest {
pub struct IMAPTest {
jmap: Arc<JMAP>,
imap: Arc<IMAP>,
temp_dir: TempDir,
@ -331,7 +331,7 @@ async fn init_imap_tests(delete_if_exists: bool) -> IMAPTest {
pub async fn imap_tests() {
/*tracing::subscriber::set_global_default(
tracing_subscriber::FmtSubscriber::builder()
.with_max_level(tracing::Level::TRACE)
.with_max_level(tracing::Level::DEBUG)
.finish(),
)
.unwrap();*/
@ -364,10 +364,10 @@ pub async fn imap_tests() {
}
mailbox::test(&mut imap, &mut imap_check).await;
append::test(&mut imap, &mut imap_check).await;
append::test(&mut imap, &mut imap_check, &handle).await;
search::test(&mut imap, &mut imap_check).await;
fetch::test(&mut imap, &mut imap_check).await;
store::test(&mut imap, &mut imap_check).await;
store::test(&mut imap, &mut imap_check, &handle).await;
copy_move::test(&mut imap, &mut imap_check).await;
thread::test(&mut imap, &mut imap_check).await;
idle::test(&mut imap, &mut imap_check).await;

View file

@ -23,9 +23,11 @@
use imap_proto::ResponseType;
use super::{AssertResult, ImapConnection, Type};
use crate::jmap::wait_for_index;
pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
use super::{AssertResult, IMAPTest, ImapConnection, Type};
pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection, handle: &IMAPTest) {
// Select INBOX
imap.send("SELECT INBOX").await;
imap.assert_read(Type::Tagged, ResponseType::Ok)
@ -73,6 +75,7 @@ pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
.assert_contains("UIDNEXT 11");
// Store using saved searches
wait_for_index(&handle.jmap).await;
imap.send("SEARCH RETURN (SAVE) FROM nathaniel").await;
imap.assert_read(Type::Tagged, ResponseType::Ok).await;
imap.send("UID STORE $ +FLAGS (\\Answered)").await;

View file

@ -45,7 +45,7 @@ use crate::{
directory::sql::{
add_to_group, create_test_group_with_email, create_test_user_with_email, remove_from_group,
},
jmap::{mailbox::destroy_all_mailboxes, test_account_login},
jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, test_account_login},
};
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
@ -777,10 +777,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
admin_client.set_default_account_id(&id.to_string());
destroy_all_mailboxes(admin_client).await;
}
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
pub fn assert_forbidden<T: Debug>(result: Result<T, jmap_client::Error>) {

View file

@ -33,7 +33,7 @@ use jmap_proto::types::id::Id;
use crate::{
directory::sql::{create_test_user_with_email, link_test_address},
jmap::mailbox::destroy_all_mailboxes,
jmap::{assert_is_empty, mailbox::destroy_all_mailboxes},
};
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
@ -202,8 +202,5 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
// Destroy test accounts
admin_client.set_default_account_id(&account_id);
destroy_all_mailboxes(admin_client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}

View file

@ -40,7 +40,10 @@ use reqwest::{header, redirect::Policy};
use serde::de::DeserializeOwned;
use store::ahash::AHashMap;
use crate::{directory::sql::create_test_user_with_email, jmap::mailbox::destroy_all_mailboxes};
use crate::{
directory::sql::create_test_user_with_email,
jmap::{assert_is_empty, mailbox::destroy_all_mailboxes},
};
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
println!("Running OAuth tests...");
@ -307,10 +310,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
// Destroy test accounts
admin_client.set_default_account_id(john_id);
destroy_all_mailboxes(admin_client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
async fn post_bytes(url: &str, params: &AHashMap<String, String>) -> Bytes {

View file

@ -30,7 +30,7 @@ use serde_json::Value;
use crate::{
directory::sql::create_test_user_with_email,
jmap::{jmap_json_request, mailbox::destroy_all_mailboxes},
jmap::{assert_is_empty, jmap_json_request, mailbox::destroy_all_mailboxes},
};
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
@ -489,8 +489,5 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
// Remove test data
admin_client.set_default_account_id(account_id.to_string());
destroy_all_mailboxes(admin_client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}

View file

@ -34,7 +34,7 @@ use tokio::{
use crate::{
directory::sql::{create_test_user_with_email, link_test_address, remove_test_alias},
jmap::mailbox::destroy_all_mailboxes,
jmap::{assert_is_empty, mailbox::destroy_all_mailboxes},
};
pub async fn test(server: Arc<JMAP>, client: &mut Client) {
@ -248,10 +248,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
client.set_default_account_id(account_id);
destroy_all_mailboxes(client).await;
}
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
pub struct SmtpConnection {

View file

@ -34,6 +34,8 @@ use store::{
write::{log::ChangeLogBuilder, BatchBuilder},
};
use crate::jmap::assert_is_empty;
pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Email Changes tests...");
@ -315,10 +317,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
assert_eq!(created, vec![2, 3, 11, 12]);
assert_eq!(changes.updated(), Vec::<String>::new());
assert_eq!(changes.destroyed(), Vec::<String>::new());
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
#[derive(Debug, Clone, Copy)]

View file

@ -27,7 +27,7 @@ use jmap::JMAP;
use jmap_client::{client::Client, mailbox::Role};
use jmap_proto::types::id::Id;
use crate::jmap::mailbox::destroy_all_mailboxes;
use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Email Copy tests...");
@ -116,8 +116,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
destroy_all_mailboxes(client).await;
client.set_default_account_id(Id::new(2).to_string());
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}

View file

@ -31,7 +31,7 @@ use jmap_client::{
use jmap_proto::types::id::Id;
use mail_parser::HeaderName;
use crate::jmap::{mailbox::destroy_all_mailboxes, replace_blob_ids};
use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, replace_blob_ids};
pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Email Get tests...");
@ -177,11 +177,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
}
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
pub fn all_headers() -> Vec<email::Property> {

View file

@ -31,7 +31,9 @@ use jmap_client::{
};
use jmap_proto::types::id::Id;
use crate::jmap::{email_get::all_headers, mailbox::destroy_all_mailboxes, replace_blob_ids};
use crate::jmap::{
assert_is_empty, email_get::all_headers, mailbox::destroy_all_mailboxes, replace_blob_ids,
};
pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Email Parse tests...");
@ -243,9 +245,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
}
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}

View file

@ -24,7 +24,7 @@
use std::{collections::hash_map::Entry, sync::Arc, time::Instant};
use crate::{
jmap::mailbox::destroy_all_mailboxes,
jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, wait_for_index},
store::{deflate_artwork_data, query::FIELDS},
};
use jmap::JMAP;
@ -94,6 +94,9 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client, insert: bool) {
"thread {} found",
MAX_THREADS
);
// Wait for indexing to complete
wait_for_index(&server).await;
}
println!("Running JMAP Mail query tests...");
@ -115,10 +118,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client, insert: bool) {
.unwrap();
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
pub async fn query(client: &mut Client) {

View file

@ -37,6 +37,7 @@ use store::{
};
use crate::jmap::{
assert_is_empty,
email_changes::{LogAction, ParseState},
mailbox::destroy_all_mailboxes,
};
@ -287,10 +288,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
}
server.store.write(batch.build_batch()).await.unwrap();
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
#[derive(Debug, Clone)]

View file

@ -23,7 +23,7 @@
use std::{fs, path::PathBuf, sync::Arc};
use crate::jmap::mailbox::destroy_all_mailboxes;
use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, wait_for_index};
use jmap::{mailbox::INBOX_ID, JMAP};
use jmap_client::{client::Client, core::query, email::query::Filter};
use jmap_proto::types::id::Id;
@ -64,6 +64,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
.take_id();
email_ids.insert(email_name, email_id);
}
wait_for_index(&server).await;
// Run tests
for (filter, email_name, snippet_subject, snippet_preview) in [
@ -179,8 +180,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
// Destroy test data
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}

View file

@ -23,7 +23,7 @@
use std::{fs, path::PathBuf, sync::Arc};
use crate::jmap::mailbox::destroy_all_mailboxes;
use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
use jmap::{mailbox::INBOX_ID, JMAP};
use jmap_client::{
client::Client,
@ -46,11 +46,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
update(client, &mailbox_id).await;
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
async fn create(client: &mut Client, mailbox_id: &str) {

View file

@ -46,7 +46,7 @@ use tokio::{
use crate::{
directory::sql::create_test_user_with_email,
jmap::{email_set::assert_email_properties, mailbox::destroy_all_mailboxes},
jmap::{assert_is_empty, email_set::assert_email_properties, mailbox::destroy_all_mailboxes},
};
#[derive(Default, Debug, PartialEq, Eq)]
@ -471,10 +471,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
client.email_submission_destroy(&id).await.unwrap();
}
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
pub fn spawn_mock_smtp_server() -> (mpsc::Receiver<MockMessage>, Arc<Mutex<MockSMTPSettings>>) {

View file

@ -25,7 +25,10 @@ use std::{sync::Arc, time::Duration};
use crate::{
directory::sql::create_test_user_with_email,
jmap::{delivery::SmtpConnection, mailbox::destroy_all_mailboxes, test_account_login},
jmap::{
assert_is_empty, delivery::SmtpConnection, mailbox::destroy_all_mailboxes,
test_account_login,
},
};
use futures::StreamExt;
use jmap::{mailbox::INBOX_ID, JMAP};
@ -130,10 +133,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
assert_ping(&mut event_rx).await;
destroy_all_mailboxes(admin_client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
async fn assert_state(

View file

@ -37,6 +37,8 @@ use jmap_proto::types::{id::Id, state::State};
use serde::{Deserialize, Serialize};
use store::ahash::AHashMap;
use crate::jmap::assert_is_empty;
pub async fn test(server: Arc<JMAP>, client: &mut Client) {
println!("Running Mailbox tests...");
@ -606,10 +608,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
destroy_all_mailboxes(client).await;
client.set_default_account_id(Id::from(1u64));
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
async fn create_test_mailboxes(client: &mut Client) -> AHashMap<String, String> {

View file

@ -25,7 +25,11 @@ use std::{sync::Arc, time::Duration};
use base64::{engine::general_purpose, Engine};
use directory::config::ConfigDirectory;
use jmap::{api::JmapSessionManager, services::IPC_CHANNEL_BUFFER, JMAP};
use jmap::{
api::JmapSessionManager,
services::{housekeeper::Event, IPC_CHANNEL_BUFFER},
JMAP,
};
use jmap_client::client::{Client, Credentials};
use jmap_proto::types::id::Id;
use reqwest::header;
@ -222,17 +226,23 @@ refresh-token-renew = "2s"
#[tokio::test]
pub async fn jmap_tests() {
let coco = 1;
/*let level = "warn";
tracing::subscriber::set_global_default(
tracing_subscriber::FmtSubscriber::builder()
.with_max_level(tracing::Level::WARN)
.with_env_filter(
tracing_subscriber::EnvFilter::builder()
.parse(
format!("smtp={level},imap={level},jmap={level},store={level},utils={level},directory={level}"),
)
.unwrap(),
)
.finish(),
)
.unwrap();
.unwrap();*/
let delete = true;
let mut params = init_jmap_tests(delete).await;
/*email_query::test(params.server.clone(), &mut params.client, delete).await;
email_query::test(params.server.clone(), &mut params.client, delete).await;
email_get::test(params.server.clone(), &mut params.client).await;
email_set::test(params.server.clone(), &mut params.client).await;
email_parse::test(params.server.clone(), &mut params.client).await;
@ -254,7 +264,7 @@ pub async fn jmap_tests() {
email_submission::test(params.server.clone(), &mut params.client).await;
websocket::test(params.server.clone(), &mut params.client).await;
quota::test(params.server.clone(), &mut params.client).await;
crypto::test(params.server.clone(), &mut params.client).await;*/
crypto::test(params.server.clone(), &mut params.client).await;
blob::test(params.server.clone(), &mut params.client).await;
if delete {
@ -285,6 +295,33 @@ struct JMAPTest {
shutdown_tx: watch::Sender<bool>,
}
/// Blocks until the housekeeper reports that background FTS indexing is idle.
///
/// Each iteration asks the housekeeper (over a fresh oneshot channel) whether
/// the index task is still active; while it is, we back off for 100 ms before
/// polling again. Used by tests that must not race pending index work.
pub async fn wait_for_index(server: &JMAP) {
    loop {
        let (reply_tx, reply_rx) = tokio::sync::oneshot::channel();
        server
            .housekeeper_tx
            .send(Event::IndexIsActive(reply_tx))
            .await
            .unwrap();
        // `true` means the indexer is still busy; `false` means we can proceed.
        let still_indexing = reply_rx.await.unwrap();
        if !still_indexing {
            return;
        }
        tokio::time::sleep(Duration::from_millis(100)).await;
    }
}
/// Waits for any pending FTS index tasks to drain, then asserts that the
/// data store and the blob store contain no leftover entries.
///
/// Called at the end of each integration test to verify full cleanup.
pub async fn assert_is_empty(server: Arc<JMAP>) {
    // Pending index work would otherwise leave keys behind and fail the check.
    wait_for_index(&server).await;

    let blob_store = server.blob_store.clone();
    server.store.assert_is_empty(blob_store).await;
}
async fn init_jmap_tests(delete_if_exists: bool) -> JMAPTest {
// Load and parse config
let temp_dir = TempDir::new("jmap_tests", delete_if_exists);

View file

@ -53,7 +53,7 @@ use utils::listener::SessionData;
use crate::{
add_test_certs,
directory::sql::create_test_user_with_email,
jmap::{mailbox::destroy_all_mailboxes, test_account_login},
jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, test_account_login},
};
const SERVER: &str = "
@ -218,11 +218,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
expect_nothing(&mut event_rx).await;
destroy_all_mailboxes(admin_client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
#[derive(Clone)]

View file

@ -26,8 +26,8 @@ use std::sync::Arc;
use crate::{
directory::sql::{add_to_group, create_test_user_with_email, set_test_quota},
jmap::{
delivery::SmtpConnection, jmap_raw_request, mailbox::destroy_all_mailboxes,
test_account_login,
assert_is_empty, delivery::SmtpConnection, jmap_raw_request,
mailbox::destroy_all_mailboxes, test_account_login,
},
};
use jmap::{blob::upload::DISABLE_UPLOAD_QUOTA, mailbox::INBOX_ID, JMAP};
@ -320,10 +320,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
admin_client.set_default_account_id(account_id.to_string());
destroy_all_mailboxes(admin_client).await;
}
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
fn assert_over_quota<T: std::fmt::Debug>(result: Result<T, jmap_client::Error>) {

View file

@ -40,6 +40,7 @@ use std::{
use crate::{
directory::sql::create_test_user_with_email,
jmap::{
assert_is_empty,
delivery::SmtpConnection,
email_submission::{assert_message_delivery, spawn_mock_smtp_server, MockMessage},
mailbox::destroy_all_mailboxes,
@ -486,10 +487,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
client.sieve_script_destroy(&id).await.unwrap();
}
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
fn get_script(name: &str) -> Vec<u8> {

View file

@ -34,6 +34,8 @@ use jmap_client::{
use jmap_proto::types::{collection::Collection, id::Id, property::Property};
use store::rand::{self, Rng};
use super::assert_is_empty;
const TEST_USER_ID: u32 = 1;
const NUM_PASSES: usize = 1;
@ -254,11 +256,7 @@ async fn email_tests(server: Arc<JMAP>, client: Arc<Client>) {
}
destroy_all_mailboxes(&client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server.clone()).await;
}
}
@ -331,10 +329,7 @@ async fn mailbox_tests(server: Arc<JMAP>, client: Arc<Client>) {
join_all(futures).await;
destroy_all_mailboxes(&client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
async fn create_mailbox(client: &Client, mailbox: &str) -> Vec<String> {

View file

@ -23,7 +23,7 @@
use std::sync::Arc;
use crate::jmap::mailbox::destroy_all_mailboxes;
use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
use jmap::JMAP;
use jmap_client::{client::Client, mailbox::Role};
use jmap_proto::types::id::Id;
@ -66,8 +66,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
);
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}

View file

@ -23,7 +23,7 @@
use std::sync::Arc;
use crate::jmap::mailbox::destroy_all_mailboxes;
use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
use jmap::JMAP;
use jmap_client::{client::Client, email, mailbox::Role};
use jmap_proto::types::id::Id;
@ -203,10 +203,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
}
}
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
fn build_message(message: usize, in_reply_to: Option<usize>, thread_num: usize) -> String {

View file

@ -30,6 +30,7 @@ use std::{sync::Arc, time::Instant};
use crate::{
directory::sql::create_test_user_with_email,
jmap::{
assert_is_empty,
delivery::SmtpConnection,
email_submission::{
assert_message_delivery, expect_nothing, spawn_mock_smtp_server, MockMessage,
@ -173,8 +174,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
// Remove test data
client.vacation_response_destroy().await.unwrap();
destroy_all_mailboxes(client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}

View file

@ -40,7 +40,7 @@ use tokio::sync::mpsc;
use crate::{
directory::sql::create_test_user_with_email,
jmap::{mailbox::destroy_all_mailboxes, test_account_login},
jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, test_account_login},
};
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
@ -125,11 +125,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
admin_client.set_default_account_id(account_id);
destroy_all_mailboxes(admin_client).await;
server
.store
.assert_is_empty(server.blob_store.clone())
.await;
assert_is_empty(server).await;
}
async fn expect_response(

View file

@ -35,37 +35,12 @@ pub async fn test(db: Store) {
test_1(db.clone()).await;
test_2(db.clone()).await;
test_3(db.clone()).await;
test_4(db).await;
test_3(db).await;
ID_ASSIGNMENT_EXPIRY.store(60 * 60, std::sync::atomic::Ordering::Relaxed);
}
/// Exercises concurrent change-id assignment: 100 tasks racing on the same
/// account must each receive a distinct id, together covering exactly 0..100.
async fn test_1(db: Store) {
    // Test change id assignment
    let mut tasks = Vec::with_capacity(100);
    let mut remaining = HashSet::new();

    // Spawn 100 concurrent assignments and record the ids we expect back.
    for expected in 0..100 {
        remaining.insert(expected);
        let db = db.clone();
        tasks.push(tokio::spawn(async move { db.assign_change_id(0).await }));
    }

    // Every task must yield an id we have not seen before.
    for task in tasks {
        let assigned_id = task.await.unwrap().unwrap();
        assert!(
            remaining.remove(&assigned_id),
            "already assigned or invalid: {assigned_id} "
        );
    }

    db.destroy().await;
}
async fn test_2(db: Store) {
// Test document id assignment
for wait_for_expiry in [true, false] {
let mut handles = Vec::new();
@ -101,7 +76,7 @@ async fn test_2(db: Store) {
db.destroy().await;
}
async fn test_3(db: Store) {
async fn test_2(db: Store) {
// Create document ids and try reassigning
let mut expected_ids = AHashSet::new();
let mut batch = BatchBuilder::new();
@ -132,7 +107,7 @@ async fn test_3(db: Store) {
db.destroy().await;
}
async fn test_4(db: Store) {
async fn test_3(db: Store) {
// Try reassigning deleted ids
let mut expected_ids = AHashSet::new();
let mut batch = BatchBuilder::new();

View file

@ -25,7 +25,7 @@ pub mod assign_id;
pub mod blob;
pub mod query;
use std::{io::Read, sync::Arc};
use std::io::Read;
use ::store::Store;
@ -56,8 +56,8 @@ pub async fn store_tests() {
if insert {
db.destroy().await;
}
assign_id::test(db.clone()).await;
query::test(db, insert).await;
query::test(db.clone(), insert).await;
assign_id::test(db).await;
temp_dir.delete();
}

View file

@ -22,13 +22,20 @@
*/
use std::{
fmt::Display,
sync::{Arc, Mutex},
time::Instant,
};
use jmap_proto::types::keyword::Keyword;
use nlp::language::Language;
use store::{ahash::AHashMap, query::sort::Pagination, write::ValueClass};
use store::{
ahash::AHashMap,
fts::{index::FtsDocument, Field, FtsFilter},
query::sort::Pagination,
write::ValueClass,
FtsStore,
};
use store::{
query::{Comparator, Filter},
@ -93,9 +100,34 @@ const FIELDS_OPTIONS: [FieldType; 20] = [
FieldType::Text, // "url",
];
/// Newtype over the raw `u8` field number used by the query test fixtures.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct FieldId(u8);

impl From<FieldId> for u8 {
    /// Unwraps the raw numeric field identifier.
    fn from(field_id: FieldId) -> Self {
        let FieldId(raw) = field_id;
        raw
    }
}

impl Display for FieldId {
    /// Renders the field as "<name> (<number>)" via the `FIELDS` lookup table.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let idx = self.0;
        write!(f, "{} ({})", FIELDS[idx as usize], idx)
    }
}

impl FieldId {
    /// Wraps a raw field number into an FTS header `Field`.
    pub fn new(field_id: u8) -> Field<FieldId> {
        Field::Header(FieldId(field_id))
    }

    /// Returns the raw numeric field identifier.
    pub fn inner(&self) -> u8 {
        self.0
    }
}
#[allow(clippy::mutex_atomic)]
pub async fn test(db: Store, do_insert: bool) {
println!("Running Store query tests...");
let fts_store = FtsStore::from(db.clone());
let pool = rayon::ThreadPoolBuilder::new()
.num_threads(8)
@ -116,7 +148,10 @@ pub async fn test(db: Store, do_insert: bool) {
let documents = documents.clone();
s.spawn_fifo(move |_| {
/*let mut fts_builder = FtsIndexBuilder::with_default_language(Language::English);
let mut fts_builder = FtsDocument::with_default_language(Language::English)
.with_account_id(0)
.with_collection(COLLECTION_ID)
.with_document_id(document_id as u32);
let mut builder = BatchBuilder::new();
builder
.with_account_id(0)
@ -137,7 +172,7 @@ pub async fn test(db: Store, do_insert: bool) {
FieldType::FullText => {
if !field.is_empty() {
fts_builder.index(
field_id,
FieldId::new(field_id),
field.to_lowercase(),
Language::English,
);
@ -165,8 +200,10 @@ pub async fn test(db: Store, do_insert: bool) {
}
}
builder.custom(fts_builder);
documents.lock().unwrap().push(builder.build());*/
documents
.lock()
.unwrap()
.push((builder.build(), fts_builder));
});
}
});
@ -180,22 +217,31 @@ pub async fn test(db: Store, do_insert: bool) {
let now = Instant::now();
let batches = documents.lock().unwrap().drain(..).collect::<Vec<_>>();
let mut chunk = Vec::new();
let mut fts_chunk = Vec::new();
for batch in batches {
for (batch, fts_batch) in batches {
let chunk_instance = Instant::now();
chunk.push({
let db = db.clone();
tokio::spawn(async move { db.write(batch).await })
});
fts_chunk.push({
let fts_store = fts_store.clone();
tokio::spawn(async move { fts_store.index(fts_batch).await })
});
if chunk.len() == 1000 {
for handle in chunk {
handle.await.unwrap().unwrap();
}
for handle in fts_chunk {
handle.await.unwrap().unwrap();
}
println!(
"Chunk insert took {} ms.",
"Store insert took {} ms.",
chunk_instance.elapsed().as_millis()
);
chunk = Vec::new();
fts_chunk = Vec::new();
}
}
@ -209,72 +255,127 @@ pub async fn test(db: Store, do_insert: bool) {
}
println!("Running filter tests...");
test_filter(db.clone()).await;
test_filter(db.clone(), fts_store).await;
println!("Running sort tests...");
test_sort(db).await;
}
pub async fn test_filter(db: Store) {
/*
pub async fn test_filter(db: Store, fts: FtsStore) {
let mut fields = AHashMap::default();
let mut fields_u8 = AHashMap::default();
for (field_num, field) in FIELDS.iter().enumerate() {
fields.insert(field.to_string(), field_num as u8);
fields.insert(field.to_string(), FieldId::new(field_num as u8));
fields_u8.insert(field.to_string(), field_num as u8);
}
let tests = [
(
vec![
Filter::has_english_text(fields["title"], "water"),
Filter::eq(fields["year"], 1979u32),
Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![FtsFilter::has_english_text(
fields["title"].clone(),
"water",
)],
)
.await
.unwrap(),
),
Filter::eq(fields_u8["year"], 1979u32),
],
vec!["p11293"],
),
(
vec![
Filter::has_english_text(fields["medium"], "gelatin"),
Filter::gt(fields["year"], 2000u32),
Filter::lt(fields["width"], 180u32),
Filter::gt(fields["width"], 0u32),
Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![FtsFilter::has_english_text(
fields["medium"].clone(),
"gelatin",
)],
)
.await
.unwrap(),
),
Filter::gt(fields_u8["year"], 2000u32),
Filter::lt(fields_u8["width"], 180u32),
Filter::gt(fields_u8["width"], 0u32),
],
vec!["p79426", "p79427", "p79428", "p79429", "p79430"],
),
(
vec![Filter::has_english_text(fields["title"], "'rustic bridge'")],
vec![Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![FtsFilter::has_english_text(
fields["title"].clone(),
"'rustic bridge'",
)],
)
.await
.unwrap(),
)],
vec!["d05503"],
),
(
vec![Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![
Filter::has_english_text(fields["title"], "'rustic'"),
Filter::has_english_text(fields["title"], "study"),
FtsFilter::has_english_text(fields["title"].clone(), "'rustic'"),
FtsFilter::has_english_text(fields["title"].clone(), "study"),
],
)
.await
.unwrap(),
)],
vec!["d00399", "d05352"],
),
(
vec![
Filter::has_text(fields["artist"], "mauro kunst", Language::None),
Filter::is_in_bitmap(fields["artistRole"], Keyword::Other("artist".to_string())),
Filter::has_text(fields_u8["artist"], "mauro kunst"),
Filter::is_in_bitmap(
fields_u8["artistRole"],
Keyword::Other("artist".to_string()),
),
Filter::Or,
Filter::eq(fields["year"], 1969u32),
Filter::eq(fields["year"], 1971u32),
Filter::eq(fields_u8["year"], 1969u32),
Filter::eq(fields_u8["year"], 1971u32),
Filter::End,
],
vec!["p01764", "t05843"],
),
(
vec![
Filter::Not,
Filter::has_english_text(fields["medium"], "oil"),
Filter::End,
Filter::has_english_text(fields["creditLine"], "bequeath"),
Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![
FtsFilter::Not,
FtsFilter::has_english_text(fields["medium"].clone(), "oil"),
FtsFilter::End,
FtsFilter::has_english_text(fields["creditLine"].clone(), "bequeath"),
],
)
.await
.unwrap(),
),
Filter::Or,
Filter::And,
Filter::ge(fields["year"], 1900u32),
Filter::lt(fields["year"], 1910u32),
Filter::ge(fields_u8["year"], 1900u32),
Filter::lt(fields_u8["year"], 1910u32),
Filter::End,
Filter::And,
Filter::ge(fields["year"], 2000u32),
Filter::lt(fields["year"], 2010u32),
Filter::ge(fields_u8["year"], 2000u32),
Filter::lt(fields_u8["year"], 2010u32),
Filter::End,
Filter::End,
],
@ -287,35 +388,59 @@ pub async fn test_filter(db: Store) {
(
vec![
Filter::And,
Filter::has_text(fields["artist"], "warhol", Language::None),
Filter::has_text(fields_u8["artist"], "warhol"),
Filter::Not,
Filter::has_english_text(fields["title"], "'campbell'"),
Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![FtsFilter::has_english_text(
fields["title"].clone(),
"'campbell'",
)],
)
.await
.unwrap(),
),
Filter::End,
Filter::Not,
Filter::Or,
Filter::gt(fields["year"], 1980u32),
Filter::gt(fields_u8["year"], 1980u32),
Filter::And,
Filter::gt(fields["width"], 500u32),
Filter::gt(fields["height"], 500u32),
Filter::gt(fields_u8["width"], 500u32),
Filter::gt(fields_u8["height"], 500u32),
Filter::End,
Filter::End,
Filter::End,
Filter::eq(fields["acquisitionYear"], 2008u32),
Filter::eq(fields_u8["acquisitionYear"], 2008u32),
Filter::End,
],
vec!["ar00039", "t12600"],
),
(
vec![
Filter::has_english_text(fields["title"], "study"),
Filter::has_english_text(fields["medium"], "paper"),
Filter::has_english_text(fields["creditLine"], "'purchased'"),
Filter::Not,
Filter::has_english_text(fields["title"], "'anatomical'"),
Filter::has_english_text(fields["title"], "'for'"),
Filter::End,
Filter::gt(fields["year"], 1900u32),
Filter::gt(fields["acquisitionYear"], 2000u32),
Filter::is_in_set(
fts.query(
0,
COLLECTION_ID,
vec![
FtsFilter::has_english_text(fields["title"].clone(), "study"),
FtsFilter::has_english_text(fields["medium"].clone(), "paper"),
FtsFilter::has_english_text(
fields["creditLine"].clone(),
"'purchased'",
),
FtsFilter::Not,
FtsFilter::has_english_text(fields["title"].clone(), "'anatomical'"),
FtsFilter::has_english_text(fields["title"].clone(), "'for'"),
FtsFilter::End,
],
)
.await
.unwrap(),
),
Filter::gt(fields_u8["year"], 1900u32),
Filter::gt(fields_u8["acquisitionYear"], 2000u32),
],
vec![
"p80042", "p80043", "p80044", "p80045", "p80203", "t11937", "t12172",
@ -329,7 +454,7 @@ pub async fn test_filter(db: Store) {
let sorted_docset = db
.sort(
docset,
vec![Comparator::ascending(fields["accession_number"])],
vec![Comparator::ascending(fields_u8["accession_number"])],
Pagination::new(0, 0, None, 0),
)
.await
@ -344,8 +469,7 @@ pub async fn test_filter(db: Store) {
account_id: 0,
collection: COLLECTION_ID,
document_id: document_id as u32,
family: 0,
field: fields["accession_number"],
class: ValueClass::Property(fields_u8["accession_number"])
})
.collect()
)
@ -357,8 +481,6 @@ pub async fn test_filter(db: Store) {
expected_results
);
}
*/
}
pub async fn test_sort(db: Store) {