Mirror of https://github.com/stalwartlabs/mail-server.git, synced 2024-11-28 09:07:32 +00:00

commit bcc05340b2 (parent a3e6d152c9)

    FTS storage implementation and background indexing

88 changed files with 3105 additions and 2214 deletions
Cargo.lock (generated)

@@ -2306,6 +2306,7 @@ dependencies = [
  "chrono",
  "jmap_proto",
  "mail-parser",
+ "store",
  "tokio",
 ]
 
@@ -5131,6 +5132,7 @@ dependencies = [
  "futures",
  "lazy_static",
  "lru-cache",
+ "lz4_flex",
  "nlp",
  "num_cpus",
  "parking_lot",

@@ -5923,6 +5925,7 @@ dependencies = [
  "opentelemetry-otlp",
  "opentelemetry-semantic-conventions",
  "privdrop",
+ "rand 0.8.5",
  "rustls 0.21.7",
  "rustls-pemfile",
  "serde",
@@ -6,6 +6,7 @@ resolver = "2"
 
 [dependencies]
 jmap_proto = { path = "../jmap-proto" }
+store = { path = "../store" }
 mail-parser = { git = "https://github.com/stalwartlabs/mail-parser", features = ["full_encoding", "serde_support", "ludicrous_mode"] }
 ahash = { version = "0.8" }
 chrono = { version = "0.4"}
@@ -21,6 +21,8 @@
  * for more details.
  */
 
+use store::fts::{FilterItem, FilterType};
+
 use super::{quoted_string, serialize_sequence, Flag, Sequence};
 
 #[derive(Debug, Clone, PartialEq, Eq)]

@@ -129,6 +131,38 @@ pub enum Filter {
     ThreadId(String),
 }
 
+impl FilterItem for Filter {
+    fn filter_type(&self) -> FilterType {
+        match self {
+            Filter::From(_)
+            | Filter::To(_)
+            | Filter::Cc(_)
+            | Filter::Bcc(_)
+            | Filter::Subject(_)
+            | Filter::Body(_)
+            | Filter::Text(_)
+            | Filter::Header(_, _) => FilterType::Fts,
+            Filter::And => FilterType::And,
+            Filter::Or => FilterType::Or,
+            Filter::Not => FilterType::Not,
+            Filter::End => FilterType::End,
+            _ => FilterType::Store,
+        }
+    }
+}
+
+impl From<FilterType> for Filter {
+    fn from(value: FilterType) -> Self {
+        match value {
+            FilterType::And => Filter::And,
+            FilterType::Or => Filter::Or,
+            FilterType::Not => Filter::Not,
+            FilterType::End => Filter::End,
+            _ => unreachable!(),
+        }
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum ModSeqEntry {
     Shared(Flag),
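The FilterItem impl above is what lets a generic splitter partition one prefix-notation filter stream into full-text and store groups, which the IMAP and JMAP query code below consumes as FilterGroup::Fts / FilterGroup::Store. A minimal sketch of that idea, with simplified stand-in enums (the real IntoFilterGroup lives in store::fts and is generic over the protocol's Filter type):

    // Hedged sketch, not the crate's actual code.
    #[derive(Clone, Copy, PartialEq)]
    enum FilterType { Fts, Store, And, Or, Not, End }

    #[derive(Debug)]
    enum Filter { Subject(String), Seen, And, End }

    fn filter_type(f: &Filter) -> FilterType {
        match f {
            Filter::Subject(_) => FilterType::Fts,
            Filter::Seen => FilterType::Store,
            Filter::And => FilterType::And,
            Filter::End => FilterType::End,
        }
    }

    #[derive(Debug)]
    enum FilterGroup { Fts(Vec<Filter>), Store(Filter) }

    fn into_filter_group(filters: Vec<Filter>) -> Vec<FilterGroup> {
        let mut out = Vec::new();
        let mut fts: Vec<Filter> = Vec::new();
        for f in filters {
            match filter_type(&f) {
                // Buffer full-text conditions so they can be resolved in a
                // single round-trip to the FTS backend.
                FilterType::Fts => fts.push(f),
                // One plausible operator strategy: operators extend an open
                // FTS group, otherwise they pass through as store filters.
                FilterType::And | FilterType::Or | FilterType::Not | FilterType::End
                    if !fts.is_empty() =>
                {
                    fts.push(f)
                }
                _ => {
                    if !fts.is_empty() {
                        out.push(FilterGroup::Fts(std::mem::take(&mut fts)));
                    }
                    out.push(FilterGroup::Store(f));
                }
            }
        }
        if !fts.is_empty() {
            out.push(FilterGroup::Fts(fts));
        }
        out
    }

    fn main() {
        let q = vec![Filter::And, Filter::Subject("report".into()), Filter::Seen, Filter::End];
        println!("{:?}", into_filter_group(q));
    }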
@@ -36,6 +36,7 @@ use jmap_proto::types::{collection::Collection, id::Id, keyword::Keyword, proper
 use mail_parser::HeaderName;
 use nlp::language::Language;
 use store::{
+    fts::{Field, FilterGroup, FtsFilter, IntoFilterGroup},
     query::{self, log::Query, sort::Pagination, ResultSet},
     roaring::RoaringBitmap,
     write::now,

@@ -275,371 +276,396 @@ impl SessionData {
 
         // Convert query
         let mut include_highest_modseq = false;
-        for filter in imap_filter {
-            match filter {
-                search::Filter::Sequence(sequence, uid_filter) => {
-                    let mut set = RoaringBitmap::new();
-                    if let (Sequence::SavedSearch, Some(prev_saved_search)) =
-                        (&sequence, &prev_saved_search)
-                    {
-                        if let Some(prev_saved_search) = prev_saved_search {
-                            let state = mailbox.state.lock();
-                            for imap_id in prev_saved_search.iter() {
-                                if let Some(id) = state.uid_to_id.get(&imap_id.uid) {
-                                    set.insert(*id);
-                                }
-                            }
-                        } else {
-                            return Err(StatusResponse::no("No saved search found."));
-                        }
-                    } else {
-                        for id in mailbox
-                            .sequence_to_ids(&sequence, is_uid || uid_filter)
-                            .await?
-                            .keys()
-                        {
-                            set.insert(*id);
-                        }
-                    }
-                    filters.push(query::Filter::is_in_set(set));
-                }
-                search::Filter::All => {
-                    filters.push(query::Filter::is_in_set(message_ids.clone()));
-                }
-                search::Filter::Answered => {
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Answered,
-                    ));
-                }
-                /*search::Filter::Bcc(text) => {
-                    filters.push(query::Filter::has_text(Property::Bcc, text, Language::None));
-                }
-                search::Filter::Before(date) => {
-                    filters.push(query::Filter::lt(Property::ReceivedAt, date as u64));
-                }
-                search::Filter::Body(text) => {
-                    filters.push(query::Filter::has_text_detect(
-                        Property::TextBody,
-                        text,
-                        self.jmap.config.default_language,
-                    ));
-                }
-                search::Filter::Cc(text) => {
-                    filters.push(query::Filter::has_text(Property::Cc, text, Language::None));
-                }
-                search::Filter::Deleted => {
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Deleted,
-                    ));
-                }
-                search::Filter::Draft => {
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Draft,
-                    ));
-                }
-                search::Filter::Flagged => {
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Flagged,
-                    ));
-                }
-                search::Filter::From(text) => {
-                    filters.push(query::Filter::has_text(
-                        Property::From,
-                        text,
-                        Language::None,
-                    ));
-                }
-                search::Filter::Header(header, value) => match HeaderName::parse(&header) {
-                    Some(HeaderName::Other(_)) | None => {
-                        return Err(StatusResponse::no(format!(
-                            "Querying non-RFC header '{header}' is not allowed.",
-                        )));
-                    }
-                    Some(header_name) => {
-                        let is_id = matches!(
-                            header_name,
-                            HeaderName::MessageId
-                                | HeaderName::InReplyTo
-                                | HeaderName::References
-                                | HeaderName::ResentMessageId
-                        );
-                        let tokens = if !value.is_empty() {
-                            let header_num = header_name.id().to_string();
-                            value
-                                .split_ascii_whitespace()
-                                .filter_map(|token| {
-                                    if token.len() < MAX_TOKEN_LENGTH {
-                                        if is_id {
-                                            format!("{header_num}{token}")
-                                        } else {
-                                            format!("{header_num}{}", token.to_lowercase())
-                                        }
-                                        .into()
-                                    } else {
-                                        None
-                                    }
-                                })
-                                .collect::<Vec<_>>()
-                        } else {
-                            vec![]
-                        };
-                        match tokens.len() {
-                            0 => {
-                                filters.push(query::Filter::has_raw_text(
-                                    Property::Headers,
-                                    header_name.id().to_string(),
-                                ));
-                            }
-                            1 => {
-                                filters.push(query::Filter::has_raw_text(
-                                    Property::Headers,
-                                    tokens.into_iter().next().unwrap(),
-                                ));
-                            }
-                            _ => {
-                                filters.push(query::Filter::And);
-                                for token in tokens {
-                                    filters.push(query::Filter::has_raw_text(
-                                        Property::Headers,
-                                        token,
-                                    ));
-                                }
-                                filters.push(query::Filter::End);
-                            }
-                        }
-                    }
-                },
-                search::Filter::Keyword(keyword) => {
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::from(keyword),
-                    ));
-                }
-                search::Filter::Larger(size) => {
-                    filters.push(query::Filter::gt(Property::Size, size));
-                }
-                search::Filter::On(date) => {
-                    filters.push(query::Filter::And);
-                    filters.push(query::Filter::ge(Property::ReceivedAt, date as u64));
-                    filters.push(query::Filter::lt(
-                        Property::ReceivedAt,
-                        (date + 86400) as u64,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::Seen => {
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Seen,
-                    ));
-                }
-                search::Filter::SentBefore(date) => {
-                    filters.push(query::Filter::lt(Property::SentAt, date as u64));
-                }
-                search::Filter::SentOn(date) => {
-                    filters.push(query::Filter::And);
-                    filters.push(query::Filter::ge(Property::SentAt, date as u64));
-                    filters.push(query::Filter::lt(Property::SentAt, (date + 86400) as u64));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::SentSince(date) => {
-                    filters.push(query::Filter::ge(Property::SentAt, date as u64));
-                }
-                search::Filter::Since(date) => {
-                    filters.push(query::Filter::ge(Property::ReceivedAt, date as u64));
-                }
-                search::Filter::Smaller(size) => {
-                    filters.push(query::Filter::lt(Property::Size, size));
-                }
-                search::Filter::Subject(text) => {
-                    filters.push(query::Filter::has_text_detect(
-                        Property::Subject,
-                        text,
-                        self.jmap.config.default_language,
-                    ));
-                }
-                search::Filter::Text(text) => {
-                    filters.push(query::Filter::Or);
-                    filters.push(query::Filter::has_text(
-                        Property::From,
-                        &text,
-                        Language::None,
-                    ));
-                    filters.push(query::Filter::has_text(Property::To, &text, Language::None));
-                    filters.push(query::Filter::has_text(Property::Cc, &text, Language::None));
-                    filters.push(query::Filter::has_text(
-                        Property::Bcc,
-                        &text,
-                        Language::None,
-                    ));
-                    filters.push(query::Filter::has_text_detect(
-                        Property::Subject,
-                        &text,
-                        self.jmap.config.default_language,
-                    ));
-                    filters.push(query::Filter::has_text_detect(
-                        Property::TextBody,
-                        &text,
-                        self.jmap.config.default_language,
-                    ));
-                    filters.push(query::Filter::has_text_detect(
-                        Property::Attachments,
-                        text,
-                        self.jmap.config.default_language,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::To(text) => {
-                    filters.push(query::Filter::has_text(Property::To, text, Language::None));
-                }*/
-                search::Filter::Unanswered => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Answered,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::Undeleted => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Deleted,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::Undraft => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Draft,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::Unflagged => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Flagged,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::Unkeyword(keyword) => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::from(keyword),
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::Unseen => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Seen,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::And => {
-                    filters.push(query::Filter::And);
-                }
-                search::Filter::Or => {
-                    filters.push(query::Filter::Or);
-                }
-                search::Filter::Not => {
-                    filters.push(query::Filter::Not);
-                }
-                search::Filter::End => {
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::Recent => {
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Recent,
-                    ));
-                }
-                search::Filter::New => {
-                    filters.push(query::Filter::And);
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Recent,
-                    ));
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Seen,
-                    ));
-                    filters.push(query::Filter::End);
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::Old => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_bitmap(
-                        Property::Keywords,
-                        Keyword::Seen,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                search::Filter::Older(secs) => {
-                    filters.push(query::Filter::le(
-                        Property::ReceivedAt,
-                        now().saturating_sub(secs as u64),
-                    ));
-                }
-                search::Filter::Younger(secs) => {
-                    filters.push(query::Filter::ge(
-                        Property::ReceivedAt,
-                        now().saturating_sub(secs as u64),
-                    ));
-                }
-                search::Filter::ModSeq((modseq, _)) => {
-                    let mut set = RoaringBitmap::new();
-                    for change in self
-                        .jmap
-                        .changes_(
-                            mailbox.id.account_id,
-                            Collection::Email,
-                            Query::from_modseq(modseq),
-                        )
-                        .await?
-                        .changes
-                    {
-                        let id = (change.unwrap_id() & u32::MAX as u64) as u32;
-                        if message_ids.contains(id) {
-                            set.insert(id);
-                        }
-                    }
-                    filters.push(query::Filter::is_in_set(set));
-                    include_highest_modseq = true;
-                }
-                search::Filter::EmailId(id) => {
-                    if let Some(id) = Id::from_bytes(id.as_bytes()) {
-                        filters.push(query::Filter::is_in_set(
-                            RoaringBitmap::from_sorted_iter([id.document_id()]).unwrap(),
-                        ));
-                    } else {
-                        return Err(StatusResponse::no(format!(
-                            "Failed to parse email id '{id}'.",
-                        )));
-                    }
-                }
-                search::Filter::ThreadId(id) => {
-                    if let Some(id) = Id::from_bytes(id.as_bytes()) {
-                        filters.push(query::Filter::is_in_bitmap(
-                            Property::ThreadId,
-                            id.document_id(),
-                        ));
-                    } else {
-                        return Err(StatusResponse::no(format!(
-                            "Failed to parse thread id '{id}'.",
-                        )));
-                    }
-                }
-                _ => (),
+        for filter_group in imap_filter.into_filter_group() {
+            match filter_group {
+                FilterGroup::Fts(conds) => {
+                    let mut fts_filters = Vec::with_capacity(filters.len());
+                    for cond in conds {
+                        match cond {
+                            search::Filter::Bcc(text) => {
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::Bcc),
+                                    text,
+                                    Language::None,
+                                ));
+                            }
+                            search::Filter::Body(text) => {
+                                fts_filters.push(FtsFilter::has_text_detect(
+                                    Field::Body,
+                                    text,
+                                    self.jmap.config.default_language,
+                                ));
+                            }
+                            search::Filter::Cc(text) => {
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::Cc),
+                                    text,
+                                    Language::None,
+                                ));
+                            }
+                            search::Filter::From(text) => {
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::From),
+                                    text,
+                                    Language::None,
+                                ));
+                            }
+                            search::Filter::Header(header, value) => {
+                                match HeaderName::parse(header) {
+                                    Some(HeaderName::Other(header_name)) => {
+                                        return Err(StatusResponse::no(format!(
+                                            "Querying header '{header_name}' is not supported.",
+                                        )));
+                                    }
+                                    Some(header_name) => {
+                                        if !value.is_empty() {
+                                            if matches!(
+                                                header_name,
+                                                HeaderName::MessageId
+                                                    | HeaderName::InReplyTo
+                                                    | HeaderName::References
+                                                    | HeaderName::ResentMessageId
+                                            ) {
+                                                fts_filters.push(FtsFilter::has_keyword(
+                                                    Field::Header(header_name),
+                                                    value,
+                                                ));
+                                            } else {
+                                                fts_filters.push(FtsFilter::has_text(
+                                                    Field::Header(header_name),
+                                                    value,
+                                                    Language::None,
+                                                ));
+                                            }
+                                        } else {
+                                            fts_filters.push(FtsFilter::has_keyword(
+                                                Field::Keyword,
+                                                header_name.as_str().to_lowercase(),
+                                            ));
+                                        }
+                                    }
+                                    None => (),
+                                }
+                            }
+                            search::Filter::Subject(text) => {
+                                fts_filters.push(FtsFilter::has_text_detect(
+                                    Field::Header(HeaderName::Subject),
+                                    text,
+                                    self.jmap.config.default_language,
+                                ));
+                            }
+                            search::Filter::Text(text) => {
+                                fts_filters.push(FtsFilter::Or);
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::From),
+                                    &text,
+                                    Language::None,
+                                ));
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::To),
+                                    &text,
+                                    Language::None,
+                                ));
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::Cc),
+                                    &text,
+                                    Language::None,
+                                ));
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::Bcc),
+                                    &text,
+                                    Language::None,
+                                ));
+                                fts_filters.push(FtsFilter::has_text_detect(
+                                    Field::Header(HeaderName::Subject),
+                                    &text,
+                                    self.jmap.config.default_language,
+                                ));
+                                fts_filters.push(FtsFilter::has_text_detect(
+                                    Field::Body,
+                                    &text,
+                                    self.jmap.config.default_language,
+                                ));
+                                fts_filters.push(FtsFilter::has_text_detect(
+                                    Field::Attachment,
+                                    text,
+                                    self.jmap.config.default_language,
+                                ));
+                                fts_filters.push(FtsFilter::End);
+                            }
+                            search::Filter::To(text) => {
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::To),
+                                    text,
+                                    Language::None,
+                                ));
+                            }
+                            search::Filter::And => {
+                                fts_filters.push(FtsFilter::And);
+                            }
+                            search::Filter::Or => {
+                                fts_filters.push(FtsFilter::Or);
+                            }
+                            search::Filter::Not => {
+                                fts_filters.push(FtsFilter::Not);
+                            }
+                            search::Filter::End => {
+                                fts_filters.push(FtsFilter::End);
+                            }
+                            _ => (),
+                        }
+                    }
+
+                    filters.push(query::Filter::is_in_set(
+                        self.jmap
+                            .fts_filter(mailbox.id.account_id, Collection::Email, fts_filters)
+                            .await?,
+                    ));
+                }
+                FilterGroup::Store(cond) => match cond {
+                    search::Filter::Sequence(sequence, uid_filter) => {
+                        let mut set = RoaringBitmap::new();
+                        if let (Sequence::SavedSearch, Some(prev_saved_search)) =
+                            (&sequence, &prev_saved_search)
+                        {
+                            if let Some(prev_saved_search) = prev_saved_search {
+                                let state = mailbox.state.lock();
+                                for imap_id in prev_saved_search.iter() {
+                                    if let Some(id) = state.uid_to_id.get(&imap_id.uid) {
+                                        set.insert(*id);
+                                    }
+                                }
+                            } else {
+                                return Err(StatusResponse::no("No saved search found."));
+                            }
+                        } else {
+                            for id in mailbox
+                                .sequence_to_ids(&sequence, is_uid || uid_filter)
+                                .await?
+                                .keys()
+                            {
+                                set.insert(*id);
+                            }
+                        }
+                        filters.push(query::Filter::is_in_set(set));
+                    }
+                    search::Filter::All => {
+                        filters.push(query::Filter::is_in_set(message_ids.clone()));
+                    }
+                    search::Filter::Answered => {
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Answered,
+                        ));
+                    }
+                    search::Filter::Before(date) => {
+                        filters.push(query::Filter::lt(Property::ReceivedAt, date as u64));
+                    }
+                    search::Filter::Deleted => {
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Deleted,
+                        ));
+                    }
+                    search::Filter::Draft => {
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Draft,
+                        ));
+                    }
+                    search::Filter::Flagged => {
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Flagged,
+                        ));
+                    }
+                    search::Filter::Keyword(keyword) => {
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::from(keyword),
+                        ));
+                    }
+                    search::Filter::Larger(size) => {
+                        filters.push(query::Filter::gt(Property::Size, size));
+                    }
+                    search::Filter::On(date) => {
+                        filters.push(query::Filter::And);
+                        filters.push(query::Filter::ge(Property::ReceivedAt, date as u64));
+                        filters.push(query::Filter::lt(
+                            Property::ReceivedAt,
+                            (date + 86400) as u64,
+                        ));
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::Seen => {
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Seen,
+                        ));
+                    }
+                    search::Filter::SentBefore(date) => {
+                        filters.push(query::Filter::lt(Property::SentAt, date as u64));
+                    }
+                    search::Filter::SentOn(date) => {
+                        filters.push(query::Filter::And);
+                        filters.push(query::Filter::ge(Property::SentAt, date as u64));
+                        filters.push(query::Filter::lt(Property::SentAt, (date + 86400) as u64));
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::SentSince(date) => {
+                        filters.push(query::Filter::ge(Property::SentAt, date as u64));
+                    }
+                    search::Filter::Since(date) => {
+                        filters.push(query::Filter::ge(Property::ReceivedAt, date as u64));
+                    }
+                    search::Filter::Smaller(size) => {
+                        filters.push(query::Filter::lt(Property::Size, size));
+                    }
+                    search::Filter::Unanswered => {
+                        filters.push(query::Filter::Not);
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Answered,
+                        ));
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::Undeleted => {
+                        filters.push(query::Filter::Not);
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Deleted,
+                        ));
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::Undraft => {
+                        filters.push(query::Filter::Not);
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Draft,
+                        ));
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::Unflagged => {
+                        filters.push(query::Filter::Not);
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Flagged,
+                        ));
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::Unkeyword(keyword) => {
+                        filters.push(query::Filter::Not);
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::from(keyword),
+                        ));
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::Unseen => {
+                        filters.push(query::Filter::Not);
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Seen,
+                        ));
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::And => {
+                        filters.push(query::Filter::And);
+                    }
+                    search::Filter::Or => {
+                        filters.push(query::Filter::Or);
+                    }
+                    search::Filter::Not => {
+                        filters.push(query::Filter::Not);
+                    }
+                    search::Filter::End => {
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::Recent => {
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Recent,
+                        ));
+                    }
+                    search::Filter::New => {
+                        filters.push(query::Filter::And);
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Recent,
+                        ));
+                        filters.push(query::Filter::Not);
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Seen,
+                        ));
+                        filters.push(query::Filter::End);
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::Old => {
+                        filters.push(query::Filter::Not);
+                        filters.push(query::Filter::is_in_bitmap(
+                            Property::Keywords,
+                            Keyword::Seen,
+                        ));
+                        filters.push(query::Filter::End);
+                    }
+                    search::Filter::Older(secs) => {
+                        filters.push(query::Filter::le(
+                            Property::ReceivedAt,
+                            now().saturating_sub(secs as u64),
+                        ));
+                    }
+                    search::Filter::Younger(secs) => {
+                        filters.push(query::Filter::ge(
+                            Property::ReceivedAt,
+                            now().saturating_sub(secs as u64),
+                        ));
+                    }
+                    search::Filter::ModSeq((modseq, _)) => {
+                        let mut set = RoaringBitmap::new();
+                        for change in self
+                            .jmap
+                            .changes_(
+                                mailbox.id.account_id,
+                                Collection::Email,
+                                Query::from_modseq(modseq),
+                            )
+                            .await?
+                            .changes
+                        {
+                            let id = (change.unwrap_id() & u32::MAX as u64) as u32;
+                            if message_ids.contains(id) {
+                                set.insert(id);
+                            }
+                        }
+                        filters.push(query::Filter::is_in_set(set));
+                        include_highest_modseq = true;
+                    }
+                    search::Filter::EmailId(id) => {
+                        if let Some(id) = Id::from_bytes(id.as_bytes()) {
+                            filters.push(query::Filter::is_in_set(
+                                RoaringBitmap::from_sorted_iter([id.document_id()]).unwrap(),
+                            ));
+                        } else {
+                            return Err(StatusResponse::no(format!(
+                                "Failed to parse email id '{id}'.",
+                            )));
+                        }
+                    }
+                    search::Filter::ThreadId(id) => {
+                        if let Some(id) = Id::from_bytes(id.as_bytes()) {
+                            filters.push(query::Filter::is_in_bitmap(
+                                Property::ThreadId,
+                                id.document_id(),
+                            ));
+                        } else {
+                            return Err(StatusResponse::no(format!(
+                                "Failed to parse thread id '{id}'.",
+                            )));
+                        }
+                    }
+                    _ => (),
+                },
             }
         }
 
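The rewritten IMAP search above evaluates queries in two phases: every buffered FtsFilter group is resolved by a single fts_filter() call that returns a bitmap of matching document ids, and that bitmap is pushed back into the store-level filter stack as query::Filter::is_in_set. A minimal, self-contained illustration of why this composes cheaply (assumes only the roaring crate; the data is made up):

    use roaring::RoaringBitmap;

    fn main() {
        // Pretend these came from the FTS backend and a keyword bitmap.
        let fts_matches: RoaringBitmap = (100..200).collect();
        let seen_messages: RoaringBitmap = (150..300).collect();

        // AND-ing the two sets yields the surviving candidates.
        let result = &fts_matches & &seen_messages;
        assert_eq!(result.len(), 50);
        println!("{} messages match", result.len());
    }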
@@ -23,6 +23,8 @@
 
 use std::fmt::Display;
 
+use store::fts::{FilterItem, FilterType, FtsFilter};
+
 use crate::{
     error::method::MethodError,
     object::{email, mailbox},

@@ -785,3 +787,47 @@ impl From<Filter> for store::query::Filter {
         }
     }
 }
+
+impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<Filter> for FtsFilter<T> {
+    fn from(value: Filter) -> Self {
+        match value {
+            Filter::And => Self::And,
+            Filter::Or => Self::Or,
+            Filter::Not => Self::Not,
+            Filter::Close => Self::End,
+            _ => unreachable!(),
+        }
+    }
+}
+
+impl FilterItem for Filter {
+    fn filter_type(&self) -> FilterType {
+        match self {
+            Filter::Text(_)
+            | Filter::From(_)
+            | Filter::To(_)
+            | Filter::Cc(_)
+            | Filter::Bcc(_)
+            | Filter::Subject(_)
+            | Filter::Body(_)
+            | Filter::Header(_) => FilterType::Fts,
+            Filter::And => FilterType::And,
+            Filter::Or => FilterType::Or,
+            Filter::Not => FilterType::Not,
+            Filter::Close => FilterType::End,
+            _ => FilterType::Store,
+        }
+    }
+}
+
+impl From<FilterType> for Filter {
+    fn from(value: FilterType) -> Self {
+        match value {
+            FilterType::And => Filter::And,
+            FilterType::Or => Filter::Or,
+            FilterType::Not => Filter::Not,
+            FilterType::End => Filter::Close,
+            _ => unreachable!(),
+        }
+    }
+}
@@ -25,8 +25,8 @@ use std::{borrow::Cow, collections::HashSet};
 
 use store::{
     write::{
-        assert::HashedValue, BatchBuilder, BitmapClass, IntoOperations, Operation, TagValue,
-        TokenizeText, ValueClass, ValueOp,
+        assert::HashedValue, BatchBuilder, BitmapClass, BitmapHash, IntoOperations, Operation,
+        TagValue, TokenizeText, ValueClass, ValueOp,
     },
     Serialize,
 };

@@ -238,7 +238,7 @@ fn merge_batch(
     batch.ops.push(Operation::Bitmap {
         class: BitmapClass::Text {
             field,
-            token: token.into(),
+            token: BitmapHash::new(token),
         },
         set,
     });

@@ -301,7 +301,7 @@ fn merge_batch(
     batch.ops.push(Operation::Bitmap {
         class: BitmapClass::Text {
             field,
-            token: token.into_bytes(),
+            token: BitmapHash::new(token),
        },
         set,
     });

@@ -480,7 +480,7 @@ fn build_batch(
     batch.ops.push(Operation::Bitmap {
         class: BitmapClass::Text {
             field,
-            token: token.into_bytes(),
+            token: BitmapHash::new(token),
         },
         set,
     });

@@ -512,7 +512,7 @@ fn build_batch(
     batch.ops.push(Operation::Bitmap {
         class: BitmapClass::Text {
             field,
-            token: token.into_bytes(),
+            token: BitmapHash::new(token),
         },
         set,
     });
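The four hunks above switch bitmap text keys from raw token bytes to BitmapHash::new(token), i.e. fixed-width hashed key material instead of variable-length strings. BitmapHash's real layout is internal to the store crate; the stand-in below only shows the idea of reducing any token to a constant-size key (the struct and field split are illustrative assumptions):

    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    #[derive(Debug, PartialEq, Eq)]
    struct TokenKey {
        hash: [u8; 8], // fixed width, unlike token.into_bytes()
        len: u8,       // keeping the original length helps disambiguate collisions
    }

    fn token_key(token: &str) -> TokenKey {
        let mut h = DefaultHasher::new();
        token.hash(&mut h);
        TokenKey {
            hash: h.finish().to_be_bytes(),
            len: token.len().min(u8::MAX as usize) as u8,
        }
    }

    fn main() {
        let short = token_key("cat");
        let long = token_key("internationalization");
        assert_eq!(short.hash.len(), long.hash.len()); // constant key size
        println!("{short:?} {long:?}");
    }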
@@ -39,6 +39,9 @@ impl JMAP {
         // Delete account data
         self.store.purge_account(account_id).await?;
 
+        // Remove FTS index
+        let todo = 1;
+
         // Delete account
         let mut batch = BatchBuilder::new();
         batch
@@ -41,6 +41,9 @@ impl crate::Config {
             changes_max_results: settings
                 .property("jmap.protocol.changes.max-results")?
                 .unwrap_or(5000),
+            snippet_max_results: settings
+                .property("jmap.protocol.search-snippet.max-results")?
+                .unwrap_or(100),
             request_max_size: settings
                 .property("jmap.protocol.request.max-size")?
                 .unwrap_or(10000000),
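The new setting caps how many results a search-snippet request will process, defaulting to 100 when unset. In the server's TOML configuration this would be expressed as a single dotted key (only the key name comes from the hunk above; the value shown is just the default):

    jmap.protocol.search-snippet.max-results = 100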
@@ -377,6 +377,19 @@ pub async fn parse_jmap_request(
                 .into_http_response(),
             };
         }
+        ("db", "purge", &Method::GET) => {
+            return match jmap.store.purge_bitmaps().await {
+                Ok(_) => {
+                    JsonResponse::new(Value::String("success".into())).into_http_response()
+                }
+                Err(err) => RequestError::blank(
+                    StatusCode::INTERNAL_SERVER_ERROR.as_u16(),
+                    "Purge database failed",
+                    err.to_string(),
+                )
+                .into_http_response(),
+            };
+        }
         (path_1 @ ("queue" | "report"), path_2, &Method::GET) => {
             return jmap
                 .smtp
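A usage sketch for the new maintenance route: the handler matches the final two path segments ("db", "purge") of a GET request and calls purge_bitmaps() on the store. The base URL and any authentication depend on how parse_jmap_request is mounted in a given deployment, so the invocation below is illustrative only:

    curl "$BASE_URL/db/purge"   # $BASE_URL is deployment-specific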
@@ -33,18 +33,30 @@ impl JMAP {
             .map(ChangeLogBuilder::with_change_id)
     }
 
-    pub async fn assign_change_id(&self, account_id: u32) -> Result<u64, MethodError> {
-        self.store
-            .assign_change_id(account_id)
-            .await
-            .map_err(|err| {
-                tracing::error!(
-                    event = "error",
-                    context = "change_log",
-                    error = ?err,
-                    "Failed to assign changeId.");
-                MethodError::ServerPartialFail
-            })
+    pub async fn assign_change_id(&self, _: u32) -> Result<u64, MethodError> {
+        self.generate_snowflake_id()
+        /*self.store
+            .assign_change_id(account_id)
+            .await
+            .map_err(|err| {
+                tracing::error!(
+                    event = "error",
+                    context = "change_log",
+                    error = ?err,
+                    "Failed to assign changeId.");
+                MethodError::ServerPartialFail
+            })*/
+    }
+
+    pub fn generate_snowflake_id(&self) -> Result<u64, MethodError> {
+        self.snowflake_id.generate().ok_or_else(|| {
+            tracing::error!(
+                event = "error",
+                context = "change_log",
+                "Failed to generate snowflake id."
+            );
+            MethodError::ServerPartialFail
+        })
     }
 
     pub async fn commit_changes(
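Change ids now come from a snowflake-style generator instead of a database round-trip, so they stay unique and roughly time-ordered while being producible synchronously. The exact bit layout used by snowflake_id.generate() is internal to the crate; the sketch below only demonstrates the general scheme (the 42/10/12 split is an illustrative assumption):

    use std::time::{SystemTime, UNIX_EPOCH};

    // Illustrative split: 42 bits timestamp | 10 bits node | 12 bits sequence.
    fn snowflake(node_id: u64, sequence: u64) -> u64 {
        let millis = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_millis() as u64;
        (millis << 22) | ((node_id & 0x3ff) << 12) | (sequence & 0xfff)
    }

    fn main() {
        let a = snowflake(1, 0);
        let b = snowflake(1, 1);
        assert!(b > a); // time-ordered, even within the same millisecond
        println!("{a} {b}");
    }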
@@ -53,7 +53,7 @@ use store::{
 };
 use utils::map::vec_map::VecMap;
 
-use crate::{auth::AccessToken, Bincode, JMAP};
+use crate::{auth::AccessToken, services::housekeeper::Event, Bincode, NamedKey, JMAP};
 
 use super::{
     index::{EmailIndexBuilder, TrimTextValue, MAX_SORT_FIELD_LENGTH},

@@ -291,7 +291,7 @@ impl JMAP {
         keywords: Vec<Keyword>,
         received_at: Option<UTCDate>,
     ) -> Result<Result<IngestedEmail, SetError>, MethodError> {
-        // Obtain term index and metadata
+        // Obtain metadata
         let mut metadata = if let Some(metadata) = self
             .get_property::<Bincode<MessageMetadata>>(
                 from_account_id,

@@ -405,6 +405,14 @@ impl JMAP {
             .value(Property::MailboxIds, mailboxes, F_VALUE | F_BITMAP)
             .value(Property::Keywords, keywords, F_VALUE | F_BITMAP)
             .value(Property::Cid, changes.change_id, F_VALUE)
+            .set(
+                NamedKey::IndexEmail::<&[u8]> {
+                    account_id,
+                    document_id: message_id,
+                    seq: self.generate_snowflake_id()?,
+                },
+                metadata.blob_hash.clone(),
+            )
             .custom(EmailIndexBuilder::set(metadata))
             .custom(changes);
 

@@ -417,6 +425,9 @@ impl JMAP {
             MethodError::ServerPartialFail
         })?;
 
+        // Request FTS index
+        let _ = self.housekeeper_tx.send(Event::IndexStart).await;
+
         Ok(Ok(email))
     }
 }
@@ -32,6 +32,8 @@ use mail_parser::{
 };
 use nlp::language::Language;
 use store::{
+    backend::MAX_TOKEN_LENGTH,
+    fts::{index::FtsDocument, Field},
     write::{BatchBuilder, BlobOp, IntoOperations, F_BITMAP, F_CLEAR, F_INDEX, F_VALUE},
     BlobHash,
 };

@@ -60,13 +62,13 @@ pub(super) trait IndexMessage {
         keywords: Vec<Keyword>,
         mailbox_ids: Vec<u32>,
         received_at: u64,
-    ) -> store::Result<&mut Self>;
+    ) -> &mut Self;
 
     fn index_headers(&mut self, headers: &[Header<'_>], options: u32);
 }
 
-pub(super) trait IndexMessageText<'x> {
-    fn index_message(&mut self, message: &'x Message<'x>);
+pub trait IndexMessageText<'x>: Sized {
+    fn index_message(self, message: &'x Message<'x>) -> Self;
 }
 
 impl IndexMessage for BatchBuilder {

@@ -77,7 +79,7 @@ impl IndexMessage for BatchBuilder {
         keywords: Vec<Keyword>,
         mailbox_ids: Vec<u32>,
         received_at: u64,
-    ) -> store::Result<&mut Self> {
+    ) -> &mut Self {
         // Index keywords
         self.value(Property::Keywords, keywords, F_VALUE | F_BITMAP);
 

@@ -164,7 +166,7 @@ impl IndexMessage for BatchBuilder {
                 F_VALUE,
             );
 
-        Ok(self)
+        self
     }
 
     fn index_headers(&mut self, headers: &[Header<'_>], options: u32) {

@@ -262,9 +264,8 @@ impl IndexMessage for BatchBuilder {
     }
 }
 
-/*
-impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
-    fn index_message(&mut self, message: &'x Message<'x>) {
+impl<'x> IndexMessageText<'x> for FtsDocument<'x, HeaderName<'x>> {
+    fn index_message(mut self, message: &'x Message<'x>) -> Self {
         let mut language = Language::Unknown;
 
         for (part_id, part) in message.parts.iter().take(MAX_MESSAGE_PARTS).enumerate() {

@@ -277,9 +278,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                     continue;
                 }
                 // Index hasHeader property
-                self.index_raw_token(Property::Headers, header.name.as_str());
+                self.index_keyword(Field::Keyword, header.name.as_str().to_ascii_lowercase());
 
-                match header.name {
+                match &header.name {
                     HeaderName::MessageId
                     | HeaderName::InReplyTo
                     | HeaderName::References

@@ -287,45 +288,35 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                         header.value.visit_text(|id| {
                             // Index ids without stemming
                             if id.len() < MAX_TOKEN_LENGTH {
-                                let fix = "true";
-                                self.index_raw_token(Property::MessageId, id.to_string());
+                                self.index_keyword(
+                                    Field::Header(header.name.clone()),
+                                    id.to_string(),
+                                );
                             }
                         });
                     }
                     HeaderName::From | HeaderName::To | HeaderName::Cc | HeaderName::Bcc => {
-                        let property = Property::from_header(&header.name);
-
                         header.value.visit_addresses(|_, value| {
                             // Index an address name or email without stemming
-                            self.index_raw(property.clone(), value.to_string());
+                            self.index_tokenized(
+                                Field::Header(header.name.clone()),
+                                value.to_string(),
+                            );
                         });
                     }
                     HeaderName::Subject => {
                         // Index subject for FTS
-                        self.index(
-                            Property::Subject,
-                            match &header.value {
-                                HeaderValue::Text(text) => text.clone(),
-                                HeaderValue::TextList(list) if !list.is_empty() => {
-                                    list.first().unwrap().clone()
-                                }
-                                _ => "".into(),
-                            },
-                            language,
-                        );
+                        if let Some(subject) = header.value.as_text() {
+                            self.index(Field::Header(HeaderName::Subject), subject, language);
+                        }
                     }
                     HeaderName::Comments | HeaderName::Keywords | HeaderName::ListId => {
                         // Index headers
                         header.value.visit_text(|text| {
-                            for token in text.split_ascii_whitespace() {
-                                if token.len() < MAX_TOKEN_LENGTH {
-                                    let fix = "true";
-                                    self.index_raw_token(
-                                        Property::Headers,
-                                        token.to_lowercase(),
-                                    );
-                                }
-                            }
+                            self.index_tokenized(
+                                Field::Header(header.name.clone()),
+                                text.to_string(),
+                            );
                         });
                     }
                     _ => (),

@@ -337,9 +328,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                 PartType::Text(text) => {
                     if message.text_body.contains(&part_id) || message.html_body.contains(&part_id)
                     {
-                        self.index(Property::TextBody, text.as_ref(), part_language);
+                        self.index(Field::Body, text.as_ref(), part_language);
                     } else {
-                        self.index(Property::Attachments, text.as_ref(), part_language);
+                        self.index(Field::Attachment, text.as_ref(), part_language);
                     }
                 }
                 PartType::Html(html) => {

@@ -347,9 +338,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
 
                     if message.text_body.contains(&part_id) || message.html_body.contains(&part_id)
                     {
-                        self.index(Property::TextBody, text, part_language);
+                        self.index(Field::Body, text, part_language);
                     } else {
-                        self.index(Property::Attachments, text, part_language);
+                        self.index(Field::Attachment, text, part_language);
                     }
                 }
                 PartType::Message(nested_message) => {

@@ -360,21 +351,17 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                     if let Some(HeaderValue::Text(subject)) =
                         nested_message.header(HeaderName::Subject)
                     {
-                        self.index(
-                            Property::Attachments,
-                            subject.as_ref(),
-                            nested_message_language,
-                        );
+                        self.index(Field::Attachment, subject.as_ref(), nested_message_language);
                     }
 
                     for sub_part in nested_message.parts.iter().take(MAX_MESSAGE_PARTS) {
                         let language = sub_part.language().unwrap_or(nested_message_language);
                         match &sub_part.body {
                             PartType::Text(text) => {
-                                self.index(Property::Attachments, text.as_ref(), language);
+                                self.index(Field::Attachment, text.as_ref(), language);
                             }
                             PartType::Html(html) => {
-                                self.index(Property::Attachments, html_to_text(html), language);
+                                self.index(Field::Attachment, html_to_text(html), language);
                             }
                             _ => (),
                         }

@@ -383,9 +370,9 @@ impl<'x> IndexMessageText<'x> for FtsIndexBuilder<'x, Property> {
                 _ => {}
             }
         }
+
+        self
     }
 }
-*/
 
 pub struct EmailIndexBuilder<'x> {
     inner: Bincode<MessageMetadata<'x>>,
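Note the trait change above: index_message went from `&mut self` with a fallible return to a consuming builder (`self -> Self`) targeting the store's FtsDocument. Consuming builders let indexing calls chain without threading a Result through every step. A stand-in sketch of that shape (the Doc type below is illustrative, not the crate's FtsDocument):

    struct Doc {
        fields: Vec<(String, String)>,
    }

    impl Doc {
        fn new() -> Self {
            Doc { fields: Vec::new() }
        }

        // Taking `self` by value lets calls chain and makes the "done" value
        // simply the builder itself, with no error type to propagate.
        fn index(mut self, field: &str, text: &str) -> Self {
            self.fields.push((field.into(), text.into()));
            self
        }
    }

    fn main() {
        let doc = Doc::new()
            .index("subject", "Quarterly report")
            .index("body", "Numbers attached.");
        assert_eq!(doc.fields.len(), 2);
    }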
@@ -33,6 +33,7 @@ use jmap_proto::{
 use mail_parser::{
     parsers::fields::thread::thread_name, HeaderName, HeaderValue, Message, PartType,
 };
+
 use store::{
     ahash::AHashSet,
     query::Filter,

@@ -46,7 +47,8 @@ use utils::map::vec_map::VecMap;
 
 use crate::{
     email::index::{IndexMessage, MAX_ID_LENGTH},
-    IngestError, JMAP,
+    services::housekeeper::Event,
+    IngestError, NamedKey, JMAP,
 };
 
 use super::{

@@ -237,15 +239,14 @@ impl JMAP {
                 IngestError::Temporary
             })?;
         let change_id = self
-            .store
             .assign_change_id(params.account_id)
             .await
-            .map_err(|err| {
+            .map_err(|_| {
                 tracing::error!(
                     event = "error",
                     context = "email_ingest",
-                    error = ?err,
-                    "Failed to assign changeId.");
+                    "Failed to assign changeId."
+                );
                 IngestError::Temporary
             })?;
 

@@ -307,17 +308,19 @@ impl JMAP {
                 params.mailbox_ids,
                 params.received_at.unwrap_or_else(now),
             )
-            .map_err(|err| {
-                tracing::error!(
-                    event = "error",
-                    context = "email_ingest",
-                    error = ?err,
-                    "Failed to index message.");
-                IngestError::Temporary
-            })?
             .value(Property::Cid, change_id, F_VALUE)
             .value(Property::ThreadId, thread_id, F_VALUE | F_BITMAP)
-            .custom(changes);
+            .custom(changes)
+            .set(
+                NamedKey::IndexEmail::<&[u8]> {
+                    account_id: params.account_id,
+                    document_id,
+                    seq: self
+                        .generate_snowflake_id()
+                        .map_err(|_| IngestError::Temporary)?,
+                },
+                blob_id.hash.clone(),
+            );
         self.store.write(batch.build()).await.map_err(|err| {
             tracing::error!(
                 event = "error",

@@ -327,6 +330,9 @@ impl JMAP {
             IngestError::Temporary
         })?;
 
+        // Request FTS index
+        let _ = self.housekeeper_tx.send(Event::IndexStart).await;
+
         Ok(IngestedEmail {
             id,
             change_id,

@@ -434,18 +440,14 @@ impl JMAP {
 
         // Delete all but the most common threadId
         let mut batch = BatchBuilder::new();
-        let change_id = self
-            .store
-            .assign_change_id(account_id)
-            .await
-            .map_err(|err| {
-                tracing::error!(
-                    event = "error",
-                    context = "find_or_merge_thread",
-                    error = ?err,
-                    "Failed to assign changeId for thread merge.");
-                IngestError::Temporary
-            })?;
+        let change_id = self.assign_change_id(account_id).await.map_err(|_| {
+            tracing::error!(
+                event = "error",
+                context = "find_or_merge_thread",
+                "Failed to assign changeId for thread merge."
+            );
+            IngestError::Temporary
+        })?;
         let mut changes = ChangeLogBuilder::with_change_id(change_id);
         batch
             .with_account_id(account_id)
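This is the "background indexing" half of the commit: ingestion no longer builds the FTS index inline. Instead the write batch records a sequenced NamedKey::IndexEmail entry (account, document, snowflake seq) pointing at the message's blob hash, and the housekeeper is nudged with Event::IndexStart so a background task can drain pending entries later. A hedged, self-contained sketch of that queue-and-notify pattern (the IndexRequest type and worker loop below are illustrative, not the crate's):

    use std::collections::BTreeMap;
    use std::sync::{mpsc, Arc, Mutex};
    use std::thread;

    #[derive(Debug)]
    struct IndexRequest { account_id: u32, document_id: u32 }

    fn main() {
        // Stand-in for the sequenced "index email" keys written with the batch;
        // the BTreeMap keeps them in seq order, like time-ordered snowflake ids.
        let queue = Arc::new(Mutex::new(BTreeMap::<u64, IndexRequest>::new()));
        let (tx, rx) = mpsc::channel::<()>();

        let worker_queue = Arc::clone(&queue);
        let worker = thread::spawn(move || {
            // Wake on notification, then drain everything queued so far.
            if rx.recv().is_ok() {
                let mut q = worker_queue.lock().unwrap();
                while let Some((seq, req)) = q.pop_first() {
                    println!(
                        "indexing doc {} for account {} (seq {seq})",
                        req.document_id, req.account_id
                    );
                }
            }
        });

        queue
            .lock()
            .unwrap()
            .insert(1, IndexRequest { account_id: 0, document_id: 42 });
        let _ = tx.send(()); // the analogue of housekeeper_tx.send(Event::IndexStart)
        worker.join().unwrap();
    }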
@@ -27,7 +27,10 @@ use jmap_proto::{
     object::email::QueryArguments,
     types::{acl::Acl, collection::Collection, keyword::Keyword, property::Property},
 };
+use mail_parser::HeaderName;
+use nlp::language::Language;
 use store::{
+    fts::{Field, FilterGroup, FtsFilter, IntoFilterGroup},
     query::{self},
     roaring::RoaringBitmap,
     write::ValueClass,

@@ -45,200 +48,226 @@ impl JMAP {
         let account_id = request.account_id.document_id();
         let mut filters = Vec::with_capacity(request.filter.len());
 
-        for cond in std::mem::take(&mut request.filter) {
-            match cond {
-                Filter::InMailbox(mailbox) => filters.push(query::Filter::is_in_bitmap(
-                    Property::MailboxIds,
-                    mailbox.document_id(),
-                )),
-                Filter::InMailboxOtherThan(mailboxes) => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::Or);
-                    for mailbox in mailboxes {
-                        filters.push(query::Filter::is_in_bitmap(
-                            Property::MailboxIds,
-                            mailbox.document_id(),
-                        ));
-                    }
-                    filters.push(query::Filter::End);
-                    filters.push(query::Filter::End);
-                }
-                Filter::Before(date) => filters.push(query::Filter::lt(Property::ReceivedAt, date)),
-                Filter::After(date) => filters.push(query::Filter::gt(Property::ReceivedAt, date)),
-                Filter::MinSize(size) => filters.push(query::Filter::ge(Property::Size, size)),
-                Filter::MaxSize(size) => filters.push(query::Filter::lt(Property::Size, size)),
-                Filter::AllInThreadHaveKeyword(keyword) => filters.push(query::Filter::is_in_set(
-                    self.thread_keywords(account_id, keyword, true).await?,
-                )),
-                Filter::SomeInThreadHaveKeyword(keyword) => filters.push(query::Filter::is_in_set(
-                    self.thread_keywords(account_id, keyword, false).await?,
-                )),
-                Filter::NoneInThreadHaveKeyword(keyword) => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_set(
-                        self.thread_keywords(account_id, keyword, false).await?,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                Filter::HasKeyword(keyword) => {
-                    filters.push(query::Filter::is_in_bitmap(Property::Keywords, keyword))
-                }
-                Filter::NotKeyword(keyword) => {
-                    filters.push(query::Filter::Not);
-                    filters.push(query::Filter::is_in_bitmap(Property::Keywords, keyword));
-                    filters.push(query::Filter::End);
-                }
-                Filter::HasAttachment(has_attach) => {
-                    if !has_attach {
-                        filters.push(query::Filter::Not);
-                    }
-                    filters.push(query::Filter::is_in_bitmap(Property::HasAttachment, ()));
-                    if !has_attach {
-                        filters.push(query::Filter::End);
-                    }
-                }
-                /*Filter::Text(text) => {
-                    filters.push(query::Filter::Or);
-                    filters.push(query::Filter::has_text(
-                        Property::From,
-                        &text,
-                        Language::None,
-                    ));
-                    filters.push(query::Filter::has_text(Property::To, &text, Language::None));
-                    filters.push(query::Filter::has_text(Property::Cc, &text, Language::None));
-                    filters.push(query::Filter::has_text(
-                        Property::Bcc,
-                        &text,
-                        Language::None,
-                    ));
-                    filters.push(query::Filter::has_text_detect(
-                        Property::Subject,
-                        &text,
-                        self.config.default_language,
-                    ));
-                    filters.push(query::Filter::has_text_detect(
-                        Property::TextBody,
-                        &text,
-                        self.config.default_language,
-                    ));
-                    filters.push(query::Filter::has_text_detect(
-                        Property::Attachments,
-                        text,
-                        self.config.default_language,
-                    ));
-                    filters.push(query::Filter::End);
-                }
-                Filter::From(text) => filters.push(query::Filter::has_text(
-                    Property::From,
-                    text,
-                    Language::None,
-                )),
-                Filter::To(text) => {
-                    filters.push(query::Filter::has_text(Property::To, text, Language::None))
-                }
-                Filter::Cc(text) => {
-                    filters.push(query::Filter::has_text(Property::Cc, text, Language::None))
-                }
-                Filter::Bcc(text) => {
-                    filters.push(query::Filter::has_text(Property::Bcc, text, Language::None))
-                }
-                Filter::Subject(text) => filters.push(query::Filter::has_text_detect(
-                    Property::Subject,
-                    text,
-                    self.config.default_language,
-                )),
-                Filter::Body(text) => filters.push(query::Filter::has_text_detect(
-                    Property::TextBody,
-                    text,
-                    self.config.default_language,
-                )),
-                Filter::Header(header) => {
-                    let mut header = header.into_iter();
-                    let header_name = header.next().ok_or_else(|| {
-                        MethodError::InvalidArguments("Header name is missing.".to_string())
-                    })?;
-                    match HeaderName::parse(&header_name) {
-                        Some(HeaderName::Other(_)) | None => {
-                            return Err(MethodError::InvalidArguments(format!(
-                                "Querying non-RFC header '{header_name}' is not allowed.",
-                            )));
-                        }
-                        Some(header_name) => {
-                            let is_id = matches!(
-                                header_name,
-                                HeaderName::MessageId
-                                    | HeaderName::InReplyTo
-                                    | HeaderName::References
-                                    | HeaderName::ResentMessageId
-                            );
-                            let tokens = if let Some(header_value) = header.next() {
-                                let header_num = header_name.id().to_string();
-                                header_value
-                                    .split_ascii_whitespace()
-                                    .filter_map(|token| {
-                                        if token.len() < MAX_TOKEN_LENGTH {
-                                            if is_id {
-                                                format!("{header_num}{token}")
-                                            } else {
-                                                format!("{header_num}{}", token.to_lowercase())
-                                            }
-                                            .into()
-                                        } else {
-                                            None
-                                        }
-                                    })
-                                    .collect::<Vec<_>>()
-                            } else {
-                                vec![]
-                            };
-                            match tokens.len() {
-                                0 => {
-                                    filters.push(query::Filter::has_raw_text(
-                                        Property::Headers,
-                                        header_name.id().to_string(),
-                                    ));
-                                }
-                                1 => {
-                                    filters.push(query::Filter::has_raw_text(
-                                        Property::Headers,
-                                        tokens.into_iter().next().unwrap(),
-                                    ));
-                                }
-                                _ => {
-                                    filters.push(query::Filter::And);
-                                    for token in tokens {
-                                        filters.push(query::Filter::has_raw_text(
-                                            Property::Headers,
-                                            token,
-                                        ));
-                                    }
-                                    filters.push(query::Filter::End);
-                                }
-                            }
-                        }
-                    }
-                }*/
-                // Non-standard
-                Filter::Id(ids) => {
-                    let mut set = RoaringBitmap::new();
-                    for id in ids {
-                        set.insert(id.document_id());
-                    }
-                    filters.push(query::Filter::is_in_set(set));
-                }
-                Filter::SentBefore(date) => filters.push(query::Filter::lt(Property::SentAt, date)),
-                Filter::SentAfter(date) => filters.push(query::Filter::gt(Property::SentAt, date)),
-                Filter::InThread(id) => filters.push(query::Filter::is_in_bitmap(
-                    Property::ThreadId,
-                    id.document_id(),
-                )),
-                Filter::And | Filter::Or | Filter::Not | Filter::Close => {
-                    filters.push(cond.into());
-                }
+        for cond_group in std::mem::take(&mut request.filter).into_filter_group() {
+            match cond_group {
+                FilterGroup::Fts(conds) => {
+                    let mut fts_filters = Vec::with_capacity(filters.len());
+                    for cond in conds {
+                        match cond {
+                            Filter::Text(text) => {
+                                fts_filters.push(FtsFilter::Or);
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::From),
+                                    &text,
+                                    Language::None,
+                                ));
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::To),
+                                    &text,
+                                    Language::None,
+                                ));
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::Cc),
+                                    &text,
+                                    Language::None,
+                                ));
+                                fts_filters.push(FtsFilter::has_text(
+                                    Field::Header(HeaderName::Bcc),
+                                    &text,
+                                    Language::None,
+                                ));
+                                fts_filters.push(FtsFilter::has_text_detect(
+                                    Field::Header(HeaderName::Subject),
+                                    &text,
+                                    self.config.default_language,
+                                ));
+                                fts_filters.push(FtsFilter::has_text_detect(
+                                    Field::Body,
+                                    &text,
+                                    self.config.default_language,
+                                ));
+                                fts_filters.push(FtsFilter::has_text_detect(
+                                    Field::Attachment,
+                                    text,
+                                    self.config.default_language,
+                                ));
+                                fts_filters.push(FtsFilter::End);
+                            }
+                            Filter::From(text) => fts_filters.push(FtsFilter::has_text(
+                                Field::Header(HeaderName::From),
+                                text,
+                                Language::None,
+                            )),
+                            Filter::To(text) => fts_filters.push(FtsFilter::has_text(
+                                Field::Header(HeaderName::To),
+                                text,
+                                Language::None,
+                            )),
+                            Filter::Cc(text) => fts_filters.push(FtsFilter::has_text(
+                                Field::Header(HeaderName::Cc),
+                                text,
+                                Language::None,
+                            )),
+                            Filter::Bcc(text) => fts_filters.push(FtsFilter::has_text(
+                                Field::Header(HeaderName::Bcc),
+                                text,
+                                Language::None,
+                            )),
+                            Filter::Subject(text) => fts_filters.push(FtsFilter::has_text_detect(
+                                Field::Header(HeaderName::Subject),
+                                text,
+                                self.config.default_language,
+                            )),
+                            Filter::Body(text) => fts_filters.push(FtsFilter::has_text_detect(
+                                Field::Body,
+                                text,
+                                self.config.default_language,
+                            )),
+                            Filter::Header(header) => {
+                                let mut header = header.into_iter();
+                                let header_name = header.next().ok_or_else(|| {
+                                    MethodError::InvalidArguments(
+                                        "Header name is missing.".to_string(),
+                                    )
+                                })?;
+
+                                match HeaderName::parse(header_name) {
+                                    Some(HeaderName::Other(header_name)) => {
+                                        return Err(MethodError::InvalidArguments(format!(
+                                            "Querying header '{header_name}' is not supported.",
+                                        )));
+                                    }
+                                    Some(header_name) => {
+                                        if let Some(header_value) = header.next() {
+                                            if matches!(
+                                                header_name,
+                                                HeaderName::MessageId
+                                                    | HeaderName::InReplyTo
+                                                    | HeaderName::References
+                                                    | HeaderName::ResentMessageId
+                                            ) {
+                                                fts_filters.push(FtsFilter::has_keyword(
+                                                    Field::Header(header_name),
+                                                    header_value,
+                                                ));
+                                            } else {
+                                                fts_filters.push(FtsFilter::has_text(
+                                                    Field::Header(header_name),
+                                                    header_value,
+                                                    Language::None,
+                                                ));
+                                            }
+                                        } else {
+                                            fts_filters.push(FtsFilter::has_keyword(
+                                                Field::Keyword,
+                                                header_name.as_str().to_lowercase(),
+                                            ));
+                                        }
+                                    }
+                                    None => (),
+                                }
+                            }
+                            Filter::And | Filter::Or | Filter::Not | Filter::Close => {
+                                fts_filters.push(cond.into());
+                            }
+                            other => return Err(MethodError::UnsupportedFilter(other.to_string())),
+                        }
+                    }
+                    filters.push(query::Filter::is_in_set(
+                        self.fts_filter(account_id, Collection::Email, fts_filters)
+                            .await?,
+                    ));
+                }
+                FilterGroup::Store(cond) => {
+                    match cond {
+                        Filter::InMailbox(mailbox) => filters.push(query::Filter::is_in_bitmap(
|
||||||
|
Property::MailboxIds,
|
||||||
|
mailbox.document_id(),
|
||||||
|
)),
|
||||||
|
Filter::InMailboxOtherThan(mailboxes) => {
|
||||||
|
filters.push(query::Filter::Not);
|
||||||
|
filters.push(query::Filter::Or);
|
||||||
|
for mailbox in mailboxes {
|
||||||
|
filters.push(query::Filter::is_in_bitmap(
|
||||||
|
Property::MailboxIds,
|
||||||
|
mailbox.document_id(),
|
||||||
|
));
|
||||||
|
}
|
||||||
|
filters.push(query::Filter::End);
|
||||||
|
filters.push(query::Filter::End);
|
||||||
|
}
|
||||||
|
Filter::Before(date) => {
|
||||||
|
filters.push(query::Filter::lt(Property::ReceivedAt, date))
|
||||||
|
}
|
||||||
|
Filter::After(date) => {
|
||||||
|
filters.push(query::Filter::gt(Property::ReceivedAt, date))
|
||||||
|
}
|
||||||
|
Filter::MinSize(size) => {
|
||||||
|
filters.push(query::Filter::ge(Property::Size, size))
|
||||||
|
}
|
||||||
|
Filter::MaxSize(size) => {
|
||||||
|
filters.push(query::Filter::lt(Property::Size, size))
|
||||||
|
}
|
||||||
|
Filter::AllInThreadHaveKeyword(keyword) => {
|
||||||
|
filters.push(query::Filter::is_in_set(
|
||||||
|
self.thread_keywords(account_id, keyword, true).await?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
Filter::SomeInThreadHaveKeyword(keyword) => {
|
||||||
|
filters.push(query::Filter::is_in_set(
|
||||||
|
self.thread_keywords(account_id, keyword, false).await?,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
Filter::NoneInThreadHaveKeyword(keyword) => {
|
||||||
|
filters.push(query::Filter::Not);
|
||||||
|
filters.push(query::Filter::is_in_set(
|
||||||
|
self.thread_keywords(account_id, keyword, false).await?,
|
||||||
|
));
|
||||||
|
filters.push(query::Filter::End);
|
||||||
|
}
|
||||||
|
Filter::HasKeyword(keyword) => {
|
||||||
|
filters.push(query::Filter::is_in_bitmap(Property::Keywords, keyword))
|
||||||
|
}
|
||||||
|
Filter::NotKeyword(keyword) => {
|
||||||
|
filters.push(query::Filter::Not);
|
||||||
|
filters.push(query::Filter::is_in_bitmap(Property::Keywords, keyword));
|
||||||
|
filters.push(query::Filter::End);
|
||||||
|
}
|
||||||
|
Filter::HasAttachment(has_attach) => {
|
||||||
|
if !has_attach {
|
||||||
|
filters.push(query::Filter::Not);
|
||||||
|
}
|
||||||
|
filters.push(query::Filter::is_in_bitmap(Property::HasAttachment, ()));
|
||||||
|
if !has_attach {
|
||||||
|
filters.push(query::Filter::End);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
other => return Err(MethodError::UnsupportedFilter(other.to_string())),
|
// Non-standard
|
||||||
|
Filter::Id(ids) => {
|
||||||
|
let mut set = RoaringBitmap::new();
|
||||||
|
for id in ids {
|
||||||
|
set.insert(id.document_id());
|
||||||
|
}
|
||||||
|
filters.push(query::Filter::is_in_set(set));
|
||||||
|
}
|
||||||
|
Filter::SentBefore(date) => {
|
||||||
|
filters.push(query::Filter::lt(Property::SentAt, date))
|
||||||
|
}
|
||||||
|
Filter::SentAfter(date) => {
|
||||||
|
filters.push(query::Filter::gt(Property::SentAt, date))
|
||||||
|
}
|
||||||
|
Filter::InThread(id) => filters.push(query::Filter::is_in_bitmap(
|
||||||
|
Property::ThreadId,
|
||||||
|
id.document_id(),
|
||||||
|
)),
|
||||||
|
Filter::And | Filter::Or | Filter::Not | Filter::Close => {
|
||||||
|
filters.push(cond.into());
|
||||||
|
}
|
||||||
|
|
||||||
|
other => return Err(MethodError::UnsupportedFilter(other.to_string())),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
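Note: the rewritten query path splits JMAP e-mail filters into two groups. Full-text criteria (Text/From/To/Cc/Bcc/Subject/Body/Header) are collected into a chain of FtsFilter items and resolved by the FTS store into a bitmap of matching document ids; everything else stays an ordinary store filter. A minimal sketch of the resulting flow, using only names from the hunk above (the string literal stands in for the owned value the real constructor receives):

    // Sketch: a "subject contains 'invoice'" criterion as an FTS sub-query.
    let mut fts_filters = Vec::new();
    fts_filters.push(FtsFilter::has_text(
        Field::Header(HeaderName::Subject),
        "invoice",
        Language::None,
    ));
    // The FTS store evaluates the chain into matching document ids, which
    // are then intersected with the remaining store-level filters.
    let matches: RoaringBitmap = self
        .fts_filter(account_id, Collection::Email, fts_filters)
        .await?;
    filters.push(query::Filter::is_in_set(matches));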
@@ -59,7 +59,9 @@ use store::{
     Serialize,
 };
 
-use crate::{auth::AccessToken, Bincode, IngestError, JMAP};
+use crate::{
+    auth::AccessToken, services::housekeeper::Event, Bincode, IngestError, NamedKey, JMAP,
+};
 
 use super::{
     headers::{BuildHeader, ValueToHeader},

@@ -1208,6 +1210,16 @@ impl JMAP {
                 .delete_document(thread_id);
         }
 
+        // Remove message from FTS index
+        batch.set(
+            NamedKey::IndexEmail::<&[u8]> {
+                account_id,
+                document_id,
+                seq: self.generate_snowflake_id()?,
+            },
+            vec![],
+        );
+
         // Commit batch
         match self.store.write(batch.build()).await {
             Ok(_) => (),

@@ -1226,6 +1238,9 @@ impl JMAP {
             }
         }
 
+        // Request FTS index
+        let _ = self.housekeeper_tx.send(Event::IndexStart).await;
+
         Ok(Ok(changes))
     }
 }
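Note: the entry written above is a queue record, not data. A NamedKey::IndexEmail key carrying (seq, account_id, document_id) with an empty value asks the background indexer to drop the document from the FTS index; the ingestion path is expected to write the same key with the message's blob hash as the value to request indexing (only the deletion side is visible in this hunk, so the ingest-side value below is an assumption):

    // Sketch: the two queue-entry shapes consumed by fts_index_queued().
    batch.set(
        NamedKey::IndexEmail::<&[u8]> {
            account_id,
            document_id,
            seq: self.generate_snowflake_id()?, // time-ordered: preserves queue order
        },
        vec![],                   // empty value => remove from the FTS index
        // blob_hash.to_vec()     // blob hash   => (re)index this message (assumed)
    );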
@@ -27,15 +27,15 @@ use jmap_proto::{
     query::Filter,
     search_snippet::{GetSearchSnippetRequest, GetSearchSnippetResponse, SearchSnippet},
 },
-    types::{acl::Acl, collection::Collection},
+    types::{acl::Acl, collection::Collection, property::Property},
 };
-use mail_parser::{decoders::html::html_to_text, MessageParser, PartType};
-use nlp::language::{stemmer::Stemmer, Language};
-use store::BlobHash;
+use mail_parser::{decoders::html::html_to_text, GetHeader, HeaderName, PartType};
+use nlp::language::{search_snippet::generate_snippet, stemmer::Stemmer, Language};
+use store::backend::MAX_TOKEN_LENGTH;
 
-use crate::{auth::AccessToken, JMAP};
+use crate::{auth::AccessToken, Bincode, JMAP};
 
-use super::index::MAX_MESSAGE_PARTS;
+use super::metadata::{MessageMetadata, MetadataPartType};
 
 impl JMAP {
     pub async fn email_search_snippet(

@@ -45,37 +45,33 @@ impl JMAP {
     ) -> Result<GetSearchSnippetResponse, MethodError> {
         let mut filter_stack = vec![];
         let mut include_term = true;
-        //let mut terms = vec![];
-        let mut match_phrase = false;
+        let mut terms = vec![];
+        let mut is_exact = false;
+        let mut language = self.config.default_language;
 
         for cond in request.filter {
             match cond {
                 Filter::Text(text) | Filter::Subject(text) | Filter::Body(text) => {
-                    /*if include_term {
-                        let (text, language) = Language::detect(text, self.config.default_language);
+                    if include_term {
+                        let (text, language_) =
+                            Language::detect(text, self.config.default_language);
+                        language = language_;
                         if (text.starts_with('"') && text.ends_with('"'))
                             || (text.starts_with('\'') && text.ends_with('\''))
                         {
-                            terms.push(
-                                language
-                                    .tokenize_text(&text, MAX_TOKEN_LENGTH)
-                                    .map(|token| (token.word.into_owned(), None))
-                                    .collect::<Vec<_>>(),
-                            );
-                            match_phrase = true;
+                            for token in language.tokenize_text(&text, MAX_TOKEN_LENGTH) {
+                                terms.push(token.word.into_owned());
+                            }
+                            is_exact = true;
                         } else {
-                            terms.push(
-                                Stemmer::new(&text, language, MAX_TOKEN_LENGTH)
-                                    .map(|token| {
-                                        (
-                                            token.word.into_owned(),
-                                            token.stemmed_word.map(|w| w.into_owned()),
-                                        )
-                                    })
-                                    .collect::<Vec<_>>(),
-                            );
+                            for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH) {
+                                terms.push(token.word.into_owned());
+                                if let Some(stemmed_word) = token.stemmed_word {
+                                    terms.push(stemmed_word.into_owned());
+                                }
+                            }
                         }
-                    }*/
+                    }
                 }
                 Filter::And | Filter::Or => {
                     filter_stack.push(cond);

@@ -103,150 +99,112 @@ impl JMAP {
             not_found: vec![],
         };
 
-        if email_ids.len() > self.config.get_max_objects {
+        if email_ids.len() > self.config.snippet_max_results {
             return Err(MethodError::RequestTooLarge);
         }
 
-        /*
-        for email_id in email_ids {
-            let document_id = email_id.document_id();
-            let mut snippet = SearchSnippet {
-                email_id,
-                subject: None,
-                preview: None,
-            };
-            if !document_ids.contains(document_id) {
-                response.not_found.push(email_id);
-                continue;
-            } else if terms.is_empty() {
-                response.list.push(snippet);
-                continue;
-            }
-
-            // Obtain the term index and raw message
-            let (term_index, raw_message) = if let (Some(term_index), Some(raw_message)) = (
-                self.get_term_index::<TermIndex>(account_id, Collection::Email, document_id)
-                    .await?,
-                self.get_blob(
-                    &BlobHash::LinkedMaildir {
-                        account_id,
-                        document_id,
-                    },
-                    0..u32::MAX,
-                )
-                .await?,
-            ) {
-                (term_index, raw_message)
-            } else {
-                response.not_found.push(email_id);
-                continue;
-            };
-
-            // Parse message
-            let message = if let Some(message) = MessageParser::new().parse(&raw_message) {
-                message
-            } else {
-                response.not_found.push(email_id);
-                continue;
-            };
-
-            // Build the match terms
-            let mut match_terms = Vec::new();
-            for term in &terms {
-                for (word, stemmed_word) in term {
-                    match_terms.push(term_index.get_match_term(word, stemmed_word.as_deref()));
-                }
-            }
-
-            'outer: for term_group in term_index
-                .match_terms(&match_terms, None, match_phrase, true, true)
-                .map_err(|err| match err {
-                    term_index::Error::InvalidArgument => {
-                        MethodError::UnsupportedFilter("Too many search terms.".to_string())
-                    }
-                    err => {
-                        tracing::error!(
-                            account_id = account_id,
-                            document_id = document_id,
-                            reason = ?err,
-                            "Failed to generate search snippet.");
-                        MethodError::UnsupportedFilter(
-                            "Failed to generate search snippet.".to_string(),
-                        )
-                    }
-                })?
-                .unwrap_or_default()
-            {
-                if term_group.part_id == 0 {
-                    // Generate subject snippent
-                    snippet.subject =
-                        generate_snippet(&term_group.terms, message.subject().unwrap_or_default());
-                } else {
-                    let mut part_num = 1;
-                    for part in &message.parts {
-                        match &part.body {
-                            PartType::Text(text) => {
-                                if part_num == term_group.part_id {
-                                    snippet.preview = generate_snippet(&term_group.terms, text);
-                                    break 'outer;
-                                } else {
-                                    part_num += 1;
-                                }
-                            }
-                            PartType::Html(html) => {
-                                if part_num == term_group.part_id {
-                                    snippet.preview =
-                                        generate_snippet(&term_group.terms, &html_to_text(html));
-                                    break 'outer;
-                                } else {
-                                    part_num += 1;
-                                }
-                            }
-                            PartType::Message(message) => {
-                                if let Some(subject) = message.subject() {
-                                    if part_num == term_group.part_id {
-                                        snippet.preview =
-                                            generate_snippet(&term_group.terms, subject);
-                                        break 'outer;
-                                    } else {
-                                        part_num += 1;
-                                    }
-                                }
-                                for sub_part in message.parts.iter().take(MAX_MESSAGE_PARTS) {
-                                    match &sub_part.body {
-                                        PartType::Text(text) => {
-                                            if part_num == term_group.part_id {
-                                                snippet.preview =
-                                                    generate_snippet(&term_group.terms, text);
-                                                break 'outer;
-                                            } else {
-                                                part_num += 1;
-                                            }
-                                        }
-                                        PartType::Html(html) => {
-                                            if part_num == term_group.part_id {
-                                                snippet.preview = generate_snippet(
-                                                    &term_group.terms,
-                                                    &html_to_text(html),
-                                                );
-                                                break 'outer;
-                                            } else {
-                                                part_num += 1;
-                                            }
-                                        }
-                                        _ => (),
-                                    }
-                                }
-                            }
-                            _ => (),
-                        }
-                    }
-                }
-            }
-            response.list.push(snippet);
-        }
-        */
+        for email_id in email_ids {
+            let document_id = email_id.document_id();
+            let mut snippet = SearchSnippet {
+                email_id,
+                subject: None,
+                preview: None,
+            };
+            if !document_ids.contains(document_id) {
+                response.not_found.push(email_id);
+                continue;
+            } else if terms.is_empty() {
+                response.list.push(snippet);
+                continue;
+            }
+            let metadata = match self
+                .get_property::<Bincode<MessageMetadata>>(
+                    account_id,
+                    Collection::Email,
+                    document_id,
+                    &Property::BodyStructure,
+                )
+                .await?
+            {
+                Some(metadata) => metadata.inner,
+                None => {
+                    response.not_found.push(email_id);
+                    continue;
+                }
+            };
+
+            // Add subject snippet
+            if let Some(subject) = metadata
+                .contents
+                .root_part()
+                .headers
+                .header_value(&HeaderName::Subject)
+                .and_then(|v| v.as_text())
+                .and_then(|v| generate_snippet(v, &terms, language, is_exact))
+            {
+                snippet.subject = subject.into();
+            }
+
+            // Check if the snippet can be generated from the preview
+            /*if let Some(body) = generate_snippet(&metadata.preview, &terms) {
+                snippet.preview = body.into();
+            } else {*/
+            // Download message
+            let raw_message =
+                if let Some(raw_message) = self.get_blob(&metadata.blob_hash, 0..u32::MAX).await? {
+                    raw_message
+                } else {
+                    tracing::warn!(event = "not-found",
+                        account_id = account_id,
+                        collection = ?Collection::Email,
+                        document_id = email_id.document_id(),
+                        blob_id = ?metadata.blob_hash,
+                        "Blob not found");
+                    response.not_found.push(email_id);
+                    continue;
+                };
+
+            // Find a matching part
+            'outer: for part in &metadata.contents.parts {
+                match &part.body {
+                    MetadataPartType::Text | MetadataPartType::Html => {
+                        let text = match part.decode_contents(&raw_message) {
+                            PartType::Text(text) => text,
+                            PartType::Html(html) => html_to_text(&html).into(),
+                            _ => unreachable!(),
+                        };
+
+                        if let Some(body) = generate_snippet(&text, &terms, language, is_exact) {
+                            snippet.preview = body.into();
+                            break;
+                        }
+                    }
+                    MetadataPartType::Message(message) => {
+                        for part in &message.parts {
+                            if let MetadataPartType::Text | MetadataPartType::Html = part.body {
+                                let text = match part.decode_contents(&raw_message) {
+                                    PartType::Text(text) => text,
+                                    PartType::Html(html) => html_to_text(&html).into(),
+                                    _ => unreachable!(),
+                                };
+
+                                if let Some(body) =
+                                    generate_snippet(&text, &terms, language, is_exact)
+                                {
+                                    snippet.preview = body.into();
+                                    break 'outer;
+                                }
+                            }
+                        }
+                    }
+                    _ => (),
+                }
+            }
+            //}
+
+            response.list.push(snippet);
+        }
 
         Ok(response)
     }
 }
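Note: the snippet generator no longer walks a stored term index; it decodes each text part from the cached MessageMetadata and re-tokenizes it on the fly. A usage sketch mirroring the call sites above (terms holds the tokenized and stemmed search words, is_exact marks a quoted phrase):

    // Sketch: highlight the query terms in one decoded body part.
    if let Some(preview) = generate_snippet(&text, &terms, language, is_exact) {
        snippet.preview = preview.into(); // e.g. "... <mark>invoice</mark> attached ..."
    }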
@@ -21,7 +21,7 @@
 * for more details.
 */
 
-use std::{collections::hash_map::RandomState, sync::Arc, time::Duration};
+use std::{collections::hash_map::RandomState, fmt::Display, sync::Arc, time::Duration};
 
 use ::sieve::{Compiler, Runtime};
 use api::session::BaseCapabilities;

@@ -49,17 +49,23 @@ use services::{
 use smtp::core::SMTP;
 use store::{
     backend::{fs::FsStore, sqlite::SqliteStore},
+    fts::FtsFilter,
     parking_lot::Mutex,
     query::{sort::Pagination, Comparator, Filter, ResultSet, SortedResultSet},
     roaring::RoaringBitmap,
-    write::{key::KeySerializer, BatchBuilder, BitmapClass, TagValue, ToBitmaps, ValueClass},
-    BitmapKey, BlobStore, Deserialize, Key, Serialize, Store, ValueKey, SUBSPACE_VALUES,
+    write::{
+        key::{DeserializeBigEndian, KeySerializer},
+        BatchBuilder, BitmapClass, TagValue, ToBitmaps, ValueClass,
+    },
+    BitmapKey, BlobStore, Deserialize, FtsStore, Key, Serialize, Store, ValueKey, SUBSPACE_VALUES,
+    U32_LEN, U64_LEN,
 };
 use tokio::sync::mpsc;
 use utils::{
     config::Rate,
     ipc::DeliveryEvent,
     map::ttl_dashmap::{TtlDashMap, TtlMap},
+    snowflake::SnowflakeIdGenerator,
     UnwrapFailure,
 };

@@ -85,11 +91,13 @@ pub const LONG_SLUMBER: Duration = Duration::from_secs(60 * 60 * 24);
 pub struct JMAP {
     pub store: Store,
     pub blob_store: BlobStore,
+    pub fts_store: FtsStore,
     pub config: Config,
     pub directory: Arc<dyn Directory>,
 
     pub sessions: TtlDashMap<String, u32>,
     pub access_tokens: TtlDashMap<u32, Arc<AccessToken>>,
+    pub snowflake_id: SnowflakeIdGenerator,
 
     pub rate_limit_auth: DashMap<u32, Arc<Mutex<AuthenticatedLimiter>>>,
     pub rate_limit_unauth: DashMap<RemoteAddress, Arc<Mutex<AnonymousLimiter>>>,

@@ -108,6 +116,7 @@ pub struct Config {
     pub default_language: Language,
     pub query_max_results: usize,
     pub changes_max_results: usize,
+    pub snippet_max_results: usize,
 
     pub request_max_size: usize,
     pub request_max_calls: usize,

@@ -187,6 +196,11 @@ impl JMAP {
             .property::<u64>("global.shared-map.shard")?
             .unwrap_or(32)
             .next_power_of_two() as usize;
+        let store = Store::SQLite(Arc::new(
+            SqliteStore::open(config)
+                .await
+                .failed("Unable to open database"),
+        ));
 
         let jmap_server = Arc::new(JMAP {
             directory: directory_config

@@ -197,11 +211,12 @@ impl JMAP {
                 config.value_require("jmap.directory")?
             ))
             .clone(),
-            store: Store::SQLite(Arc::new(
-                SqliteStore::open(config)
-                    .await
-                    .failed("Unable to open database"),
-            )),
+            snowflake_id: config
+                .property::<u64>("global.node-id")?
+                .map(SnowflakeIdGenerator::with_node_id)
+                .unwrap_or_else(SnowflakeIdGenerator::new),
+            fts_store: FtsStore::Store(store.clone()),
+            store,
             blob_store: BlobStore::Fs(Arc::new(
                 FsStore::open(config)
                     .await

@@ -618,7 +633,28 @@ impl JMAP {
             .await
             .map_err(|err| {
                 tracing::error!(event = "error",
-                context = "mailbox_set",
+                context = "filter",
+                account_id = account_id,
+                collection = ?collection,
+                error = ?err,
+                "Failed to execute filter.");
+
+                MethodError::ServerPartialFail
+            })
+    }
+
+    pub async fn fts_filter<T: Into<u8> + Display + Clone + std::fmt::Debug>(
+        &self,
+        account_id: u32,
+        collection: Collection,
+        filters: Vec<FtsFilter<T>>,
+    ) -> Result<RoaringBitmap, MethodError> {
+        self.fts_store
+            .query(account_id, collection, filters)
+            .await
+            .map_err(|err| {
+                tracing::error!(event = "error",
+                context = "fts-filter",
                 account_id = account_id,
                 collection = ?collection,
                 error = ?err,
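Note: fts_filter is a thin wrapper: it forwards the filter chain to the configured FtsStore, logs backend failures, and surfaces them as MethodError::ServerPartialFail. On the caller side it reduces to:

    // Sketch: resolve an FtsFilter chain into matching document ids.
    let matches: RoaringBitmap = self
        .fts_filter(account_id, Collection::Email, fts_filters)
        .await?;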
@@ -805,6 +841,11 @@ pub enum NamedKey<T: AsRef<[u8]>> {
     Name(T),
     Id(u32),
     Quota(u32),
+    IndexEmail {
+        account_id: u32,
+        document_id: u32,
+        seq: u64,
+    },
 }
 
 impl<T: AsRef<[u8]>> From<&NamedKey<T>> for ValueClass {

@@ -817,21 +858,44 @@ impl<T: AsRef<[u8]>> From<&NamedKey<T>> for ValueClass {
                 .finalize(),
             ),
             NamedKey::Id(id) => ValueClass::Named(
-                KeySerializer::new(std::mem::size_of::<u32>())
+                KeySerializer::new(std::mem::size_of::<u32>() + 1)
                     .write(1u8)
                     .write_leb128(*id)
                     .finalize(),
             ),
             NamedKey::Quota(id) => ValueClass::Named(
-                KeySerializer::new(std::mem::size_of::<u32>())
+                KeySerializer::new(std::mem::size_of::<u32>() + 1)
                     .write(2u8)
                     .write_leb128(*id)
                     .finalize(),
             ),
+            NamedKey::IndexEmail {
+                account_id,
+                document_id,
+                seq,
+            } => ValueClass::Named(
+                KeySerializer::new(std::mem::size_of::<u32>() * 4 + 1)
+                    .write(3u8)
+                    .write(*seq)
+                    .write(*account_id)
+                    .write(*document_id)
+                    .finalize(),
+            ),
         }
     }
 }
 
+impl<T: AsRef<[u8]>> NamedKey<T> {
+    pub fn deserialize_index_email(bytes: &[u8]) -> store::Result<Self> {
+        let len = bytes.len();
+        Ok(NamedKey::IndexEmail {
+            seq: bytes.deserialize_be_u64(len - U64_LEN - (U32_LEN * 2))?,
+            account_id: bytes.deserialize_be_u32(len - U32_LEN * 2)?,
+            document_id: bytes.deserialize_be_u32(len - U32_LEN)?,
+        })
+    }
+}
+
 impl<T: AsRef<[u8]>> From<NamedKey<T>> for ValueClass {
     fn from(key: NamedKey<T>) -> Self {
         (&key).into()
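Note: the serialized IndexEmail class places seq before account_id and document_id, so iterating the named-key subspace visits queue entries in submission order. The layout implied by the serializer and the DeserializeBigEndian reads above (assuming the integer writes are big-endian, as those reads suggest):

    // Byte layout of the IndexEmail value class (17 bytes total):
    //
    //   [ 3u8 ] [ seq: u64 BE ] [ account_id: u32 BE ] [ document_id: u32 BE ]
    //      1          8                   4                       4
    //
    // deserialize_index_email() recovers the fields from the end of the key:
    //   document_id  at  len - U32_LEN
    //   account_id   at  len - U32_LEN * 2
    //   seq          at  len - U64_LEN - U32_LEN * 2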
@@ -36,43 +36,73 @@ use super::IPC_CHANNEL_BUFFER;
 
 pub enum Event {
     PurgeDb,
-    PurgeBlobs,
     PurgeSessions,
+    IndexStart,
+    IndexDone,
+    #[cfg(feature = "test_mode")]
+    IndexIsActive(tokio::sync::oneshot::Sender<bool>),
     Exit,
 }
 
 const TASK_PURGE_DB: usize = 0;
-const TASK_PURGE_BLOBS: usize = 1;
-const TASK_PURGE_SESSIONS: usize = 2;
+const TASK_PURGE_SESSIONS: usize = 1;
 
 pub fn spawn_housekeeper(core: Arc<JMAP>, settings: &Config, mut rx: mpsc::Receiver<Event>) {
     let purge_db_at = settings
         .property_or_static::<SimpleCron>("jmap.purge.schedule.db", "0 3 *")
         .failed("Initialize housekeeper");
-    let purge_blobs_at = settings
-        .property_or_static::<SimpleCron>("jmap.purge.schedule.blobs", "30 3 *")
-        .failed("Initialize housekeeper");
     let purge_cache = settings
         .property_or_static::<SimpleCron>("jmap.purge.schedule.sessions", "15 * *")
         .failed("Initialize housekeeper");
 
     tokio::spawn(async move {
         tracing::debug!("Housekeeper task started.");
 
+        let mut index_busy = true;
+        let mut index_pending = false;
+
+        // Index any queued messages
+        let core_ = core.clone();
+        tokio::spawn(async move {
+            core_.fts_index_queued().await;
+        });
+
         loop {
-            let time_to_next = [
-                purge_db_at.time_to_next(),
-                purge_blobs_at.time_to_next(),
-                purge_cache.time_to_next(),
-            ];
-            let mut tasks_to_run = [false, false, false];
+            let time_to_next = [purge_db_at.time_to_next(), purge_cache.time_to_next()];
+            let mut tasks_to_run = [false, false];
             let start_time = Instant::now();
 
             match tokio::time::timeout(time_to_next.iter().min().copied().unwrap(), rx.recv()).await
             {
                 Ok(Some(event)) => match event {
                     Event::PurgeDb => tasks_to_run[TASK_PURGE_DB] = true,
-                    Event::PurgeBlobs => tasks_to_run[TASK_PURGE_BLOBS] = true,
                     Event::PurgeSessions => tasks_to_run[TASK_PURGE_SESSIONS] = true,
+                    Event::IndexStart => {
+                        if !index_busy {
+                            index_busy = true;
+                            let core = core.clone();
+                            tokio::spawn(async move {
+                                core.fts_index_queued().await;
+                            });
+                        } else {
+                            index_pending = true;
+                        }
+                    }
+                    Event::IndexDone => {
+                        if index_pending {
+                            index_pending = false;
+                            let core = core.clone();
+                            tokio::spawn(async move {
+                                core.fts_index_queued().await;
+                            });
+                        } else {
+                            index_busy = false;
+                        }
+                    }
+                    #[cfg(feature = "test_mode")]
+                    Event::IndexIsActive(tx) => {
+                        tx.send(index_busy).ok();
+                    }
                     Event::Exit => {
                         tracing::debug!("Housekeeper task exiting.");
                         return;

@@ -104,13 +134,12 @@ pub fn spawn_housekeeper(core: Arc<JMAP>, settings: &Config, mut rx: mpsc::Receiver<Event>) {
                 tokio::spawn(async move {
                     match task_id {
                         TASK_PURGE_DB => {
-                            tracing::info!("Purging database.");
+                            tracing::info!("Purging database...");
                             if let Err(err) = core.store.purge_bitmaps().await {
                                 tracing::error!("Error while purging bitmaps: {}", err);
                             }
-                        }
-                        TASK_PURGE_BLOBS => {
-                            tracing::info!("Purging temporary blobs.",);
+
+                            tracing::info!("Purging blobs...",);
                             if let Err(err) =
                                 core.store.blob_hash_purge(core.blob_store.clone()).await
                             {
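Note: index_busy and index_pending form a coalescing latch: at most one fts_index_queued() task runs at a time, and any number of IndexStart events received meanwhile collapse into a single re-run once IndexDone arrives. Condensed, the state machine driven by the two events above is:

    // Sketch of the coalescing latch (not a verbatim excerpt).
    match event {
        Event::IndexStart if !index_busy => { index_busy = true; /* spawn fts_index_queued() */ }
        Event::IndexStart => index_pending = true,
        Event::IndexDone if index_pending => { index_pending = false; /* spawn again */ }
        Event::IndexDone => index_busy = false,
        _ => (),
    }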
crates/jmap/src/services/index.rs (new file, 224 lines)
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2023 Stalwart Labs Ltd.
+ * ... (standard AGPL-3.0 license header, identical to the other files in this commit) ...
+ */
+
+use jmap_proto::types::{collection::Collection, property::Property};
+use store::{
+    fts::index::FtsDocument,
+    write::{BatchBuilder, ValueClass},
+    IterateParams, ValueKey,
+};
+
+use crate::{
+    email::{index::IndexMessageText, metadata::MessageMetadata},
+    Bincode, NamedKey, JMAP,
+};
+
+use super::housekeeper::Event;
+
+impl JMAP {
+    pub async fn fts_index_queued(&self) {
+        let from_key = ValueKey::<ValueClass> {
+            account_id: 0,
+            collection: 0,
+            document_id: 0,
+            class: NamedKey::IndexEmail::<&[u8]> {
+                account_id: 0,
+                document_id: 0,
+                seq: 0,
+            }
+            .into(),
+        };
+        let to_key = ValueKey::<ValueClass> {
+            account_id: u32::MAX,
+            collection: u8::MAX,
+            document_id: u32::MAX,
+            class: NamedKey::IndexEmail::<&[u8]> {
+                account_id: u32::MAX,
+                document_id: u32::MAX,
+                seq: u64::MAX,
+            }
+            .into(),
+        };
+
+        // Retrieve entries pending to be indexed
+        // TODO: Support indexing from multiple nodes
+        let mut entries = Vec::new();
+        let _ = self
+            .store
+            .iterate(
+                IterateParams::new(from_key, to_key).ascending(),
+                |key, value| {
+                    entries.push((
+                        NamedKey::<Vec<u8>>::deserialize_index_email(key)?,
+                        value.to_vec(),
+                    ));
+                    Ok(true)
+                },
+            )
+            .await
+            .map_err(|err| {
+                tracing::error!(
+                    context = "fts_index_queued",
+                    event = "error",
+                    reason = ?err,
+                    "Failed to iterate over index emails"
+                );
+            });
+
+        // Index entries
+        for (key, blob_hash) in entries {
+            if let NamedKey::IndexEmail {
+                account_id,
+                document_id,
+                ..
+            } = &key
+            {
+                if !blob_hash.is_empty() {
+                    match self
+                        .get_property::<Bincode<MessageMetadata>>(
+                            *account_id,
+                            Collection::Email,
+                            *document_id,
+                            Property::BodyStructure,
+                        )
+                        .await
+                    {
+                        Ok(Some(metadata))
+                            if metadata.inner.blob_hash.as_slice() == blob_hash.as_slice() =>
+                        {
+                            // Obtain raw message
+                            let raw_message = if let Ok(Some(raw_message)) =
+                                self.get_blob(&metadata.inner.blob_hash, 0..u32::MAX).await
+                            {
+                                raw_message
+                            } else {
+                                tracing::warn!(
+                                    context = "fts_index_queued",
+                                    event = "error",
+                                    account_id = *account_id,
+                                    document_id = *document_id,
+                                    blob_hash = ?metadata.inner.blob_hash,
+                                    "Message blob not found"
+                                );
+                                continue;
+                            };
+                            let message = metadata.inner.contents.into_message(&raw_message);
+
+                            // Index message
+                            let document =
+                                FtsDocument::with_default_language(self.config.default_language)
+                                    .with_account_id(*account_id)
+                                    .with_collection(Collection::Email)
+                                    .with_document_id(*document_id)
+                                    .index_message(&message);
+                            if let Err(err) = self.fts_store.index(document).await {
+                                tracing::error!(
+                                    context = "fts_index_queued",
+                                    event = "error",
+                                    account_id = *account_id,
+                                    document_id = *document_id,
+                                    reason = ?err,
+                                    "Failed to index email in FTS index"
+                                );
+                                continue;
+                            }
+
+                            tracing::debug!(
+                                context = "fts_index_queued",
+                                event = "index",
+                                account_id = *account_id,
+                                document_id = *document_id,
+                                "Indexed document in FTS index"
+                            );
+                        }
+
+                        Err(err) => {
+                            tracing::error!(
+                                context = "fts_index_queued",
+                                event = "error",
+                                account_id = *account_id,
+                                document_id = *document_id,
+                                reason = ?err,
+                                "Failed to retrieve email metadata"
+                            );
+                            break;
+                        }
+                        _ => {
+                            // The message was probably deleted or overwritten
+                            tracing::debug!(
+                                context = "fts_index_queued",
+                                event = "error",
+                                account_id = *account_id,
+                                document_id = *document_id,
+                                "Email metadata not found"
+                            );
+                        }
+                    }
+                } else {
+                    if let Err(err) = self
+                        .fts_store
+                        .remove(*account_id, Collection::Email.into(), *document_id)
+                        .await
+                    {
+                        tracing::error!(
+                            context = "fts_index_queued",
+                            event = "error",
+                            account_id = *account_id,
+                            document_id = *document_id,
+                            reason = ?err,
+                            "Failed to remove document from FTS index"
+                        );
+                        continue;
+                    }
+
+                    tracing::debug!(
+                        context = "fts_index_queued",
+                        event = "delete",
+                        account_id = *account_id,
+                        document_id = *document_id,
+                        "Deleted document from FTS index"
+                    );
+                }
+            }
+
+            // Remove entry from queue
+            if let Err(err) = self
+                .store
+                .write(BatchBuilder::new().clear(key).build_batch())
+                .await
+            {
+                tracing::error!(
+                    context = "fts_index_queued",
+                    event = "error",
+                    reason = ?err,
+                    "Failed to remove index email from queue"
+                );
+                break;
+            }
+        }
+
+        if let Err(err) = self.housekeeper_tx.send(Event::IndexDone).await {
+            tracing::warn!("Failed to send index done event to housekeeper: {}", err);
+        }
+    }
+}
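Note: a queue entry is only cleared after its index or removal operation succeeded (failures continue or break and leave the entry in place), so FTS indexing is effectively at-least-once: a pass interrupted by an error or a crash is retried on the next IndexStart. Under test_mode, the oneshot-carrying IndexIsActive event lets a test wait for the queue to drain; a sketch:

    // Sketch (test_mode): ask the housekeeper whether indexing is still running.
    let (tx, rx) = tokio::sync::oneshot::channel();
    housekeeper_tx.send(Event::IndexIsActive(tx)).await.ok();
    let indexing_active = rx.await.unwrap_or(false);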
@@ -23,6 +23,7 @@
 
 pub mod delivery;
 pub mod housekeeper;
+pub mod index;
 pub mod ingest;
 pub mod state;

@@ -22,6 +22,7 @@
 */
 
 pub mod detect;
+pub mod search_snippet;
 pub mod stemmer;
 pub mod stopwords;
@@ -21,7 +21,7 @@
 * for more details.
 */
 
-use super::term_index::Term;
+use super::Language;
 
 fn escape_char(c: char, string: &mut String) {
     match c {

@@ -45,9 +45,53 @@ fn escape_char_len(c: char) -> usize {
     }
 }
 
-pub fn generate_snippet(terms: &[Term], text: &str) -> Option<String> {
+pub struct Term {
+    offset: usize,
+    len: usize,
+}
+
+pub fn generate_snippet(
+    text: &str,
+    needles: &[impl AsRef<str>],
+    language: Language,
+    is_exact: bool,
+) -> Option<String> {
+    let mut terms = Vec::new();
+    if is_exact {
+        let tokens = language.tokenize_text(text, 200).collect::<Vec<_>>();
+        for tokens in tokens.windows(needles.len()) {
+            if needles
+                .iter()
+                .zip(tokens)
+                .all(|(needle, token)| needle.as_ref() == token.word.as_ref())
+            {
+                for token in tokens {
+                    terms.push(Term {
+                        offset: token.from,
+                        len: token.to - token.from,
+                    });
+                }
+            }
+        }
+    } else {
+        for token in language.tokenize_text(text, 200) {
+            if needles.iter().any(|needle| {
+                let needle = needle.as_ref();
+                needle == token.word.as_ref() || needle.len() > 2 && token.word.contains(needle)
+            }) {
+                terms.push(Term {
+                    offset: token.from,
+                    len: token.to - token.from,
+                });
+            }
+        }
+    }
+    if terms.is_empty() {
+        return None;
+    }
+
     let mut snippet = String::with_capacity(text.len());
-    let start_offset = terms.get(0)?.offset as usize;
+    let start_offset = terms.get(0)?.offset;
 
     if start_offset > 0 {
         let mut word_count = 0;
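Note: the new matcher has two modes. With is_exact it slides a window of needles.len() consecutive tokens over the text and requires every token to equal its needle, i.e. a phrase match; otherwise a token matches when it equals a needle or, for needles longer than two characters, contains it, which lets stems hit their inflections. For example (outputs are approximate, since the leading-context logic further down trims the snippet):

    // Fuzzy: "rank" is contained in the token "ranked".
    generate_snippet("hello ranked world", &["rank"], Language::English, false);
    // -> Some("hello <mark>ranked</mark> world")

    // Exact: "hello" and "world" are not consecutive tokens here.
    generate_snippet("hello ranked world", &["hello", "world"], Language::English, true);
    // -> None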
@@ -92,25 +136,22 @@ pub fn generate_snippet(
     let mut terms = terms.iter().peekable();
 
     'outer: while let Some(term) = terms.next() {
-        if snippet.len() + ("<mark>".len() * 2) + term.len as usize + 1 > 255 {
+        if snippet.len() + ("<mark>".len() * 2) + term.len + 1 > 255 {
             break;
         }
 
         snippet.push_str("<mark>");
-        snippet.push_str(text.get(term.offset as usize..term.offset as usize + term.len as usize)?);
+        snippet.push_str(text.get(term.offset..term.offset + term.len)?);
         snippet.push_str("</mark>");
 
         let next_offset = if let Some(next_term) = terms.peek() {
-            next_term.offset as usize
+            next_term.offset
         } else {
             text.len()
         };
 
         let mut last_is_space = false;
-        for char in text
-            .get(term.offset as usize + term.len as usize..next_offset)?
-            .chars()
-        {
+        for char in text.get(term.offset + term.len..next_offset)?.chars() {
             if !char.is_whitespace() {
                 last_is_space = false;
             } else {

@@ -133,15 +174,7 @@ pub fn generate_snippet(
 
 #[cfg(test)]
 mod tests {
-    use nlp::language::Language;
-
-    use crate::{
-        fts::term_index::{TermIndex, TermIndexBuilder},
-        Deserialize, Serialize,
-    };
-
-    use super::*;
+    use crate::language::{search_snippet::generate_snippet, Language};
 
     #[test]
     fn search_snippets() {

@@ -236,39 +269,18 @@ mod tests {
         ];
 
         for (parts, tests) in inputs {
-            let mut builder = TermIndexBuilder::new();
-
-            for (field_num, part) in parts.iter().enumerate() {
-                let mut terms = Vec::new();
-                for token in Language::English.tokenize_text(part, 40) {
-                    terms.push(builder.add_token(token));
-                }
-                builder.add_terms(field_num as u8, 0, terms);
-            }
-
-            let compressed_term_index = builder.serialize();
-            let term_index = TermIndex::deserialize(&compressed_term_index[..]).unwrap();
-
-            for (match_words, snippets) in tests {
-                let mut match_terms = Vec::new();
-                for word in &match_words {
-                    match_terms.push(term_index.get_match_term(word, None));
-                }
-
-                let term_groups = term_index
-                    .match_terms(&match_terms, None, false, true, true)
-                    .unwrap()
-                    .unwrap();
-
-                assert_eq!(term_groups.len(), snippets.len());
-
-                for (term_group, snippet) in term_groups.iter().zip(snippets.iter()) {
-                    assert_eq!(
-                        snippet,
-                        &generate_snippet(&term_group.terms, parts[term_group.field_id as usize])
-                            .unwrap()
-                    );
-                }
+            for (needles, snippets) in tests {
+                let mut results = Vec::new();
+
+                for part in &parts {
+                    if let Some(matched) =
+                        generate_snippet(part, &needles, Language::English, false)
+                    {
+                        results.push(matched);
+                    }
+                }
+
+                assert_eq!(snippets, results);
             }
         }
     }
 }
@@ -141,6 +141,7 @@ pub static STEMMER_MAP: &[Option<Algorithm>] = &[
     None, // Tagalog = 67,
     None, // Armenian = 68,
     None, // Unknown = 69,
+    None, // None = 70,
 ];
 
 #[cfg(test)]

@@ -93,6 +93,7 @@ pub static STOP_WORDS: &[Option<&Set<&'static str>>] = &[
     None, // Tagalog = 67,
     None, // Armenian = 68,
     None, // Unknown = 69,
+    None, // None = 70,
 ];
 
 static ARABIC: Set<&'static str> = phf_set! {

@@ -30,6 +30,7 @@ num_cpus = { version = "1.15.0", optional = true }
 blake3 = "1.3.3"
 tracing = "0.1"
 async-trait = "0.1.68"
+lz4_flex = { version = "0.11" }
 
 [dev-dependencies]
 tokio = { version = "1.23", features = ["full"] }

crates/store/src/backend/foundationdb/blob.rs (new file, 44 lines)
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2023 Stalwart Labs Ltd.
+ * ... (standard AGPL-3.0 license header, identical to the other files in this commit) ...
+ */
+
+use std::ops::Range;
+
+use super::FdbStore;
+
+impl FdbStore {
+    pub(crate) async fn get_blob(
+        &self,
+        key: &[u8],
+        range: Range<u32>,
+    ) -> crate::Result<Option<Vec<u8>>> {
+        todo!()
+    }
+
+    pub(crate) async fn put_blob(&self, key: &[u8], data: &[u8]) -> crate::Result<()> {
+        todo!()
+    }
+
+    pub(crate) async fn delete_blob(&self, key: &[u8]) -> crate::Result<bool> {
+        todo!()
+    }
+}
@@ -28,10 +28,7 @@ use futures::StreamExt;
 use rand::Rng;
 use std::time::Instant;
 
-use crate::{
-    write::{key::KeySerializer, now},
-    BitmapKey, IndexKey, SUBSPACE_VALUES,
-};
+use crate::{write::now, BitmapKey, IndexKey};
 
 use super::{
     bitmap::{next_available_index, BITS_PER_BLOCK},

@@ -183,36 +180,4 @@ impl FdbStore {
             }
         }
     }
-
-    pub(crate) async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
-        let start = Instant::now();
-        let counter = KeySerializer::new(U32_LEN + 2)
-            .write(SUBSPACE_VALUES)
-            .write(account_id)
-            .finalize();
-
-        loop {
-            // Read id
-            let trx = self.db.create_trx()?;
-            let id = if let Some(bytes) = trx.get(&counter, false).await? {
-                u64::deserialize(&bytes)? + 1
-            } else {
-                0
-            };
-            trx.set(&counter, &id.serialize());
-
-            match trx.commit().await {
-                Ok(_) => {
-                    return Ok(id);
-                }
-                Err(err) => {
-                    if start.elapsed() < MAX_COMMIT_TIME {
-                        err.on_error().await?;
-                    } else {
-                        return Err(FdbError::from(err).into());
-                    }
-                }
-            }
-        }
-    }
 }

@@ -26,6 +26,7 @@ use foundationdb::{api::NetworkAutoStop, Database, FdbError};
 use crate::Error;
 
 pub mod bitmap;
+pub mod blob;
 pub mod id_assign;
 pub mod main;
 pub mod purge;
@@ -95,7 +95,7 @@ impl FdbStore {
         account_id: u32,
         collection: u8,
         field: u8,
-        value: Vec<u8>,
+        value: &[u8],
         op: query::Operator,
     ) -> crate::Result<Option<RoaringBitmap>> {
         let k1 =

@@ -116,27 +116,23 @@
         let (begin, end) = match op {
             Operator::LowerThan => (
                 KeySelector::first_greater_or_equal(k1.finalize()),
-                KeySelector::first_greater_or_equal(k2.write(&value[..]).write(0u32).finalize()),
+                KeySelector::first_greater_or_equal(k2.write(value).write(0u32).finalize()),
             ),
             Operator::LowerEqualThan => (
                 KeySelector::first_greater_or_equal(k1.finalize()),
-                KeySelector::first_greater_or_equal(
-                    k2.write(&value[..]).write(u32::MAX).finalize(),
-                ),
+                KeySelector::first_greater_or_equal(k2.write(value).write(u32::MAX).finalize()),
             ),
             Operator::GreaterThan => (
-                KeySelector::first_greater_than(k1.write(&value[..]).write(u32::MAX).finalize()),
+                KeySelector::first_greater_than(k1.write(value).write(u32::MAX).finalize()),
                 KeySelector::first_greater_or_equal(k2.finalize()),
             ),
             Operator::GreaterEqualThan => (
-                KeySelector::first_greater_or_equal(k1.write(&value[..]).write(0u32).finalize()),
+                KeySelector::first_greater_or_equal(k1.write(value).write(0u32).finalize()),
                 KeySelector::first_greater_or_equal(k2.finalize()),
             ),
             Operator::Equal => (
-                KeySelector::first_greater_or_equal(k1.write(&value[..]).write(0u32).finalize()),
-                KeySelector::first_greater_or_equal(
-                    k2.write(&value[..]).write(u32::MAX).finalize(),
-                ),
+                KeySelector::first_greater_or_equal(k1.write(value).write(0u32).finalize()),
+                KeySelector::first_greater_or_equal(k2.write(value).write(u32::MAX).finalize()),
             ),
         };
         let key_len = begin.key().len();
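Note: the 0u32 / u32::MAX values appended after the matched value act as sentinel document ids: 0 sorts before every real document id under the same value and u32::MAX after all of them, which turns Equal into an inclusive scan over exactly the keys carrying that value:

    // Sketch: Equal over `... value document_id` index keys.
    let begin = KeySelector::first_greater_or_equal(k1.write(value).write(0u32).finalize());
    let end = KeySelector::first_greater_or_equal(k2.write(value).write(u32::MAX).finalize());
    // every key between begin and end holds `value` followed by some document id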
@@ -52,9 +52,7 @@ impl FsStore {
         )))
     }
 }
-}
 
-impl FsStore {
     pub(crate) async fn get_blob(
         &self,
         key: &[u8],

@@ -113,9 +111,7 @@ impl FsStore {
         Ok(false)
     }
 }
-}
 
-impl FsStore {
     fn build_path(&self, key: &[u8]) -> PathBuf {
         let mut path = self.path.clone();
@@ -30,8 +30,8 @@ pub mod s3;
 #[cfg(feature = "sqlite")]
 pub mod sqlite;
 
-pub(crate) const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 2) as usize;
-pub(crate) const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1;
+pub const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 1) as usize;
+pub const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1;
 
 #[cfg(feature = "test_mode")]
 pub static ID_ASSIGNMENT_EXPIRY: std::sync::atomic::AtomicU64 =

@@ -140,3 +140,8 @@ impl From<rocksdb::Error> for crate::Error {
         Self::InternalError(format!("RocksDB error: {}", value))
     }
 }
+
+#[cfg(feature = "rocks")]
+pub struct Store {
+    db: rocksdb::OptimisticTransactionDB<rocksdb::MultiThreaded>,
+}
83
crates/store/src/backend/sqlite/blob.rs
Normal file
83
crates/store/src/backend/sqlite/blob.rs
Normal file
|
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2023 Stalwart Labs Ltd.
+ *
+ * This file is part of the Stalwart Mail Server.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ * in the LICENSE file at the top-level directory of this distribution.
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can be released from the requirements of the AGPLv3 license by
+ * purchasing a commercial license. Please contact licensing@stalw.art
+ * for more details.
+ */
+
+use std::ops::Range;
+
+use rusqlite::OptionalExtension;
+
+use super::SqliteStore;
+
+impl SqliteStore {
+    pub(crate) async fn get_blob(
+        &self,
+        key: &[u8],
+        range: Range<u32>,
+    ) -> crate::Result<Option<Vec<u8>>> {
+        let conn = self.conn_pool.get()?;
+        self.spawn_worker(move || {
+            let mut result = conn.prepare_cached("SELECT v FROM t WHERE k = ?")?;
+            result
+                .query_row([&key], |row| {
+                    Ok({
+                        let bytes = row.get_ref(0)?.as_bytes()?;
+                        if range.start == 0 && range.end == u32::MAX {
+                            bytes.to_vec()
+                        } else {
+                            bytes
+                                .get(
+                                    range.start as usize
+                                        ..std::cmp::min(bytes.len(), range.end as usize),
+                                )
+                                .unwrap_or_default()
+                                .to_vec()
+                        }
+                    })
+                })
+                .optional()
+                .map_err(Into::into)
+        })
+        .await
+    }
+
+    pub(crate) async fn put_blob(&self, key: &[u8], data: &[u8]) -> crate::Result<()> {
+        let conn = self.conn_pool.get()?;
+        self.spawn_worker(move || {
+            conn.prepare_cached("INSERT OR REPLACE INTO t (k, v) VALUES (?, ?)")?
+                .execute([key, data])
+                .map_err(|e| crate::Error::InternalError(format!("Failed to insert blob: {}", e)))
+                .map(|_| ())
+        })
+        .await
+    }
+
+    pub(crate) async fn delete_blob(&self, key: &[u8]) -> crate::Result<bool> {
+        let conn = self.conn_pool.get()?;
+        self.spawn_worker(move || {
+            conn.prepare_cached("DELETE FROM t WHERE k = ?")?
+                .execute([key])
+                .map_err(|e| crate::Error::InternalError(format!("Failed to delete blob: {}", e)))
+                .map(|_| true)
+        })
+        .await
+    }
+}
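Note: a minimal usage sketch for the new SQLite blob backend. The store handle and the key format are assumptions for illustration; only put_blob, get_blob and delete_blob come from the file above.

    async fn blob_roundtrip(store: &SqliteStore) -> crate::Result<()> {
        let key = b"blob/account-1/abcdef";
        store.put_blob(key, b"hello world").await?;
        // A range of 0..u32::MAX returns the whole blob; anything narrower is sliced.
        let partial = store.get_blob(key, 0..5).await?;
        assert_eq!(partial.as_deref(), Some(&b"hello"[..]));
        assert!(store.delete_blob(key).await?);
        Ok(())
    }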
@@ -23,7 +23,7 @@

 use roaring::RoaringBitmap;

-use crate::{write::key::DeserializeBigEndian, BitmapKey, IterateParams, LogKey, U64_LEN};
+use crate::BitmapKey;

 use super::SqliteStore;
@@ -46,15 +46,13 @@ impl IdCacheKey {
 pub struct IdAssigner {
     pub freed_document_ids: Option<RoaringBitmap>,
     pub next_document_id: u32,
-    pub next_change_id: u64,
 }

 impl IdAssigner {
-    pub fn new(used_ids: Option<RoaringBitmap>, next_change_id: u64) -> Self {
+    pub fn new(used_ids: Option<RoaringBitmap>) -> Self {
         let mut assigner = IdAssigner {
             freed_document_ids: None,
             next_document_id: 0,
-            next_change_id,
         };
         if let Some(used_ids) = used_ids {
             if let Some(max) = used_ids.max() {
@@ -85,28 +83,9 @@ impl IdAssigner {
         id
     }
-
-    pub fn assign_change_id(&mut self) -> u64 {
-        let id = self.next_change_id;
-        self.next_change_id += 1;
-        id
-    }
 }

 impl SqliteStore {
-    pub(crate) async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
-        let collection = u8::MAX;
-        let key = IdCacheKey::new(account_id, collection);
-        for _ in 0..2 {
-            if let Some(assigner) = self.id_assigner.lock().get_mut(&key) {
-                return Ok(assigner.assign_change_id());
-            }
-            self.build_id_assigner(key).await?;
-        }
-
-        unreachable!()
-    }
-
     pub(crate) async fn assign_document_id(
         &self,
         account_id: u32,
@@ -128,56 +107,16 @@ impl SqliteStore {
         let used_ids = self
             .get_bitmap(BitmapKey::document_ids(key.account_id, key.collection))
             .await?;
-        let next_change_id = self
-            .get_last_change_id(key.account_id, key.collection)
-            .await?
-            .map(|id| id + 1)
-            .unwrap_or(0);
-
         let id_assigner = self.id_assigner.clone();
         let mut id_assigner = id_assigner.lock();
         // Make sure id assigner was not added by another thread
         if id_assigner.get_mut(&key).is_none() {
-            id_assigner.insert(key, IdAssigner::new(used_ids, next_change_id));
+            id_assigner.insert(key, IdAssigner::new(used_ids));
         }

         Ok(())
     }

-    async fn get_last_change_id(
-        &self,
-        account_id: u32,
-        collection: impl Into<u8> + Sync + Send,
-    ) -> crate::Result<Option<u64>> {
-        let collection = collection.into();
-
-        let from_key = LogKey {
-            account_id,
-            collection,
-            change_id: u64::MAX,
-        };
-        let to_key = LogKey {
-            account_id,
-            collection,
-            change_id: 0,
-        };
-
-        let mut last_change_id = None;
-
-        self.iterate(
-            IterateParams::new(from_key, to_key)
-                .descending()
-                .no_values()
-                .only_first(),
-            |key, _| {
-                last_change_id = key.deserialize_be_u64(key.len() - U64_LEN)?.into();
-                Ok(false)
-            },
-        )
-        .await?;
-
-        Ok(last_change_id)
-    }
 }

 #[cfg(test)]
@@ -188,7 +127,7 @@ mod tests {

     #[test]
     fn id_assigner() {
-        let mut assigner = IdAssigner::new(None, 0);
+        let mut assigner = IdAssigner::new(None);
         assert_eq!(assigner.assign_document_id(), 0);
         assert_eq!(assigner.assign_document_id(), 1);
         assert_eq!(assigner.assign_document_id(), 2);
@@ -197,7 +136,6 @@ mod tests {
             RoaringBitmap::from_sorted_iter([0, 2, 4, 6])
                 .unwrap()
                 .into(),
-            0,
         );
         assert_eq!(assigner.assign_document_id(), 1);
         assert_eq!(assigner.assign_document_id(), 3);
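Note: the tests above pin down the allocation order. The same behavior in isolation, as a sketch (the continuation past the previous maximum is an assumption consistent with IdAssigner::new):

    fn freed_ids_first() {
        use roaring::RoaringBitmap;
        // With documents 0, 2, 4 and 6 in use, the gaps are handed out first...
        let used = RoaringBitmap::from_sorted_iter([0u32, 2, 4, 6]).unwrap();
        let mut assigner = IdAssigner::new(Some(used));
        assert_eq!(assigner.assign_document_id(), 1);
        assert_eq!(assigner.assign_document_id(), 3);
        assert_eq!(assigner.assign_document_id(), 5);
        // ...before fresh ids continue past the previous maximum.
        assert_eq!(assigner.assign_document_id(), 7);
    }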
@@ -30,8 +30,8 @@ use tokio::sync::oneshot;
 use utils::{config::Config, UnwrapFailure};

 use crate::{
-    SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_COUNTERS, SUBSPACE_INDEXES,
-    SUBSPACE_LOGS, SUBSPACE_VALUES,
+    SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_BLOB_DATA, SUBSPACE_COUNTERS,
+    SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_VALUES,
 };

 use super::{pool::SqliteConnectionManager, SqliteStore};
@@ -78,7 +78,12 @@ impl SqliteStore {
     pub(super) fn create_tables(&self) -> crate::Result<()> {
         let conn = self.conn_pool.get()?;

-        for table in [SUBSPACE_VALUES, SUBSPACE_LOGS, SUBSPACE_ACLS] {
+        for table in [
+            SUBSPACE_VALUES,
+            SUBSPACE_LOGS,
+            SUBSPACE_ACLS,
+            SUBSPACE_BLOB_DATA,
+        ] {
             let table = char::from(table);
             conn.execute(
                 &format!(
@@ -34,6 +34,7 @@ use self::{
     pool::SqliteConnectionManager,
 };

+pub mod blob;
 pub mod id_assign;
 pub mod main;
 pub mod pool;
@@ -32,7 +32,6 @@ impl SqliteStore {
     pub(crate) async fn purge_bitmaps(&self) -> crate::Result<()> {
         let conn = self.conn_pool.get()?;
         self.spawn_worker(move || {
-            //Todo
             conn.prepare_cached(concat!(
                 "DELETE FROM b WHERE ",
                 "a = 0 AND ",
@@ -110,7 +110,7 @@ impl SqliteStore {
         account_id: u32,
         collection: u8,
         field: u8,
-        value: Vec<u8>,
+        value: &[u8],
         op: query::Operator,
     ) -> crate::Result<Option<RoaringBitmap>> {
         let conn = self.conn_pool.get()?;
@@ -132,27 +132,27 @@ impl SqliteStore {
             Operator::LowerThan => (
                 ("SELECT k FROM i WHERE k >= ? AND k < ?"),
                 (k1.finalize()),
-                (k2.write(&value[..]).write(0u32).finalize()),
+                (k2.write(value).write(0u32).finalize()),
             ),
             Operator::LowerEqualThan => (
                 ("SELECT k FROM i WHERE k >= ? AND k <= ?"),
                 (k1.finalize()),
-                (k2.write(&value[..]).write(u32::MAX).finalize()),
+                (k2.write(value).write(u32::MAX).finalize()),
             ),
             Operator::GreaterThan => (
                 ("SELECT k FROM i WHERE k > ? AND k <= ?"),
-                (k1.write(&value[..]).write(u32::MAX).finalize()),
+                (k1.write(value).write(u32::MAX).finalize()),
                 (k2.finalize()),
             ),
             Operator::GreaterEqualThan => (
                 ("SELECT k FROM i WHERE k >= ? AND k <= ?"),
-                (k1.write(&value[..]).write(0u32).finalize()),
+                (k1.write(value).write(0u32).finalize()),
                 (k2.finalize()),
             ),
             Operator::Equal => (
                 ("SELECT k FROM i WHERE k >= ? AND k <= ?"),
-                (k1.write(&value[..]).write(0u32).finalize()),
-                (k2.write(&value[..]).write(u32::MAX).finalize()),
+                (k1.write(value).write(0u32).finalize()),
+                (k2.write(value).write(u32::MAX).finalize()),
             ),
         };
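Note: the 0u32 and u32::MAX suffixes above act as inclusive range bounds because index keys are assumed to end with the big-endian document id; a self-contained sketch of the ordering argument:

    fn bounds_sketch() {
        // Assumed key layout: serialized value bytes followed by a be u32 document id.
        let value = b"serialized-value";
        let mut lower = value.to_vec();
        lower.extend_from_slice(&0u32.to_be_bytes()); // sorts at or before every match
        let mut upper = value.to_vec();
        upper.extend_from_slice(&u32::MAX.to_be_bytes()); // sorts at or after every match
        let mut doc_key = value.to_vec();
        doc_key.extend_from_slice(&42u32.to_be_bytes());
        assert!(lower <= doc_key && doc_key <= upper);
    }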
@@ -314,7 +314,7 @@ impl SqliteStore {

         // Values
         let mut has_errors = false;
-        for table in [crate::SUBSPACE_VALUES, crate::SUBSPACE_ACLS, crate::SUBSPACE_COUNTERS] {
+        for table in [crate::SUBSPACE_VALUES, crate::SUBSPACE_ACLS, crate::SUBSPACE_COUNTERS, crate::SUBSPACE_BLOB_DATA] {
             let table = char::from(table);
             let mut query = conn.prepare_cached(&format!("SELECT k, v FROM {table}")).unwrap();
             let mut rows = query.query([]).unwrap();
@@ -370,7 +370,7 @@ impl SqliteStore {

         // Bitmaps
         let mut query = conn
-            .prepare_cached("SELECT z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p FROM b")
+            .prepare_cached(&format!("SELECT z, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p FROM {}", char::from(crate::SUBSPACE_BITMAPS)))
             .unwrap();
         let mut rows = query.query([]).unwrap();
@@ -274,8 +274,8 @@ impl SqliteStore {
     #[cfg(feature = "test_mode")]
     pub(crate) async fn destroy(&self) {
         use crate::{
-            SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_COUNTERS, SUBSPACE_INDEXES,
-            SUBSPACE_LOGS, SUBSPACE_VALUES,
+            SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_BLOBS, SUBSPACE_BLOB_DATA, SUBSPACE_COUNTERS,
+            SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_VALUES,
         };

         let conn = self.conn_pool.get().unwrap();
@@ -287,6 +287,7 @@ impl SqliteStore {
             SUBSPACE_BLOBS,
             SUBSPACE_ACLS,
             SUBSPACE_COUNTERS,
+            SUBSPACE_BLOB_DATA,
         ] {
             conn.execute(&format!("DROP TABLE {}", char::from(table)), [])
                 .unwrap();
@@ -21,23 +21,27 @@
  * for more details.
  */

-use std::ops::{BitAndAssign, Range};
+use std::{
+    fmt::Display,
+    ops::{BitAndAssign, Range},
+};

 use roaring::RoaringBitmap;

 use crate::{
+    fts::{index::FtsDocument, FtsFilter},
     query,
     write::{Batch, BitmapClass, ValueClass},
-    BitmapKey, BlobStore, Deserialize, IterateParams, Key, Store, ValueKey,
+    BitmapKey, BlobStore, Deserialize, FtsStore, IterateParams, Key, Store, ValueKey,
 };

 impl Store {
-    pub async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
+    /*pub async fn assign_change_id(&self, account_id: u32) -> crate::Result<u64> {
         match self {
             Self::SQLite(store) => store.assign_change_id(account_id).await,
             Self::FoundationDb(store) => store.assign_change_id(account_id).await,
         }
-    }
+    }*/

     pub async fn assign_document_id(
         &self,
@@ -110,7 +114,7 @@ impl Store {
         account_id: u32,
         collection: u8,
         field: u8,
-        value: Vec<u8>,
+        value: &[u8],
         op: query::Operator,
     ) -> crate::Result<Option<RoaringBitmap>> {
         match self {
@@ -149,7 +153,7 @@ impl Store {
         }
     }

-    pub(crate) async fn iterate<T: Key>(
+    pub async fn iterate<T: Key>(
         &self,
         params: IterateParams<T>,
         cb: impl for<'x> FnMut(&'x [u8], &'x [u8]) -> crate::Result<bool> + Sync + Send,
@@ -190,6 +194,27 @@ impl Store {
         }
     }

+    pub async fn get_blob(&self, key: &[u8], range: Range<u32>) -> crate::Result<Option<Vec<u8>>> {
+        match self {
+            Self::SQLite(store) => store.get_blob(key, range).await,
+            Self::FoundationDb(store) => store.get_blob(key, range).await,
+        }
+    }
+
+    pub async fn put_blob(&self, key: &[u8], data: &[u8]) -> crate::Result<()> {
+        match self {
+            Self::SQLite(store) => store.put_blob(key, data).await,
+            Self::FoundationDb(store) => store.put_blob(key, data).await,
+        }
+    }
+
+    pub async fn delete_blob(&self, key: &[u8]) -> crate::Result<bool> {
+        match self {
+            Self::SQLite(store) => store.delete_blob(key).await,
+            Self::FoundationDb(store) => store.delete_blob(key).await,
+        }
+    }
+
     #[cfg(feature = "test_mode")]
     pub async fn destroy(&self) {
         match self {
@@ -269,6 +294,8 @@ impl BlobStore {
         match self {
             Self::Fs(store) => store.get_blob(key, range).await,
             Self::S3(store) => store.get_blob(key, range).await,
+            Self::Sqlite(store) => store.get_blob(key, range).await,
+            Self::FoundationDb(store) => store.get_blob(key, range).await,
         }
     }

@@ -276,6 +303,8 @@ impl BlobStore {
         match self {
             Self::Fs(store) => store.put_blob(key, data).await,
             Self::S3(store) => store.put_blob(key, data).await,
+            Self::Sqlite(store) => store.put_blob(key, data).await,
+            Self::FoundationDb(store) => store.put_blob(key, data).await,
         }
     }

@@ -283,6 +312,47 @@ impl BlobStore {
         match self {
             Self::Fs(store) => store.delete_blob(key).await,
             Self::S3(store) => store.delete_blob(key).await,
+            Self::Sqlite(store) => store.delete_blob(key).await,
+            Self::FoundationDb(store) => store.delete_blob(key).await,
+        }
+    }
+}
+
+impl FtsStore {
+    pub async fn index<T: Into<u8> + Display + Clone + std::fmt::Debug>(
+        &self,
+        document: FtsDocument<'_, T>,
+    ) -> crate::Result<()> {
+        match self {
+            FtsStore::Store(store) => store.fts_index(document).await,
+        }
+    }
+
+    pub async fn query<T: Into<u8> + Display + Clone + std::fmt::Debug>(
+        &self,
+        account_id: u32,
+        collection: impl Into<u8>,
+        filters: Vec<FtsFilter<T>>,
+    ) -> crate::Result<RoaringBitmap> {
+        match self {
+            FtsStore::Store(store) => store.fts_query(account_id, collection, filters).await,
+        }
+    }
+
+    pub async fn remove(
+        &self,
+        account_id: u32,
+        collection: u8,
+        document_id: u32,
+    ) -> crate::Result<bool> {
+        match self {
+            FtsStore::Store(store) => store.fts_remove(account_id, collection, document_id).await,
+        }
+    }
+
+    pub async fn remove_all(&self, account_id: u32) -> crate::Result<()> {
+        match self {
+            FtsStore::Store(store) => store.fts_remove_all(account_id).await,
         }
     }
 }
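Note: a rough end-to-end sketch of the new FtsStore facade. Obtaining the fts handle and the u8 field type are assumptions; index(), query() and remove() are the methods introduced above.

    use nlp::language::Language;

    async fn index_and_search(fts: &FtsStore) -> crate::Result<()> {
        let mut document: FtsDocument<'_, u8> =
            FtsDocument::with_default_language(Language::English)
                .with_account_id(1)
                .with_collection(0u8)
                .with_document_id(42);
        document.index(Field::Body, "hello full-text world", Language::English);
        fts.index(document).await?;

        // The query result is a RoaringBitmap of matching document ids.
        let results = fts
            .query(1, 0u8, vec![FtsFilter::has_english_text(Field::Body, "hello")])
            .await?;
        assert!(results.contains(42));

        // Un-index the document again.
        fts.remove(1, 0, 42).await?;
        Ok(())
    }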
@@ -1,257 +0,0 @@
-/* Copyright (c) 2023 Stalwart Labs Ltd. (AGPL-3.0 license header, identical to the other files in this commit) */
-
-use std::{
-    borrow::Cow,
-    f64::consts::LN_2,
-    hash::{Hash, Hasher},
-};
-
-use nlp::{language::stemmer::StemmedToken, tokenizers::Token};
-use roaring::RoaringBitmap;
-use utils::codec::leb128::{Leb128Reader, Leb128Vec};
-
-use crate::{Deserialize, Error, Serialize};
-
-pub struct BloomFilter {
-    m: u64,
-    b: RoaringBitmap,
-}
-
-#[derive(Debug)]
-pub struct BloomHash {
-    pub h: [u64; 7],
-}
-
-#[derive(Debug)]
-pub struct BloomHashGroup {
-    pub h1: BloomHash,
-    pub h2: Option<BloomHash>,
-}
-
-const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
-    0xaf1f2242106c64b3,
-    0x60ca4cfb4b3ed0ce,
-    0xc7dbc0bb615e82b3,
-    0x520ad065378daf88,
-);
-lazy_static::lazy_static! {
-    static ref SIPHASHER: siphasher::sip::SipHasher13 =
-        siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
-}
-
-const P: f64 = 0.01;
-
-impl BloomFilter {
-    pub fn new(items: usize) -> Self {
-        Self {
-            m: if items > 0 {
-                std::cmp::max(Self::estimate_m(items, P), 10240)
-            } else {
-                0
-            },
-            b: RoaringBitmap::new(),
-        }
-    }
-
-    fn from_params(m: u64, b: RoaringBitmap) -> Self {
-        Self { m, b }
-    }
-
-    fn estimate_m(n: usize, p: f64) -> u64 {
-        (((n as f64) * f64::ln(p) / (-8.0 * LN_2.powi(2))).ceil() as u64) * 8
-    }
-
-    #[allow(dead_code)]
-    fn estimate_k(m: u64, n: usize) -> u32 {
-        std::cmp::max(((m as f64) / (n as f64) * f64::ln(2.0f64)).ceil() as u32, 1)
-    }
-
-    pub fn insert(&mut self, hash: &BloomHash) {
-        self.b.insert((hash.h[0] % self.m) as u32);
-        self.b.insert((hash.h[1] % self.m) as u32);
-        self.b.insert((hash.h[2] % self.m) as u32);
-        self.b.insert((hash.h[3] % self.m) as u32);
-        self.b.insert((hash.h[4] % self.m) as u32);
-        self.b.insert((hash.h[5] % self.m) as u32);
-        self.b.insert((hash.h[6] % self.m) as u32);
-    }
-
-    pub fn contains(&self, hash: &BloomHash) -> bool {
-        self.b.contains((hash.h[0] % self.m) as u32)
-            && self.b.contains((hash.h[1] % self.m) as u32)
-            && self.b.contains((hash.h[2] % self.m) as u32)
-            && self.b.contains((hash.h[3] % self.m) as u32)
-            && self.b.contains((hash.h[4] % self.m) as u32)
-            && self.b.contains((hash.h[5] % self.m) as u32)
-            && self.b.contains((hash.h[6] % self.m) as u32)
-    }
-
-    pub fn is_subset(&self, other: &Self) -> bool {
-        self.b.is_subset(&other.b)
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.m == 0 || self.b.is_empty()
-    }
-}
-
-pub trait BloomHasher {
-    fn hash<T: Hash + AsRef<[u8]> + ?Sized>(item: &T) -> Self;
-}
-
-impl BloomHash {
-    pub fn hash<T: Hash + AsRef<[u8]> + ?Sized>(item: &T) -> Self {
-        let h1 = xxhash_rust::xxh3::xxh3_64(item.as_ref());
-        let h2 = farmhash::hash64(item.as_ref());
-        let h3 = AHASHER.hash_one(item);
-        let mut sh = *SIPHASHER;
-        sh.write(item.as_ref());
-        let h4 = sh.finish();
-
-        Self {
-            h: [h1, h2, h3, h4, h1 ^ h2, h2 ^ h3, h3 ^ h4],
-        }
-    }
-}
-
-pub fn hash_token(item: &str) -> Vec<u8> {
-    let h1 = xxhash_rust::xxh3::xxh3_64(item.as_ref()).to_le_bytes();
-    let h2 = farmhash::hash64(item.as_ref()).to_le_bytes();
-    let h3 = AHASHER.hash_one(item).to_le_bytes();
-    let mut sh = *SIPHASHER;
-    sh.write(item.as_ref());
-    let h4 = sh.finish().to_le_bytes();
-
-    match item.len() {
-        0..=8 => {
-            let mut hash = Vec::with_capacity(6);
-            hash.extend_from_slice(&h1[..2]);
-            hash.extend_from_slice(&h2[..2]);
-            hash.push(h3[0]);
-            hash.push(h4[0]);
-            hash
-        }
-        9..=16 => {
-            let mut hash = Vec::with_capacity(8);
-            hash.extend_from_slice(&h1[..2]);
-            hash.extend_from_slice(&h2[..2]);
-            hash.extend_from_slice(&h3[..2]);
-            hash.extend_from_slice(&h4[..2]);
-            hash
-        }
-        17..=32 => {
-            let mut hash = Vec::with_capacity(12);
-            hash.extend_from_slice(&h1[..3]);
-            hash.extend_from_slice(&h2[..3]);
-            hash.extend_from_slice(&h3[..3]);
-            hash.extend_from_slice(&h4[..3]);
-            hash
-        }
-        _ => {
-            let mut hash = Vec::with_capacity(16);
-            hash.extend_from_slice(&h1[..4]);
-            hash.extend_from_slice(&h2[..4]);
-            hash.extend_from_slice(&h3[..4]);
-            hash.extend_from_slice(&h4[..4]);
-            hash
-        }
-    }
-}
-
-impl From<&str> for BloomHash {
-    fn from(s: &str) -> Self {
-        Self::hash(&s)
-    }
-}
-
-impl From<String> for BloomHash {
-    fn from(s: String) -> Self {
-        Self::hash(&s)
-    }
-}
-
-impl From<&String> for BloomHash {
-    fn from(s: &String) -> Self {
-        Self::hash(&s)
-    }
-}
-
-impl From<Cow<'_, str>> for BloomHash {
-    fn from(s: Cow<'_, str>) -> Self {
-        Self::hash(s.as_ref())
-    }
-}
-
-impl From<Token<Cow<'_, str>>> for BloomHashGroup {
-    fn from(t: Token<Cow<'_, str>>) -> Self {
-        Self {
-            h1: BloomHash::hash(t.word.as_ref()),
-            h2: None,
-        }
-    }
-}
-
-impl From<StemmedToken<'_>> for BloomHashGroup {
-    fn from(t: StemmedToken<'_>) -> Self {
-        Self {
-            h1: BloomHash::hash(t.word.as_ref()),
-            h2: t.stemmed_word.map(|w| BloomHash::hash(&format!("{w}_"))),
-        }
-    }
-}
-
-impl From<Cow<'_, str>> for BloomHashGroup {
-    fn from(t: Cow<'_, str>) -> Self {
-        Self {
-            h1: BloomHash::hash(t.as_ref()),
-            h2: None,
-        }
-    }
-}
-
-impl Serialize for BloomFilter {
-    fn serialize(self) -> Vec<u8> {
-        let mut buf = Vec::with_capacity(U64_LEN + self.b.serialized_size());
-        buf.push_leb128(self.m);
-        let _ = self.b.serialize_into(&mut buf);
-        buf
-    }
-}
-
-impl Deserialize for BloomFilter {
-    fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
-        let (m, pos) = bytes.read_leb128().ok_or_else(|| {
-            Error::InternalError(
-                "Failed to read 'm' value while deserializing bloom filter.".to_string(),
-            )
-        })?;
-        RoaringBitmap::deserialize_unchecked_from(bytes.get(pos..).ok_or_else(|| {
-            Error::InternalError(
-                "Failed to read bitmap while deserializing bloom filter.".to_string(),
-            )
-        })?)
-        .map_err(|err| Error::InternalError(format!("Failed to deserialize bloom filter: {err}.")))
-        .map(|b| Self::from_params(m, b))
-    }
-}
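Note: the removed estimate_m implements the standard Bloom filter sizing m = -n * ln(p) / (ln 2)^2 bits, rounded up to whole bytes. A quick numeric check of the formula as written above:

    fn bloom_sizing_check() {
        fn estimate_m(n: usize, p: f64) -> u64 {
            (((n as f64) * f64::ln(p) / (-8.0 * std::f64::consts::LN_2.powi(2))).ceil() as u64) * 8
        }
        // About 9.6 bits per item at a 1% false-positive rate.
        assert_eq!(estimate_m(10_000, 0.01), 95_856);
    }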
@@ -1,250 +0,0 @@
-/* Copyright (c) 2023 Stalwart Labs Ltd. (AGPL-3.0 license header, identical to the other files in this commit) */
-
-use std::{borrow::Cow, collections::HashSet, fmt::Display};
-
-use ahash::AHashSet;
-use nlp::{
-    language::{
-        detect::{LanguageDetector, MIN_LANGUAGE_SCORE},
-        stemmer::Stemmer,
-        Language,
-    },
-    tokenizers::{space::SpaceTokenizer, Token},
-};
-use utils::map::vec_map::VecMap;
-
-use crate::{
-    query::RawValue,
-    write::{BatchBuilder, IntoOperations, Operation, ValueClass},
-    Serialize, HASH_EXACT, HASH_STEMMED,
-};
-
-use super::term_index::{TermIndexBuilder, TokenIndex};
-
-pub const MAX_TOKEN_LENGTH: usize = (u8::MAX >> 2) as usize;
-pub const MAX_TOKEN_MASK: usize = MAX_TOKEN_LENGTH - 1;
-
-struct Text<'x, T: Into<u8> + Display> {
-    field: T,
-    text: Cow<'x, str>,
-    language: Type,
-}
-
-enum Type {
-    Stem(Language),
-    Tokenize,
-    Static,
-}
-
-pub struct FtsIndexBuilder<'x, T: Into<u8> + Display> {
-    parts: Vec<Text<'x, T>>,
-    default_language: Language,
-}
-
-impl<'x, T: Into<u8> + Display> FtsIndexBuilder<'x, T> {
-    pub fn with_default_language(default_language: Language) -> FtsIndexBuilder<'x, T> {
-        FtsIndexBuilder {
-            parts: vec![],
-            default_language,
-        }
-    }
-
-    pub fn index(&mut self, field: T, text: impl Into<Cow<'x, str>>, language: Language) {
-        self.parts.push(Text {
-            field,
-            text: text.into(),
-            language: Type::Stem(language),
-        });
-    }
-
-    pub fn index_raw(&mut self, field: T, text: impl Into<Cow<'x, str>>) {
-        self.parts.push(Text {
-            field,
-            text: text.into(),
-            language: Type::Tokenize,
-        });
-    }
-
-    pub fn index_raw_token(&mut self, field: T, text: impl Into<Cow<'x, str>>) {
-        self.parts.push(Text {
-            field,
-            text: text.into(),
-            language: Type::Static,
-        });
-    }
-}
-
-impl<'x, T: Into<u8> + Display> IntoOperations for FtsIndexBuilder<'x, T> {
-    fn build(self, batch: &mut BatchBuilder) {
-        let mut detect = LanguageDetector::new();
-        let mut tokens: VecMap<u8, AHashSet<String>> = VecMap::new();
-        let mut parts = Vec::new();
-
-        for text in self.parts {
-            match text.language {
-                Type::Stem(language) => {
-                    let language = if language == Language::Unknown {
-                        detect.detect(&text.text, MIN_LANGUAGE_SCORE)
-                    } else {
-                        language
-                    };
-                    parts.push((text.field, language, text.text));
-                }
-                Type::Tokenize => {
-                    let tokens = tokens.get_mut_or_insert(text.field.into());
-                    for token in SpaceTokenizer::new(text.text.as_ref(), MAX_TOKEN_LENGTH) {
-                        tokens.insert(token);
-                    }
-                }
-                Type::Static => {
-                    tokens
-                        .get_mut_or_insert(text.field.into())
-                        .insert(text.text.into_owned());
-                }
-            }
-        }
-
-        let default_language = detect
-            .most_frequent_language()
-            .unwrap_or(self.default_language);
-        let mut term_index = TermIndexBuilder::new();
-        let mut ops = AHashSet::new();
-
-        for (part_id, (field, language, text)) in parts.into_iter().enumerate() {
-            let language = if language != Language::Unknown {
-                language
-            } else {
-                default_language
-            };
-            let mut terms = Vec::new();
-            let field: u8 = field.into();
-
-            for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH).collect::<Vec<_>>() {
-                ops.insert(Operation::hash(&token.word, HASH_EXACT, field, true));
-                if let Some(stemmed_word) = &token.stemmed_word {
-                    ops.insert(Operation::hash(stemmed_word, HASH_STEMMED, field, true));
-                }
-                terms.push(term_index.add_stemmed_token(token));
-            }
-
-            if !terms.is_empty() {
-                term_index.add_terms(field, part_id as u32, terms);
-            }
-        }
-
-        for (field, tokens) in tokens {
-            let mut terms = Vec::with_capacity(tokens.len());
-            for token in tokens {
-                ops.insert(Operation::hash(&token, HASH_EXACT, field, true));
-                terms.push(term_index.add_token(Token {
-                    word: token.into(),
-                    from: 0,
-                    to: 0,
-                }));
-            }
-            term_index.add_terms(field, 0, terms);
-        }
-
-        for op in ops {
-            batch.ops.push(op);
-        }
-
-        batch.ops.push(Operation::Value {
-            class: ValueClass::Property {
-                field: u8::MAX,
-                family: u8::MAX,
-            },
-            set: term_index.serialize().into(),
-        });
-    }
-}
-
-impl TokenIndex {
-    fn build_index(self, batch: &mut BatchBuilder, set: bool) {
-        let mut ops = AHashSet::with_capacity(self.tokens.len() * 2);
-        for term in self.terms {
-            for (term_ids, is_exact) in [(term.exact_terms, true), (term.stemmed_terms, false)] {
-                for term_id in term_ids {
-                    if let Some(word) = self.tokens.get(term_id as usize) {
-                        ops.insert(Operation::hash(
-                            word,
-                            if is_exact { HASH_EXACT } else { HASH_STEMMED },
-                            term.field_id,
-                            set,
-                        ));
-                    }
-                }
-            }
-        }
-        for op in ops {
-            batch.ops.push(op);
-        }
-    }
-}
-
-impl IntoOperations for TokenIndex {
-    fn build(self, batch: &mut BatchBuilder) {
-        self.build_index(batch, false);
-        batch.ops.push(Operation::Value {
-            class: ValueClass::Property {
-                field: u8::MAX,
-                family: u8::MAX,
-            },
-            set: None,
-        });
-    }
-}
-
-impl IntoOperations for RawValue<TokenIndex> {
-    fn build(self, batch: &mut BatchBuilder) {
-        self.inner.build_index(batch, true);
-        batch.ops.push(Operation::Value {
-            class: ValueClass::Property {
-                field: u8::MAX,
-                family: u8::MAX,
-            },
-            set: self.raw.into(),
-        });
-    }
-}
-
-pub trait ToTokens {
-    fn to_tokens(&self) -> HashSet<String>;
-}
-
-impl ToTokens for &str {
-    fn to_tokens(&self) -> HashSet<String> {
-        let mut tokens = HashSet::new();
-        for token in SpaceTokenizer::new(self, MAX_TOKEN_LENGTH) {
-            tokens.insert(token);
-        }
-        tokens
-    }
-}
-
-impl ToTokens for &String {
-    fn to_tokens(&self) -> HashSet<String> {
-        self.as_str().to_tokens()
-    }
-}
372 crates/store/src/fts/index.rs (new file)
@@ -0,0 +1,372 @@
+/* Copyright (c) 2023 Stalwart Labs Ltd. (AGPL-3.0 license header, identical to the other files in this commit) */
+
+use std::{borrow::Cow, fmt::Display};
+
+use ahash::{AHashMap, AHashSet};
+use nlp::{
+    language::{
+        detect::{LanguageDetector, MIN_LANGUAGE_SCORE},
+        stemmer::Stemmer,
+        Language,
+    },
+    tokenizers::word::WordTokenizer,
+};
+
+use crate::{
+    backend::MAX_TOKEN_LENGTH,
+    write::{
+        hash::TokenType, key::KeySerializer, BatchBuilder, BitmapClass, BitmapHash, Operation,
+        ValueClass,
+    },
+    Deserialize, Error, Store, ValueKey, U64_LEN,
+};
+
+use super::Field;
+
+#[derive(Debug)]
+struct Text<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    field: Field<T>,
+    text: Cow<'x, str>,
+    typ: Type,
+}
+
+#[derive(Debug)]
+enum Type {
+    Text(Language),
+    Tokenize,
+    Keyword,
+}
+
+#[derive(Debug)]
+pub struct FtsDocument<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    parts: Vec<Text<'x, T>>,
+    default_language: Language,
+    account_id: u32,
+    collection: u8,
+    document_id: u32,
+}
+
+impl<'x, T: Into<u8> + Display + Clone + std::fmt::Debug> FtsDocument<'x, T> {
+    pub fn with_default_language(default_language: Language) -> FtsDocument<'x, T> {
+        FtsDocument {
+            parts: vec![],
+            default_language,
+            account_id: 0,
+            document_id: 0,
+            collection: 0,
+        }
+    }
+
+    pub fn with_account_id(mut self, account_id: u32) -> Self {
+        self.account_id = account_id;
+        self
+    }
+
+    pub fn with_document_id(mut self, document_id: u32) -> Self {
+        self.document_id = document_id;
+        self
+    }
+
+    pub fn with_collection(mut self, collection: impl Into<u8>) -> Self {
+        self.collection = collection.into();
+        self
+    }
+
+    pub fn index(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>, language: Language) {
+        self.parts.push(Text {
+            field,
+            text: text.into(),
+            typ: Type::Text(language),
+        });
+    }
+
+    pub fn index_tokenized(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>) {
+        self.parts.push(Text {
+            field,
+            text: text.into(),
+            typ: Type::Tokenize,
+        });
+    }
+
+    pub fn index_keyword(&mut self, field: Field<T>, text: impl Into<Cow<'x, str>>) {
+        self.parts.push(Text {
+            field,
+            text: text.into(),
+            typ: Type::Keyword,
+        });
+    }
+}
+
+impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<Field<T>> for u8 {
+    fn from(value: Field<T>) -> Self {
+        match value {
+            Field::Body => 0,
+            Field::Attachment => 1,
+            Field::Keyword => 2,
+            Field::Header(value) => 3 + value.into(),
+        }
+    }
+}
+
+impl Store {
+    pub async fn fts_index<T: Into<u8> + Display + Clone + std::fmt::Debug>(
+        &self,
+        document: FtsDocument<'_, T>,
+    ) -> crate::Result<()> {
+        let mut detect = LanguageDetector::new();
+        let mut tokens: AHashMap<BitmapHash, AHashSet<u8>> = AHashMap::new();
+        let mut parts = Vec::new();
+
+        for text in document.parts {
+            match text.typ {
+                Type::Text(language) => {
+                    let language = if language == Language::Unknown {
+                        detect.detect(&text.text, MIN_LANGUAGE_SCORE)
+                    } else {
+                        language
+                    };
+                    parts.push((text.field, language, text.text));
+                }
+                Type::Tokenize => {
+                    let field = u8::from(text.field);
+                    for token in WordTokenizer::new(text.text.as_ref(), MAX_TOKEN_LENGTH) {
+                        tokens
+                            .entry(BitmapHash::new(token.word.as_ref()))
+                            .or_default()
+                            .insert(TokenType::word(field));
+                    }
+                }
+                Type::Keyword => {
+                    let field = u8::from(text.field);
+                    tokens
+                        .entry(BitmapHash::new(text.text.as_ref()))
+                        .or_default()
+                        .insert(TokenType::word(field));
+                }
+            }
+        }
+
+        let default_language = detect
+            .most_frequent_language()
+            .unwrap_or(document.default_language);
+
+        for (field, language, text) in parts.into_iter() {
+            let language = if language != Language::Unknown {
+                language
+            } else {
+                default_language
+            };
+            let field: u8 = field.into();
+
+            let mut last_token = Cow::Borrowed("");
+            for token in Stemmer::new(&text, language, MAX_TOKEN_LENGTH) {
+                if !last_token.is_empty() {
+                    tokens
+                        .entry(BitmapHash::new(&format!("{} {}", last_token, token.word)))
+                        .or_default()
+                        .insert(TokenType::bigram(field));
+                }
+
+                tokens
+                    .entry(BitmapHash::new(token.word.as_ref()))
+                    .or_default()
+                    .insert(TokenType::word(field));
+
+                if let Some(stemmed_word) = token.stemmed_word {
+                    tokens
+                        .entry(BitmapHash::new(stemmed_word.as_ref()))
+                        .or_default()
+                        .insert(TokenType::stemmed(field));
+                }
+
+                last_token = token.word;
+            }
+        }
+
+        if tokens.is_empty() {
+            return Ok(());
+        }
+
+        // Serialize tokens
+        let mut serializer = KeySerializer::new(tokens.len() * U64_LEN * 2);
+        let mut keys = Vec::with_capacity(tokens.len());
+
+        for (hash, fields) in tokens.into_iter() {
+            serializer = serializer
+                .write(hash.hash.as_slice())
+                .write(hash.len)
+                .write(fields.len() as u8);
+            for field in fields.into_iter() {
+                serializer = serializer.write(field);
+                keys.push(Operation::Bitmap {
+                    class: BitmapClass::Text { field, token: hash },
+                    set: true,
+                });
+            }
+        }
+
+        // Write term index
+        let mut batch = BatchBuilder::new();
+        batch
+            .with_account_id(document.account_id)
+            .with_collection(document.collection)
+            .update_document(document.document_id)
+            .set(
+                ValueClass::TermIndex,
+                lz4_flex::compress_prepend_size(&serializer.finalize()),
+            );
+        self.write(batch.build()).await?;
+        let mut batch = BatchBuilder::new();
+        batch
+            .with_account_id(document.account_id)
+            .with_collection(document.collection)
+            .update_document(document.document_id);
+
+        for (pos, key) in keys.into_iter().enumerate() {
+            if pos > 0 && pos & 1023 == 0 {
+                self.write(batch.build()).await?;
+                batch = BatchBuilder::new();
+                batch
+                    .with_account_id(document.account_id)
+                    .with_collection(document.collection)
+                    .update_document(document.document_id);
+            }
+            batch.ops.push(key);
+        }
+
+        if !batch.is_empty() {
+            self.write(batch.build()).await?;
+        }
+
+        Ok(())
+    }
+
+    pub async fn fts_remove(
+        &self,
+        account_id: u32,
+        collection: u8,
+        document_id: u32,
+    ) -> crate::Result<bool> {
+        // Obtain term index
+        let term_index = if let Some(term_index) = self
+            .get_value::<TermIndex>(ValueKey {
+                account_id,
+                collection,
+                document_id,
+                class: ValueClass::TermIndex,
+            })
+            .await?
+        {
+            term_index
+        } else {
+            return Ok(false);
+        };
+
+        // Remove keys
+        let mut batch = BatchBuilder::new();
+        batch
+            .with_account_id(account_id)
+            .with_collection(collection)
+            .update_document(document_id);
+
+        for (pos, key) in term_index.ops.into_iter().enumerate() {
+            if pos > 0 && pos & 1023 == 0 {
+                self.write(batch.build()).await?;
+                batch = BatchBuilder::new();
+                batch
+                    .with_account_id(account_id)
+                    .with_collection(collection)
+                    .update_document(document_id);
+            }
+            batch.ops.push(key);
+        }
+
+        if !batch.is_empty() {
+            self.write(batch.build()).await?;
+        }
+
+        // Remove term index
+        let mut batch = BatchBuilder::new();
+        batch
+            .with_account_id(account_id)
+            .with_collection(collection)
+            .update_document(document_id)
+            .clear(ValueClass::TermIndex);
+
+        self.write(batch.build()).await?;
+
+        Ok(true)
+    }
+
+    pub async fn fts_remove_all(&self, _: u32) -> crate::Result<()> {
+        // No-op
+        // Term indexes are stored in the same key range as the document
+
+        Ok(())
+    }
+}
+
+struct TermIndex {
+    ops: Vec<Operation>,
+}
+
+impl Deserialize for TermIndex {
+    fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
+        let bytes = lz4_flex::decompress_size_prepended(bytes)
+            .map_err(|_| Error::InternalError("Failed to decompress term index".to_string()))?;
+        let mut ops = Vec::new();
+        let mut bytes = bytes.iter().peekable();
+
+        while bytes.peek().is_some() {
+            let mut hash = BitmapHash {
+                hash: [0; 8],
+                len: 0,
+            };
+
+            for byte in hash.hash.iter_mut() {
+                *byte = *bytes.next().ok_or(Error::InternalError(
+                    "Unexpected EOF reading term index".to_string(),
+                ))?;
+            }
+
+            hash.len = *bytes.next().ok_or(Error::InternalError(
+                "Unexpected EOF reading term index".to_string(),
+            ))?;
+            let num_fields = *bytes.next().ok_or(Error::InternalError(
+                "Unexpected EOF reading term index".to_string(),
+            ))?;
+            for _ in 0..num_fields {
+                let field = *bytes.next().ok_or(Error::InternalError(
+                    "Unexpected EOF reading term index".to_string(),
+                ))?;
+                ops.push(Operation::Bitmap {
+                    class: BitmapClass::Text { field, token: hash },
+                    set: false,
+                });
+            }
+        }
+
+        Ok(Self { ops })
+    }
+}
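Note: the compressed term index written by fts_index above is a flat sequence of per-token records, which is exactly what Deserialize for TermIndex walks back. A sketch of one record (the layout sizes come from the code; the field tag values are assumptions):

    fn term_index_record_sketch() {
        // Per token: 8 hash bytes, 1 length byte, 1 field-count byte, then one tag byte per field.
        let record: Vec<u8> = vec![
            0xAB, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, // hash.hash
            5,    // hash.len: original token length
            2,    // num_fields
            0x00, // e.g. TokenType::word(0), a body word (tag value assumed)
            0x40, // e.g. TokenType::stemmed(0), its stemmed form (tag value assumed)
        ];
        let compressed = lz4_flex::compress_prepend_size(&record);
        assert_eq!(lz4_flex::decompress_size_prepended(&compressed).unwrap(), record);
    }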
@@ -21,55 +21,188 @@
  * for more details.
  */

-use crate::{
-    write::{BitmapFamily, Operation},
-    BitmapKey, Serialize, BM_HASH,
-};
+use std::fmt::Display;

-use self::{bloom::hash_token, builder::MAX_TOKEN_MASK};
+use nlp::language::Language;

-pub mod bloom;
-pub mod builder;
+pub mod index;
 pub mod query;
-pub mod search_snippet;
-pub mod term_index;

-impl BitmapKey<Vec<u8>> {
-    pub fn hash(word: &str, account_id: u32, collection: u8, family: u8, field: u8) -> Self {
-        BitmapKey {
-            account_id,
-            collection,
-            family: BM_HASH | family | (word.len() & MAX_TOKEN_MASK) as u8,
-            field,
-            block_num: 0,
-            key: hash_token(word),
-        }
-    }
+#[derive(Clone, Debug)]
+pub enum Field<T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    Header(T),
+    Body,
+    Attachment,
+    Keyword,
+}

-    pub fn value(
-        account_id: u32,
-        collection: impl Into<u8>,
-        field: impl Into<u8>,
-        value: impl BitmapFamily + Serialize,
-    ) -> Self {
-        BitmapKey {
-            account_id,
-            collection: collection.into(),
-            family: value.family(),
-            field: field.into(),
-            block_num: 0,
-            key: value.serialize(),
-        }
-    }
-}
+#[derive(Debug)]
+pub enum FtsFilter<T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    Exact {
+        field: Field<T>,
+        text: String,
+        language: Language,
+    },
+    Contains {
+        field: Field<T>,
+        text: String,
+        language: Language,
+    },
+    Keyword {
+        field: Field<T>,
+        text: String,
+    },
+    And,
+    Or,
+    Not,
+    End,
+}

+impl<T: Into<u8> + Display + Clone + std::fmt::Debug> FtsFilter<T> {
+    pub fn has_text_detect(
+        field: Field<T>,
+        text: impl Into<String>,
+        default_language: Language,
+    ) -> Self {
+        let (text, language) = Language::detect(text.into(), default_language);
+        Self::has_text(field, text, language)
+    }
+
+    pub fn has_text(field: Field<T>, text: impl Into<String>, language: Language) -> Self {
+        let text = text.into();
+        if !matches!(language, Language::None) && (text.starts_with('"') && text.ends_with('"'))
+            || (text.starts_with('\'') && text.ends_with('\''))
+        {
+            FtsFilter::Exact {
+                field,
+                text,
+                language,
+            }
+        } else {
+            FtsFilter::Contains {
+                field,
+                text,
+                language,
+            }
+        }
+    }
+
+    pub fn has_keyword(field: Field<T>, text: impl Into<String>) -> Self {
+        FtsFilter::Keyword {
+            field,
+            text: text.into(),
+        }
+    }
+
+    pub fn has_english_text(field: Field<T>, text: impl Into<String>) -> Self {
+        Self::has_text(field, text, Language::English)
+    }
+}

-impl Operation {
-    pub fn hash(word: &str, family: u8, field: u8, set: bool) -> Self {
-        Operation::Bitmap {
-            family: BM_HASH | family | (word.len() & MAX_TOKEN_MASK) as u8,
-            field,
-            key: hash_token(word),
-            set,
-        }
-    }
-}
+#[derive(Clone, Copy)]
+pub enum FilterType {
+    And,
+    Or,
+    Not,
+    End,
+    Store,
+    Fts,
+}
+
+pub enum FilterGroup<T: FilterItem> {
+    Fts(Vec<T>),
+    Store(T),
+}
+
+pub trait FilterItem: Clone {
+    fn filter_type(&self) -> FilterType;
+}
+
+pub trait IntoFilterGroup<T: FilterItem + From<FilterType>> {
+    fn into_filter_group(self) -> Vec<FilterGroup<T>>;
+}
+
+impl<T: FilterItem + From<FilterType>> IntoFilterGroup<T> for Vec<T> {
+    fn into_filter_group(self) -> Vec<FilterGroup<T>> {
+        let mut filter = Vec::with_capacity(self.len());
+        let mut iter = self.into_iter();
+        let mut logical_op = None;
+
+        while let Some(item) = iter.next() {
+            if matches!(item.filter_type(), FilterType::Fts) {
+                let mut store_item = None;
+                let mut depth = 0;
+                let mut fts = Vec::with_capacity(5);
+
+                // Add the logical operator if there is one
+                let in_logical_op = if let Some(op) = logical_op.take() {
+                    fts.push(op);
+                    true
+                } else {
+                    false
+                };
+                fts.push(item);
+
+                for item in iter.by_ref() {
+                    match item.filter_type() {
+                        FilterType::And | FilterType::Or | FilterType::Not => {
+                            depth += 1;
+                            fts.push(item);
+                        }
+                        FilterType::End if depth > 0 => {
+                            depth -= 1;
+                            fts.push(item);
+                        }
+                        FilterType::Fts => {
+                            fts.push(item);
+                        }
+                        _ => {
+                            store_item = Some(item);
+                            break;
+                        }
+                    }
+                }
+
+                if in_logical_op {
+                    fts.push(T::from(FilterType::End));
+                }
+
+                if depth > 0 {
+                    let mut store = Vec::with_capacity(depth * 2);
+                    while depth > 0 {
+                        let item = fts.pop().unwrap();
+                        if matches!(
+                            item.filter_type(),
+                            FilterType::And | FilterType::Or | FilterType::Not
+                        ) {
+                            depth -= 1;
+                        }
+                        store.push(FilterGroup::Store(item));
+                    }
+
+                    filter.push(FilterGroup::Fts(fts));
+                    filter.extend(store);
+                } else {
+                    filter.push(FilterGroup::Fts(fts));
+                }
+
+                if let Some(item) = store_item {
+                    filter.push(FilterGroup::Store(item));
+                }
+            } else {
+                match item.filter_type() {
+                    FilterType::And | FilterType::Or => {
+                        logical_op = Some(item.clone());
+                    }
+                    FilterType::Not => {
+                        logical_op = Some(T::from(FilterType::And));
+                    }
+                    _ => {}
+                }
+                filter.push(FilterGroup::Store(item));
+            }
         }
+
+        filter
     }
 }
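Note: to see what into_filter_group produces, a self-contained sketch with a toy filter type; everything here except the traits themselves is hypothetical:

    #[derive(Clone, Debug)]
    enum MyFilter {
        Fts(&'static str),
        StoreFlag,
        And,
        End,
    }

    impl FilterItem for MyFilter {
        fn filter_type(&self) -> FilterType {
            match self {
                MyFilter::Fts(_) => FilterType::Fts,
                MyFilter::StoreFlag => FilterType::Store,
                MyFilter::And => FilterType::And,
                MyFilter::End => FilterType::End,
            }
        }
    }

    impl From<FilterType> for MyFilter {
        fn from(value: FilterType) -> Self {
            match value {
                FilterType::And => MyFilter::And,
                FilterType::End => MyFilter::End,
                _ => unreachable!(),
            }
        }
    }

    fn split_filters() -> Vec<FilterGroup<MyFilter>> {
        // The FTS run is pulled into one FilterGroup::Fts (re-wrapped in the
        // pending And/End), while store-evaluated items stay as FilterGroup::Store.
        vec![
            MyFilter::And,
            MyFilter::Fts("rust"),
            MyFilter::StoreFlag,
            MyFilter::End,
        ]
        .into_filter_group()
    }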
@@ -21,138 +21,210 @@
  * for more details.
  */

-use std::ops::BitOrAssign;
+use std::{
+    fmt::Display,
+    ops::{BitAndAssign, BitOrAssign, BitXorAssign},
+};

-use nlp::language::{stemmer::Stemmer, Language};
+use nlp::language::stemmer::Stemmer;
 use roaring::RoaringBitmap;

-use crate::{fts::builder::MAX_TOKEN_LENGTH, BitmapKey, ValueKey, HASH_EXACT, HASH_STEMMED};
+use crate::{backend::MAX_TOKEN_LENGTH, fts::FtsFilter, write::BitmapClass, BitmapKey, Store};

-use super::term_index::TermIndex;
+struct State<T: Into<u8> + Display + Clone + std::fmt::Debug> {
+    pub op: FtsFilter<T>,
+    pub bm: Option<RoaringBitmap>,
+}

-#[async_trait::async_trait]
-pub trait StoreFts: StoreRead {
-    async fn fts_query(
-        &mut self,
-        account_id: u32,
-        collection: u8,
-        field: u8,
-        text: &str,
-        language: Language,
-        match_phrase: bool,
-    ) -> crate::Result<Option<RoaringBitmap>> {
-        if match_phrase {
-            let mut phrase = Vec::new();
-            let mut bit_keys = Vec::new();
-            for token in language.tokenize_text(text, MAX_TOKEN_LENGTH) {
-                let key = BitmapKey::hash(
-                    token.word.as_ref(),
-                    account_id,
-                    collection,
-                    HASH_EXACT,
-                    field,
-                );
-                if !bit_keys.contains(&key) {
-                    bit_keys.push(key);
-                }
-
-                phrase.push(token.word);
-            }
-            let bitmaps = match self.get_bitmaps_intersection(bit_keys).await? {
-                Some(b) if !b.is_empty() => b,
-                _ => return Ok(None),
-            };
-
-            match phrase.len() {
-                0 => return Ok(None),
-                1 => return Ok(Some(bitmaps)),
-                _ => (),
-            }
-
-            let mut results = RoaringBitmap::new();
-            for document_id in bitmaps {
-                if let Some(term_index) = self
-                    .get_value::<TermIndex>(ValueKey::term_index(
-                        account_id,
-                        collection,
-                        document_id,
-                    ))
-                    .await?
-                {
-                    if term_index
-                        .match_terms(
-                            &phrase
-                                .iter()
-                                .map(|w| term_index.get_match_term(w, None))
-                                .collect::<Vec<_>>(),
-                            field.into(),
-                            true,
-                            false,
-                            false,
-                        )
-                        .map_err(|e| {
-                            crate::Error::InternalError(format!(
-                                "TermIndex match_terms failed for {account_id}/{collection}/{document_id}: {e:?}"
-                            ))
-                        })?
-                        .is_some()
-                    {
-                        results.insert(document_id);
-                    }
-                } else {
-                    tracing::debug!(
-                        event = "error",
-                        context = "fts_query",
-                        account_id = account_id,
-                        collection = collection,
-                        document_id = document_id,
-                        "Document is missing a term index",
-                    );
-                }
-            }
-
-            if !results.is_empty() {
-                Ok(Some(results))
-            } else {
-                Ok(None)
-            }
-        } else {
-            let mut bitmaps = RoaringBitmap::new();
-
-            for token in Stemmer::new(text, language, MAX_TOKEN_LENGTH) {
-                let token1 =
-                    BitmapKey::hash(&token.word, account_id, collection, HASH_EXACT, field);
-                let token2 = if let Some(stemmed_word) = token.stemmed_word {
-                    BitmapKey::hash(&stemmed_word, account_id, collection, HASH_STEMMED, field)
-                } else {
-                    let mut token2 = token1.clone();
-                    token2.family &= !HASH_EXACT;
-                    token2.family |= HASH_STEMMED;
-                    token2
-                };
-
-                match self.get_bitmaps_union(vec![token1, token2]).await? {
-                    Some(b) if !b.is_empty() => {
-                        if !bitmaps.is_empty() {
-                            bitmaps &= b;
-                            if bitmaps.is_empty() {
-                                return Ok(None);
-                            }
-                        } else {
-                            bitmaps = b;
-                        }
-                    }
-                    _ => return Ok(None),
-                };
-            }
-
-            Ok(Some(bitmaps))
-        }
-    }
+impl Store {
+    pub async fn fts_query<T: Into<u8> + Display + Clone + std::fmt::Debug>(
+        &self,
+        account_id: u32,
+        collection: impl Into<u8>,
+        filters: Vec<FtsFilter<T>>,
+    ) -> crate::Result<RoaringBitmap> {
+        let collection = collection.into();
+        let mut not_mask = RoaringBitmap::new();
+        let mut not_fetch = false;
+
+        let mut state: State<T> = FtsFilter::And.into();
+        let mut stack = Vec::new();
+        let mut filters = filters.into_iter().peekable();
+
+        while let Some(filter) = filters.next() {
+            let mut result = match filter {
+                FtsFilter::Exact {
+                    field,
+                    text,
+                    language,
+                } => {
+                    let field: u8 = field.clone().into();
+
+                    let tokens = language
+                        .tokenize_text(text.as_ref(), MAX_TOKEN_LENGTH)
+                        .map(|t| t.word)
+                        .collect::<Vec<_>>();
+                    let keys = if tokens.len() > 1 {
+                        tokens
+                            .windows(2)
+                            .map(|bg| BitmapKey {
+                                account_id,
+                                collection,
+                                class: BitmapClass::bigram(format!("{} {}", bg[0], bg[1]), field),
+                                block_num: 0,
+                            })
+                            .collect::<Vec<_>>()
+                    } else {
+                        tokens
+                            .into_iter()
+                            .map(|word| BitmapKey {
+                                account_id,
+                                collection,
+                                class: BitmapClass::word(word.as_ref(), field),
+                                block_num: 0,
+                            })
+                            .collect::<Vec<_>>()
+                    };
+
+                    self.get_bitmaps_intersection(keys).await?
+                }
+                FtsFilter::Contains {
+                    field,
+                    text,
+                    language,
+                } => {
+                    let mut result = RoaringBitmap::new();
+                    let field: u8 = field.clone().into();
+
+                    for token in Stemmer::new(text.as_ref(), language, MAX_TOKEN_LENGTH) {
+                        let token1 = BitmapKey {
+                            account_id,
+                            collection,
+                            class: BitmapClass::word(token.word.as_ref(), field),
+                            block_num: 0,
+                        };
+                        let token2 = BitmapKey {
+                            account_id,
+                            collection,
+                            class: BitmapClass::stemmed(
+                                if let Some(stemmed_word) = token.stemmed_word {
+                                    stemmed_word
+                                } else {
+                                    token.word
+                                }
+                                .as_ref(),
+                                field,
+                            ),
+                            block_num: 0,
+                        };
+
+                        match self.get_bitmaps_union(vec![token1, token2]).await? {
+                            Some(b) if !b.is_empty() => {
+                                if !result.is_empty() {
+                                    result &= b;
+                                    if result.is_empty() {
+                                        break;
+                                    }
+                                } else {
+                                    result = b;
+                                }
+                            }
+                            _ => break,
+                        }
+                    }
+
+                    if !result.is_empty() {
+                        Some(result)
+                    } else {
+                        None
+                    }
+                }
+                FtsFilter::Keyword { field, text } => {
+                    self.get_bitmap(BitmapKey {
+                        account_id,
+                        collection,
+                        class: BitmapClass::word(text, field),
+                        block_num: 0,
+                    })
+                    .await?
+                }
+                op @ (FtsFilter::And | FtsFilter::Or | FtsFilter::Not) => {
+                    stack.push(state);
+                    state = op.into();
+                    continue;
+                }
+                FtsFilter::End => {
+                    if let Some(prev_state) = stack.pop() {
+                        let bm = state.bm;
+                        state = prev_state;
+                        bm
+                    } else {
+                        break;
+                    }
+                }
+            };
+
+            // Only fetch not mask if we need it
+            if matches!(state.op, FtsFilter::Not) && !not_fetch {
+                not_mask = self
+                    .get_bitmap(BitmapKey::document_ids(account_id, collection))
+                    .await?
+                    .unwrap_or_else(RoaringBitmap::new);
+                not_fetch = true;
+            }
+
+            // Apply logical operation
+            if let Some(dest) = &mut state.bm {
+                match state.op {
+                    FtsFilter::And => {
+                        if let Some(result) = result {
+                            dest.bitand_assign(result);
+                        } else {
+                            dest.clear();
+                        }
+                    }
+                    FtsFilter::Or => {
+                        if let Some(result) = result {
+                            dest.bitor_assign(result);
+                        }
+                    }
+                    FtsFilter::Not => {
+                        if let Some(mut result) = result {
+                            result.bitxor_assign(&not_mask);
+                            dest.bitand_assign(result);
+                        }
+                    }
+                    _ => unreachable!(),
+                }
+            } else if let Some(ref mut result_) = result {
+                if let FtsFilter::Not = state.op {
+                    result_.bitxor_assign(&not_mask);
+                }
+                state.bm = result;
+            } else if let FtsFilter::Not = state.op {
+                state.bm = Some(not_mask.clone());
+            } else {
+                state.bm = Some(RoaringBitmap::new());
+            }
+
+            // And short circuit
+            if matches!(state.op, FtsFilter::And) && state.bm.as_ref().unwrap().is_empty() {
+                while let Some(filter) = filters.peek() {
+                    if matches!(filter, FtsFilter::End) {
+                        break;
+                    } else {
+                        filters.next();
+                    }
+                }
+            }
+        }
+
+        Ok(state.bm.unwrap_or_default())
+    }

-    async fn get_bitmaps_union<T: AsRef<[u8]> + Sync + Send>(
+    async fn get_bitmaps_union(
         &self,
-        keys: Vec<BitmapKey<T>>,
+        keys: Vec<BitmapKey<BitmapClass>>,
     ) -> crate::Result<Option<RoaringBitmap>> {
         let mut bm = RoaringBitmap::new();

@@ -165,3 +237,12 @@ pub trait StoreFts: StoreRead {
         Ok(if !bm.is_empty() { Some(bm) } else { None })
     }
 }
+
+impl<T: Into<u8> + Display + Clone + std::fmt::Debug> From<FtsFilter<T>> for State<T> {
+    fn from(value: FtsFilter<T>) -> Self {
+        Self {
+            op: value,
+            bm: None,
+        }
+    }
+}
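The rewritten `fts_query` evaluates the filter stream with an explicit operator stack: And/Or/Not push the current state, End pops it, and each leaf result is folded into the state's bitmap. A compact, self-contained sketch of that evaluation loop, assuming only the `roaring` crate; leaves are modeled as pre-fetched bitmaps, and NOT is approximated as complement against the full document set, which matches the XOR-with-mask trick above when results are subsets of it:

```rust
use roaring::RoaringBitmap;

enum Node {
    And,
    Or,
    Not,
    End,
    Leaf(RoaringBitmap), // stands in for a term lookup
}

#[derive(Clone, Copy, PartialEq)]
enum Op { And, Or, Not }

fn eval(nodes: Vec<Node>, all_docs: &RoaringBitmap) -> RoaringBitmap {
    let mut stack: Vec<(Op, Option<RoaringBitmap>)> = Vec::new();
    let mut state: (Op, Option<RoaringBitmap>) = (Op::And, None); // implicit top-level AND
    for node in nodes {
        let result = match node {
            Node::And => { stack.push(state); state = (Op::And, None); continue; }
            Node::Or => { stack.push(state); state = (Op::Or, None); continue; }
            Node::Not => { stack.push(state); state = (Op::Not, None); continue; }
            Node::End => {
                // Pop: the subtree's bitmap becomes a leaf of the parent state.
                let bm = state.1.take();
                state = stack.pop().expect("unbalanced End");
                bm
            }
            Node::Leaf(bm) => Some(bm),
        };
        let mut bm = result.unwrap_or_default();
        if state.0 == Op::Not {
            bm = all_docs - &bm; // complement, like the not_mask XOR above
        }
        state.1 = Some(match state.1.take() {
            Some(mut acc) => {
                if state.0 == Op::Or { acc |= bm } else { acc &= bm }
                acc
            }
            None => bm,
        });
    }
    state.1.unwrap_or_default()
}

fn main() {
    let all: RoaringBitmap = (0..10u32).collect();
    let a: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
    let b: RoaringBitmap = [3u32, 4].into_iter().collect();
    // a AND (NOT b) -> {1, 2}
    let r = eval(vec![Node::Leaf(a), Node::Not, Node::Leaf(b), Node::End], &all);
    assert_eq!(r.iter().collect::<Vec<_>>(), vec![1, 2]);
}
```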
@@ -23,7 +23,7 @@

 use std::{borrow::Cow, convert::TryInto};

-use crate::{Deserialize, Serialize};
+use crate::{Deserialize, Serialize, U32_LEN, U64_LEN};

 use ahash::{AHashMap, AHashSet};
 use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
@@ -24,8 +24,8 @@
 use std::{fmt::Display, sync::Arc};

 pub mod backend;
-//pub mod fts;
 pub mod dispatch;
+pub mod fts;
 pub mod query;
 pub mod write;

@@ -37,11 +37,6 @@ pub use rand;
 pub use roaring;
 use write::{BitmapClass, BlobOp, ValueClass};

-#[cfg(feature = "rocks")]
-pub struct Store {
-    db: rocksdb::OptimisticTransactionDB<rocksdb::MultiThreaded>,
-}
-
 pub trait Deserialize: Sized + Sync + Send {
     fn deserialize(bytes: &[u8]) -> crate::Result<Self>;
 }

@@ -103,9 +98,9 @@ pub struct LogKey {
     pub change_id: u64,
 }

-const BLOB_HASH_LEN: usize = 32;
-const U64_LEN: usize = std::mem::size_of::<u64>();
-const U32_LEN: usize = std::mem::size_of::<u32>();
+pub const BLOB_HASH_LEN: usize = 32;
+pub const U64_LEN: usize = std::mem::size_of::<u64>();
+pub const U32_LEN: usize = std::mem::size_of::<u32>();

 #[derive(Clone, Debug, Default, PartialEq, Eq, Hash, serde::Serialize, serde::Deserialize)]
 pub struct BlobHash([u8; BLOB_HASH_LEN]);

@@ -158,6 +153,7 @@ pub const SUBSPACE_VALUES: u8 = b'v';
 pub const SUBSPACE_LOGS: u8 = b'l';
 pub const SUBSPACE_INDEXES: u8 = b'i';
 pub const SUBSPACE_BLOBS: u8 = b'o';
+pub const SUBSPACE_BLOB_DATA: u8 = b't';
 pub const SUBSPACE_ACLS: u8 = b'a';
 pub const SUBSPACE_COUNTERS: u8 = b'c';

@@ -179,6 +175,13 @@ pub enum Store {
 pub enum BlobStore {
     Fs(Arc<FsStore>),
     S3(Arc<S3Store>),
+    Sqlite(Arc<SqliteStore>),
+    FoundationDb(Arc<FdbStore>),
+}
+
+#[derive(Clone)]
+pub enum FtsStore {
+    Store(Store),
 }

 impl From<SqliteStore> for Store {

@@ -204,3 +207,9 @@ impl From<S3Store> for BlobStore {
         Self::S3(Arc::new(store))
     }
 }
+
+impl From<Store> for FtsStore {
+    fn from(store: Store) -> Self {
+        Self::Store(store)
+    }
+}
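With `FtsStore` added next to `BlobStore`, the FTS backend becomes a pluggable enum that currently wraps the main store. A trivial stand-alone sketch of the same conversion-based dispatch shape (stand-in types, not the crate's):

```rust
use std::sync::Arc;

// Stand-in for the real store handle.
struct MainStore;

// The FTS engine is currently backed by the main store itself,
// but the enum leaves room for dedicated engines later.
#[derive(Clone)]
enum Fts {
    Store(Arc<MainStore>),
}

impl From<Arc<MainStore>> for Fts {
    fn from(store: Arc<MainStore>) -> Self {
        Fts::Store(store)
    }
}

fn main() {
    let store = Arc::new(MainStore);
    let _fts: Fts = store.into(); // same shape as `From<Store> for FtsStore` above
}
```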
@@ -24,7 +24,7 @@
 use std::ops::{BitAndAssign, BitOrAssign, BitXorAssign};

 use ahash::HashSet;
-use nlp::tokenizers::space::SpaceTokenizer;
+use nlp::tokenizers::word::WordTokenizer;
 use roaring::RoaringBitmap;

 use crate::{backend::MAX_TOKEN_LENGTH, BitmapKey, Store};

@@ -32,8 +32,8 @@ use crate::{backend::MAX_TOKEN_LENGTH, BitmapKey, Store};
 use super::{Filter, ResultSet};

 struct State {
-    op: Filter,
-    bm: Option<RoaringBitmap>,
+    pub op: Filter,
+    pub bm: Option<RoaringBitmap>,
 }

 impl Store {

@@ -44,8 +44,6 @@ impl Store {
         filters: Vec<Filter>,
     ) -> crate::Result<ResultSet> {
         let collection = collection.into();
-        let mut not_mask = RoaringBitmap::new();
-        let mut not_fetch = false;
         if filters.is_empty() {
             return Ok(ResultSet {
                 account_id,

@@ -61,10 +59,13 @@ impl Store {
         let mut stack = Vec::new();
         let mut filters = filters.into_iter().peekable();

+        let mut not_mask = RoaringBitmap::new();
+        let mut not_fetch = false;
+
         while let Some(filter) = filters.next() {
-            let result = match filter {
+            let mut result = match filter {
                 Filter::MatchValue { field, op, value } => {
-                    self.range_to_bitmap(account_id, collection, field, value, op)
+                    self.range_to_bitmap(account_id, collection, field, &value, op)
                         .await?
                 }
                 Filter::HasText {

@@ -74,7 +75,8 @@ impl Store {
                 } => {
                     if tokenize {
                         self.get_bitmaps_intersection(
-                            SpaceTokenizer::new(&text, MAX_TOKEN_LENGTH)
+                            WordTokenizer::new(&text, MAX_TOKEN_LENGTH)
+                                .map(|token| token.word.into_owned())
                                 .collect::<HashSet<String>>()
                                 .into_iter()
                                 .map(|word| {

@@ -114,6 +116,7 @@ impl Store {
                 }
             };

+            // Only fetch not mask if we need it
             if matches!(state.op, Filter::Not) && !not_fetch {
                 not_mask = self
                     .get_bitmap(BitmapKey::document_ids(account_id, collection))

@@ -122,8 +125,41 @@ impl Store {
                 not_fetch = true;
             }

-            state.op.apply(&mut state.bm, result, &not_mask);
+            // Apply logical operation
+            if let Some(dest) = &mut state.bm {
+                match state.op {
+                    Filter::And => {
+                        if let Some(result) = result {
+                            dest.bitand_assign(result);
+                        } else {
+                            dest.clear();
+                        }
+                    }
+                    Filter::Or => {
+                        if let Some(result) = result {
+                            dest.bitor_assign(result);
+                        }
+                    }
+                    Filter::Not => {
+                        if let Some(mut result) = result {
+                            result.bitxor_assign(&not_mask);
+                            dest.bitand_assign(result);
+                        }
+                    }
+                    _ => unreachable!(),
+                }
+            } else if let Some(ref mut result_) = result {
+                if let Filter::Not = state.op {
+                    result_.bitxor_assign(&not_mask);
+                }
+                state.bm = result;
+            } else if let Filter::Not = state.op {
+                state.bm = Some(not_mask.clone());
+            } else {
+                state.bm = Some(RoaringBitmap::new());
+            }

+            // And short-circuit
             if matches!(state.op, Filter::And) && state.bm.as_ref().unwrap().is_empty() {
                 while let Some(filter) = filters.peek() {
                     if matches!(filter, Filter::End) {

@@ -143,49 +179,6 @@ impl Store {
     }
 }

-impl Filter {
-    #[inline(always)]
-    pub fn apply(
-        &self,
-        dest: &mut Option<RoaringBitmap>,
-        mut src: Option<RoaringBitmap>,
-        not_mask: &RoaringBitmap,
-    ) {
-        if let Some(dest) = dest {
-            match self {
-                Filter::And => {
-                    if let Some(src) = src {
-                        dest.bitand_assign(src);
-                    } else {
-                        dest.clear();
-                    }
-                }
-                Filter::Or => {
-                    if let Some(src) = src {
-                        dest.bitor_assign(src);
-                    }
-                }
-                Filter::Not => {
-                    if let Some(mut src) = src {
-                        src.bitxor_assign(not_mask);
-                        dest.bitand_assign(src);
-                    }
-                }
-                _ => unreachable!(),
-            }
-        } else if let Some(ref mut src_) = src {
-            if let Filter::Not = self {
-                src_.bitxor_assign(not_mask);
-            }
-            *dest = src;
-        } else if let Filter::Not = self {
-            *dest = Some(not_mask.clone());
-        } else {
-            *dest = Some(RoaringBitmap::new());
-        }
-    }
-}
-
 impl From<Filter> for State {
     fn from(value: Filter) -> Self {
         Self {
@@ -130,12 +130,12 @@ impl Store {
         let from_key = LogKey {
             account_id,
             collection,
-            change_id: u64::MAX,
+            change_id: 0,
         };
         let to_key = LogKey {
             account_id,
             collection,
-            change_id: 0,
+            change_id: u64::MAX,
         };

         let mut last_change_id = None;
@@ -29,7 +29,7 @@ pub mod sort;
 use roaring::RoaringBitmap;

 use crate::{
-    write::{BitmapClass, TagValue},
+    write::{BitmapClass, BitmapHash, TagValue},
     BitmapKey, IterateParams, Key, Serialize,
 };

@@ -144,48 +144,6 @@ impl Filter {
         }
     }

-    /*pub fn has_text_detect(
-        field: impl Into<u8>,
-        text: impl Into<String>,
-        default_language: Language,
-    ) -> Self {
-        let (text, language) = Language::detect(text.into(), default_language);
-        Self::has_text(field, text, language)
-    }
-
-    pub fn has_text(field: impl Into<u8>, text: impl Into<String>, language: Language) -> Self {
-        let text = text.into();
-        let op = if !matches!(language, Language::None) {
-            if (text.starts_with('"') && text.ends_with('"'))
-                || (text.starts_with('\'') && text.ends_with('\''))
-            {
-                TextMatch::Exact(language)
-            } else {
-                TextMatch::Stemmed(language)
-            }
-        } else {
-            TextMatch::Tokenized
-        };
-
-        Filter::HasText {
-            field: field.into(),
-            text,
-            op,
-        }
-    }
-
-    pub fn has_raw_text(field: impl Into<u8>, text: impl Into<String>) -> Self {
-        Filter::HasText {
-            field: field.into(),
-            text: text.into(),
-            op: TextMatch::Raw,
-        }
-    }
-
-    pub fn has_english_text(field: impl Into<u8>, text: impl Into<String>) -> Self {
-        Self::has_text(field, text, Language::English)
-    }*/
-
     pub fn has_text(field: impl Into<u8>, text: impl Into<String>) -> Self {
         Filter::HasText {
             field: field.into(),

@@ -255,14 +213,14 @@ impl BitmapKey<BitmapClass> {
         account_id: u32,
         collection: impl Into<u8>,
         field: impl Into<u8>,
-        token: impl Into<Vec<u8>>,
+        token: impl AsRef<[u8]>,
     ) -> Self {
         BitmapKey {
             account_id,
             collection: collection.into(),
             class: BitmapClass::Text {
                 field: field.into(),
-                token: token.into(),
+                token: BitmapHash::new(token),
             },
             block_num: 0,
         }

@@ -317,20 +275,3 @@ impl<T: Key> IterateParams<T> {
         self
     }
 }
-
-/*
-#[derive(Debug)]
-pub struct RawValue<T: Deserialize> {
-    pub raw: Vec<u8>,
-    pub inner: T,
-}
-
-impl<T: Deserialize> Deserialize for RawValue<T> {
-    fn deserialize(bytes: &[u8]) -> crate::Result<Self> {
-        Ok(RawValue {
-            inner: T::deserialize(bytes)?,
-            raw: bytes.to_vec(),
-        })
-    }
-}
-*/
@@ -160,10 +160,10 @@ impl BatchBuilder {
         self
     }

-    pub fn set(&mut self, class: impl Into<ValueClass>, value: Vec<u8>) -> &mut Self {
+    pub fn set(&mut self, class: impl Into<ValueClass>, value: impl Into<Vec<u8>>) -> &mut Self {
         self.ops.push(Operation::Value {
             class: class.into(),
-            op: ValueOp::Set(value),
+            op: ValueOp::Set(value.into()),
         });
         self
     }
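The relaxed signature removes `Vec<u8>` conversions from call sites: any `Into<Vec<u8>>` value (including the new `From<BlobHash> for Vec<u8>` added later in this commit) can be passed directly. A minimal, self-contained illustration of the pattern, using a stand-in builder rather than the crate's:

```rust
// Stand-in for BatchBuilder; only the `Into<Vec<u8>>` widening is the point here.
struct Builder {
    ops: Vec<Vec<u8>>,
}

impl Builder {
    fn set(&mut self, value: impl Into<Vec<u8>>) -> &mut Self {
        self.ops.push(value.into()); // one conversion, inside the builder
        self
    }
}

fn main() {
    let mut b = Builder { ops: Vec::new() };
    b.set("raw str").set(vec![1u8, 2, 3]); // &str and Vec<u8> both work unchanged
    assert_eq!(b.ops.len(), 2);
}
```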
crates/store/src/write/hash.rs (new file, 158 lines)
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2023 Stalwart Labs Ltd.
+ *
+ * This file is part of the Stalwart Mail Server.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ * in the LICENSE file at the top-level directory of this distribution.
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can be released from the requirements of the AGPLv3 license by
+ * purchasing a commercial license. Please contact licensing@stalw.art
+ * for more details.
+ */
+
+use crate::backend::MAX_TOKEN_LENGTH;
+
+use super::{BitmapClass, BitmapHash};
+
+impl BitmapClass {
+    pub fn word(token: impl AsRef<[u8]>, field: impl Into<u8>) -> Self {
+        BitmapClass::Text {
+            field: field.into(),
+            token: BitmapHash::new(token),
+        }
+    }
+
+    pub fn stemmed(token: impl AsRef<[u8]>, field: impl Into<u8>) -> Self {
+        BitmapClass::Text {
+            field: field.into() | 1 << 6,
+            token: BitmapHash::new(token),
+        }
+    }
+
+    pub fn bigram(token: impl AsRef<[u8]>, field: impl Into<u8>) -> Self {
+        BitmapClass::Text {
+            field: field.into() | 1 << 7,
+            token: BitmapHash::new(token),
+        }
+    }
+}
+
+impl BitmapHash {
+    pub fn new(item: impl AsRef<[u8]>) -> Self {
+        Self {
+            len: std::cmp::min(item.as_ref().len(), MAX_TOKEN_LENGTH) as u8,
+            hash: hash(item),
+        }
+    }
+
+    pub fn to_u64(&self) -> u64 {
+        u64::from_be_bytes(self.hash)
+    }
+}
+
+fn hash(item: impl AsRef<[u8]>) -> [u8; 8] {
+    let item = item.as_ref();
+    let mut result = [0u8; 8];
+
+    if item.len() <= 8 {
+        result[..item.len()].copy_from_slice(item);
+    } else {
+        result[..4].copy_from_slice(&xxhash_rust::xxh3::xxh3_64(item).to_le_bytes()[..4]);
+        result[4..8].copy_from_slice(&farmhash::hash64(item).to_le_bytes()[..4]);
+    }
+
+    result
+}
+
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+pub struct TokenType {}
+
+impl TokenType {
+    pub fn word(field: u8) -> u8 {
+        field
+    }
+
+    pub fn stemmed(field: u8) -> u8 {
+        1 << 6 | field
+    }
+
+    pub fn bigram(field: u8) -> u8 {
+        1 << 7 | field
+    }
+}
+
+/*
+const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
+    0xaf1f2242106c64b3,
+    0x60ca4cfb4b3ed0ce,
+    0xc7dbc0bb615e82b3,
+    0x520ad065378daf88,
+);
+lazy_static::lazy_static! {
+    static ref SIPHASHER: siphasher::sip::SipHasher13 =
+        siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
+}
+
+let h1 = xxhash_rust::xxh3::xxh3_64(item).to_le_bytes();
+let h2 = farmhash::hash64(item).to_le_bytes();
+let h3 = AHASHER.hash_one(item).to_le_bytes();
+let mut sh = *SIPHASHER;
+sh.write(item.as_ref());
+let h4 = sh.finish().to_le_bytes();
+
+result[..2].copy_from_slice(&h1[..2]);
+result[2..4].copy_from_slice(&h2[..2]);
+result[4..6].copy_from_slice(&h3[..2]);
+result[6..8].copy_from_slice(&h4[..2]);
+
+impl KeySerializer {
+    pub fn hash_text(mut self, item: impl AsRef<[u8]>) -> Self {
+        let item = item.as_ref();
+
+        if item.len() <= 8 {
+            self.buf.extend_from_slice(item);
+        } else {
+            let h1 = xxhash_rust::xxh3::xxh3_64(item).to_le_bytes();
+            let h2 = farmhash::hash64(item).to_le_bytes();
+            let h3 = AHASHER.hash_one(item).to_le_bytes();
+            let mut sh = *SIPHASHER;
+            sh.write(item.as_ref());
+            let h4 = sh.finish().to_le_bytes();
+
+            match item.len() {
+                9..=16 => {
+                    self.buf.extend_from_slice(&h1[..2]);
+                    self.buf.extend_from_slice(&h2[..2]);
+                    self.buf.extend_from_slice(&h3[..2]);
+                    self.buf.extend_from_slice(&h4[..2]);
+                }
+                17..=32 => {
+                    self.buf.extend_from_slice(&h1[..3]);
+                    self.buf.extend_from_slice(&h2[..3]);
+                    self.buf.extend_from_slice(&h3[..3]);
+                    self.buf.extend_from_slice(&h4[..3]);
+                }
+                _ => {
+                    self.buf.extend_from_slice(&h1[..4]);
+                    self.buf.extend_from_slice(&h2[..4]);
+                    self.buf.extend_from_slice(&h3[..4]);
+                    self.buf.extend_from_slice(&h4[..4]);
+                }
+            }
+        }
+        self
+    }
+}
+*/
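The token hash above keeps short tokens verbatim and combines two independent 32-bit digests for longer ones. A stand-alone sketch of the same layout, assuming the `xxhash-rust` crate (with the `xxh3` feature) and the `farmhash` crate used in the file:

```rust
fn token_hash(item: &[u8]) -> [u8; 8] {
    let mut result = [0u8; 8];
    if item.len() <= 8 {
        // Short tokens are stored verbatim, zero-padded: no collisions at all.
        result[..item.len()].copy_from_slice(item);
    } else {
        // Longer tokens: the low 32 bits of two unrelated hash functions,
        // side by side, so a collision has to happen in both at once.
        result[..4].copy_from_slice(&xxhash_rust::xxh3::xxh3_64(item).to_le_bytes()[..4]);
        result[4..8].copy_from_slice(&farmhash::hash64(item).to_le_bytes()[..4]);
    }
    result
}

fn main() {
    assert_eq!(&token_hash(b"cat")[..3], b"cat"); // short token: verbatim
    println!("{:?}", token_hash(b"internationalization")); // long token: hashed
}
```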
@@ -21,19 +21,19 @@
  * for more details.
  */

-use std::{convert::TryInto, hash::Hasher};
+use std::convert::TryInto;
 use utils::codec::leb128::Leb128_;

 use crate::{
-    backend::MAX_TOKEN_MASK, BitmapKey, BlobHash, BlobKey, IndexKey, IndexKeyPrefix, Key, LogKey,
-    ValueKey, BLOB_HASH_LEN, SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_INDEXES, SUBSPACE_LOGS,
-    SUBSPACE_VALUES, U32_LEN, U64_LEN,
+    BitmapKey, BlobHash, BlobKey, IndexKey, IndexKeyPrefix, Key, LogKey, ValueKey, BLOB_HASH_LEN,
+    SUBSPACE_ACLS, SUBSPACE_BITMAPS, SUBSPACE_INDEXES, SUBSPACE_LOGS, SUBSPACE_VALUES, U32_LEN,
+    U64_LEN,
 };

 use super::{BitmapClass, BlobOp, TagValue, ValueClass};

 pub struct KeySerializer {
-    buf: Vec<u8>,
+    pub buf: Vec<u8>,
 }

 pub trait KeySerialize {

@@ -241,6 +241,15 @@ impl<T: AsRef<ValueClass> + Sync + Send> Key for ValueKey<T> {
             }
             .write(u32::MAX)
             .write(name.as_slice()),
+            ValueClass::TermIndex => if include_subspace {
+                KeySerializer::new(U32_LEN * 2 + 3).write(crate::SUBSPACE_VALUES)
+            } else {
+                KeySerializer::new(U32_LEN * 2 + 2)
+            }
+            .write(self.account_id)
+            .write(self.collection)
+            .write_leb128(self.document_id)
+            .write(u8::MAX),
         }
         .finalize()
     }

@@ -277,35 +286,64 @@ impl<T: AsRef<BitmapClass> + Sync + Send> Key for BitmapKey<T> {

     fn serialize(&self, include_subspace: bool) -> Vec<u8> {
         const BM_DOCUMENT_IDS: u8 = 0;
-        const BM_TAG: u8 = 1 << 5;
-        const BM_TEXT: u8 = 1 << 6;
+        const BM_TAG: u8 = 1 << 6;
+        const BM_TEXT: u8 = 1 << 7;

         const TAG_ID: u8 = 0;
         const TAG_TEXT: u8 = 1 << 0;
         const TAG_STATIC: u8 = 1 << 1;

-        let ks = if include_subspace {
-            KeySerializer::new(self.len() + 1).write(crate::SUBSPACE_BITMAPS)
-        } else {
-            KeySerializer::new(self.len())
-        }
-        .write(self.account_id)
-        .write(self.collection);
-
         match self.class.as_ref() {
-            BitmapClass::DocumentIds => ks.write(BM_DOCUMENT_IDS),
+            BitmapClass::DocumentIds => if include_subspace {
+                KeySerializer::new(U32_LEN + 3).write(SUBSPACE_BITMAPS)
+            } else {
+                KeySerializer::new(U32_LEN + 2)
+            }
+            .write(self.account_id)
+            .write(self.collection)
+            .write(BM_DOCUMENT_IDS),
             BitmapClass::Tag { field, value } => match value {
-                TagValue::Id(id) => ks.write(BM_TAG | TAG_ID).write(*field).write_leb128(*id),
-                TagValue::Text(text) => ks
-                    .write(BM_TAG | TAG_TEXT)
-                    .write(*field)
-                    .write(text.as_slice()),
-                TagValue::Static(id) => ks.write(BM_TAG | TAG_STATIC).write(*field).write(*id),
+                TagValue::Id(id) => if include_subspace {
+                    KeySerializer::new((U32_LEN * 2) + 4).write(SUBSPACE_BITMAPS)
+                } else {
+                    KeySerializer::new((U32_LEN * 2) + 3)
+                }
+                .write(self.account_id)
+                .write(self.collection)
+                .write(BM_TAG | TAG_ID)
+                .write(*field)
+                .write_leb128(*id),
+                TagValue::Text(text) => if include_subspace {
+                    KeySerializer::new(U32_LEN + 4 + text.len()).write(SUBSPACE_BITMAPS)
+                } else {
+                    KeySerializer::new(U32_LEN + 3 + text.len())
+                }
+                .write(self.account_id)
+                .write(self.collection)
+                .write(BM_TAG | TAG_TEXT)
+                .write(*field)
+                .write(text.as_slice()),
+                TagValue::Static(id) => if include_subspace {
+                    KeySerializer::new(U32_LEN + 5).write(SUBSPACE_BITMAPS)
+                } else {
+                    KeySerializer::new(U32_LEN + 4)
+                }
+                .write(self.account_id)
+                .write(self.collection)
+                .write(BM_TAG | TAG_STATIC)
+                .write(*field)
+                .write(*id),
             },
-            BitmapClass::Text { field, token } => ks
-                .write(BM_TEXT | (token.len() & MAX_TOKEN_MASK) as u8)
-                .write(*field)
-                .hash_text(token),
+            BitmapClass::Text { field, token } => if include_subspace {
+                KeySerializer::new(U32_LEN + 16 + 3 + 1).write(SUBSPACE_BITMAPS)
+            } else {
+                KeySerializer::new(U32_LEN + 16 + 3)
+            }
+            .write(self.account_id)
+            .write(self.collection)
+            .write(BM_TEXT | token.len)
+            .write(*field)
+            .write(token.hash.as_slice()),
         }
         .write(self.block_num)
         .finalize()

@@ -349,81 +387,3 @@ impl<T: AsRef<BlobHash> + Sync + Send> Key for BlobKey<T> {
         crate::SUBSPACE_BLOBS
     }
 }
-
-const AHASHER: ahash::RandomState = ahash::RandomState::with_seeds(
-    0xaf1f2242106c64b3,
-    0x60ca4cfb4b3ed0ce,
-    0xc7dbc0bb615e82b3,
-    0x520ad065378daf88,
-);
-lazy_static::lazy_static! {
-    static ref SIPHASHER: siphasher::sip::SipHasher13 =
-        siphasher::sip::SipHasher13::new_with_keys(0x56205cbdba8f02a6, 0xbd0dbc4bb06d687b);
-}
-
-impl KeySerializer {
-    fn hash_text(mut self, item: impl AsRef<[u8]>) -> Self {
-        let item = item.as_ref();
-
-        if item.len() <= 8 {
-            self.buf.extend_from_slice(item);
-        } else {
-            let h1 = xxhash_rust::xxh3::xxh3_64(item).to_le_bytes();
-            let h2 = farmhash::hash64(item).to_le_bytes();
-            let h3 = AHASHER.hash_one(item).to_le_bytes();
-            let mut sh = *SIPHASHER;
-            sh.write(item.as_ref());
-            let h4 = sh.finish().to_le_bytes();
-
-            match item.len() {
-                9..=16 => {
-                    self.buf.extend_from_slice(&h1[..2]);
-                    self.buf.extend_from_slice(&h2[..2]);
-                    self.buf.extend_from_slice(&h3[..2]);
-                    self.buf.extend_from_slice(&h4[..2]);
-                }
-                17..=32 => {
-                    self.buf.extend_from_slice(&h1[..3]);
-                    self.buf.extend_from_slice(&h2[..3]);
-                    self.buf.extend_from_slice(&h3[..3]);
-                    self.buf.extend_from_slice(&h4[..3]);
-                }
-                _ => {
-                    self.buf.extend_from_slice(&h1[..4]);
-                    self.buf.extend_from_slice(&h2[..4]);
-                    self.buf.extend_from_slice(&h3[..4]);
-                    self.buf.extend_from_slice(&h4[..4]);
-                }
-            }
-        }
-        self
-    }
-}
-
-impl<T: AsRef<BitmapClass>> BitmapKey<T> {
-    #[allow(clippy::len_without_is_empty)]
-    pub fn len(&self) -> usize {
-        std::mem::size_of::<BitmapKey<BitmapClass>>()
-            + match self.class.as_ref() {
-                BitmapClass::DocumentIds => 0,
-                BitmapClass::Tag { value, .. } => match value {
-                    TagValue::Id(_) => U32_LEN,
-                    TagValue::Text(v) => v.len(),
-                    TagValue::Static(_) => 1,
-                },
-                BitmapClass::Text { token, .. } => token.len(),
-            }
-    }
-}
-
-impl<T: AsRef<ValueClass>> ValueKey<T> {
-    #[allow(clippy::len_without_is_empty)]
-    pub fn len(&self) -> usize {
-        std::mem::size_of::<ValueKey<ValueClass>>()
-            + match self.class.as_ref() {
-                ValueClass::Property(_) => 1,
-                ValueClass::Acl(_) => U32_LEN,
-                ValueClass::Named(v) => v.len(),
-            }
-    }
-}
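With variable-length hashed tokens gone, the text-bitmap key has a fixed shape. A hedged sketch of the resulting byte layout (a hypothetical helper, not the crate's API; big-endian integer writes are assumed to match `KeySerializer::write`, and the trailing `block_num` is omitted):

```rust
// Layout per the BitmapClass::Text arm above:
// account_id (4) | collection (1) | BM_TEXT + token length (1) | field (1) | hash (8)
fn text_bitmap_key(account_id: u32, collection: u8, field: u8, token_len: u8, hash: [u8; 8]) -> Vec<u8> {
    const BM_TEXT: u8 = 1 << 7;
    let mut key = Vec::with_capacity(4 + 1 + 1 + 1 + 8);
    key.extend_from_slice(&account_id.to_be_bytes()); // assumption: big-endian write
    key.push(collection);
    key.push(BM_TEXT | token_len); // token length disambiguates equal hashes
    key.push(field);
    key.extend_from_slice(&hash);
    key
}
```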
@@ -23,7 +23,7 @@

 use std::{collections::HashSet, hash::Hash, slice::Iter, time::SystemTime};

-use nlp::tokenizers::space::SpaceTokenizer;
+use nlp::tokenizers::word::WordTokenizer;
 use utils::codec::leb128::{Leb128Iterator, Leb128Vec};

 use crate::{

@@ -35,6 +35,7 @@ use self::assert::AssertValue;
 pub mod assert;
 pub mod batch;
 pub mod blob;
+pub mod hash;
 pub mod key;
 pub mod log;

@@ -92,14 +93,20 @@ pub enum Operation {
     },
 }

-#[derive(Debug, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum BitmapClass {
     DocumentIds,
     Tag { field: u8, value: TagValue },
-    Text { field: u8, token: Vec<u8> },
+    Text { field: u8, token: BitmapHash },
 }

-#[derive(Debug, PartialEq, Eq, Hash)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct BitmapHash {
+    pub hash: [u8; 8],
+    pub len: u8,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum TagValue {
     Id(u32),
     Text(Vec<u8>),

@@ -111,6 +118,7 @@ pub enum ValueClass {
     Property(u8),
     Acl(u32),
     Named(Vec<u8>),
+    TermIndex,
 }

 #[derive(Debug, PartialEq, Eq, Hash, Default)]

@@ -352,7 +360,7 @@ impl ToBitmaps for &str {
         ops.push(Operation::Bitmap {
             class: BitmapClass::Text {
                 field,
-                token: token.into_bytes(),
+                token: BitmapHash::new(token),
             },
             set,
         });

@@ -362,8 +370,8 @@ impl ToBitmaps for &str {

 impl TokenizeText for &str {
     fn tokenize_into(&self, tokens: &mut HashSet<String>) {
-        for token in SpaceTokenizer::new(self, MAX_TOKEN_LENGTH) {
-            tokens.insert(token);
+        for token in WordTokenizer::new(self, MAX_TOKEN_LENGTH) {
+            tokens.insert(token.word.into_owned());
         }
     }
 }

@@ -479,6 +487,10 @@ impl BlobHash {
     pub fn try_from_hash_slice(value: &[u8]) -> Result<BlobHash, std::array::TryFromSliceError> {
         value.try_into().map(BlobHash)
     }
+
+    pub fn as_slice(&self) -> &[u8] {
+        self.0.as_ref()
+    }
 }

 impl From<&[u8]> for BlobHash {

@@ -523,6 +535,12 @@ impl AsRef<BlobClass> for BlobClass {
     }
 }

+impl From<BlobHash> for Vec<u8> {
+    fn from(value: BlobHash) -> Self {
+        value.0.to_vec()
+    }
+}
+
 impl BlobClass {
     pub fn account_id(&self) -> u32 {
         match self {
@@ -23,6 +23,7 @@ opentelemetry-semantic-conventions = { version = "0.12.0" }
 dashmap = "5.4"
 ahash = { version = "0.8" }
 chrono = "0.4"
+rand = "0.8.5"

 [target.'cfg(unix)'.dependencies]
 privdrop = "0.5.3"
@@ -30,6 +30,7 @@ pub mod config;
 pub mod ipc;
 pub mod listener;
 pub mod map;
+pub mod snowflake;
 pub mod suffixlist;

 use opentelemetry::{
crates/utils/src/snowflake.rs (new file, 69 lines)
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2023 Stalwart Labs Ltd.
+ *
+ * This file is part of Stalwart Mail Server.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ * in the LICENSE file at the top-level directory of this distribution.
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * You can be released from the requirements of the AGPLv3 license by
+ * purchasing a commercial license. Please contact licensing@stalw.art
+ * for more details.
+ */
+
+use std::{
+    sync::atomic::{AtomicU64, Ordering},
+    time::{Duration, SystemTime},
+};
+
+pub struct SnowflakeIdGenerator {
+    epoch: SystemTime,
+    node_id: u64,
+    sequence: AtomicU64,
+}
+
+const SEQUENCE_LEN: u64 = 12;
+const NODE_ID_LEN: u64 = 9;
+
+const SEQUENCE_MASK: u64 = (1 << SEQUENCE_LEN) - 1;
+const NODE_ID_MASK: u64 = (1 << NODE_ID_LEN) - 1;
+
+impl SnowflakeIdGenerator {
+    pub fn new() -> Self {
+        Self::with_node_id(rand::random::<u64>())
+    }
+
+    pub fn with_node_id(node_id: u64) -> Self {
+        Self {
+            epoch: SystemTime::UNIX_EPOCH + Duration::from_secs(1632280000), // 52 years after UNIX_EPOCH
+            node_id,
+            sequence: 0.into(),
+        }
+    }
+
+    pub fn generate(&self) -> Option<u64> {
+        let elapsed = self.epoch.elapsed().ok()?.as_millis() as u64;
+        let sequence = self.sequence.fetch_add(1, Ordering::Relaxed);
+
+        (elapsed << (SEQUENCE_LEN + NODE_ID_LEN)
+            | (self.node_id & NODE_ID_MASK) << SEQUENCE_LEN
+            | (sequence & SEQUENCE_MASK))
+            .into()
+    }
+}
+
+impl Default for SnowflakeIdGenerator {
+    fn default() -> Self {
+        Self::new()
+    }
+}
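Given the constants above, each id packs 12 bits of sequence, 9 bits of node id, and the remaining high bits of milliseconds since the custom epoch. A small stand-alone sketch that decodes that layout with the same constants:

```rust
const SEQUENCE_LEN: u64 = 12;
const NODE_ID_LEN: u64 = 9;

// Split an id back into (elapsed_ms, node_id, sequence).
fn decode(id: u64) -> (u64, u64, u64) {
    let sequence = id & ((1 << SEQUENCE_LEN) - 1);
    let node_id = (id >> SEQUENCE_LEN) & ((1 << NODE_ID_LEN) - 1);
    let elapsed_ms = id >> (SEQUENCE_LEN + NODE_ID_LEN);
    (elapsed_ms, node_id, sequence)
}

fn main() {
    // node 5, sequence 1, 1000 ms after the epoch
    let id = (1000u64 << (SEQUENCE_LEN + NODE_ID_LEN)) | (5 << SEQUENCE_LEN) | 1;
    assert_eq!(decode(id), (1000, 5, 1));
}
```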
@@ -25,9 +25,11 @@ use std::{fs, io};

 use imap_proto::ResponseType;

-use super::{resources_dir, AssertResult, ImapConnection, Type};
+use crate::jmap::wait_for_index;

-pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
+use super::{resources_dir, AssertResult, IMAPTest, ImapConnection, Type};
+
+pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection, handle: &IMAPTest) {
     // Invalid APPEND commands
     imap.send("APPEND \"All Mail\" {1+}\r\na").await;
     imap.assert_read(Type::Tagged, ResponseType::No)

@@ -80,6 +82,8 @@ pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
         assert_eq!(code.next(), Some(expected_uid.to_string().as_str()));
         expected_uid += 1;
     }
+
+    wait_for_index(&handle.jmap).await;
 }

 pub async fn assert_append_message(
@@ -225,7 +225,7 @@ refresh-token-renew = "2s"
 "#;

 #[allow(dead_code)]
-struct IMAPTest {
+pub struct IMAPTest {
     jmap: Arc<JMAP>,
     imap: Arc<IMAP>,
     temp_dir: TempDir,

@@ -331,7 +331,7 @@ async fn init_imap_tests(delete_if_exists: bool) -> IMAPTest {
 pub async fn imap_tests() {
     /*tracing::subscriber::set_global_default(
         tracing_subscriber::FmtSubscriber::builder()
-            .with_max_level(tracing::Level::TRACE)
+            .with_max_level(tracing::Level::DEBUG)
             .finish(),
     )
     .unwrap();*/

@@ -364,10 +364,10 @@ pub async fn imap_tests() {
     }

     mailbox::test(&mut imap, &mut imap_check).await;
-    append::test(&mut imap, &mut imap_check).await;
+    append::test(&mut imap, &mut imap_check, &handle).await;
     search::test(&mut imap, &mut imap_check).await;
     fetch::test(&mut imap, &mut imap_check).await;
-    store::test(&mut imap, &mut imap_check).await;
+    store::test(&mut imap, &mut imap_check, &handle).await;
     copy_move::test(&mut imap, &mut imap_check).await;
     thread::test(&mut imap, &mut imap_check).await;
     idle::test(&mut imap, &mut imap_check).await;
@@ -23,9 +23,11 @@

 use imap_proto::ResponseType;

-use super::{AssertResult, ImapConnection, Type};
+use crate::jmap::wait_for_index;

-pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
+use super::{AssertResult, IMAPTest, ImapConnection, Type};
+
+pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection, handle: &IMAPTest) {
     // Select INBOX
     imap.send("SELECT INBOX").await;
     imap.assert_read(Type::Tagged, ResponseType::Ok)

@@ -73,6 +75,7 @@ pub async fn test(imap: &mut ImapConnection, _imap_check: &mut ImapConnection) {
         .assert_contains("UIDNEXT 11");

     // Store using saved searches
+    wait_for_index(&handle.jmap).await;
     imap.send("SEARCH RETURN (SAVE) FROM nathaniel").await;
     imap.assert_read(Type::Tagged, ResponseType::Ok).await;
     imap.send("UID STORE $ +FLAGS (\\Answered)").await;
@@ -45,7 +45,7 @@ use crate::{
     directory::sql::{
         add_to_group, create_test_group_with_email, create_test_user_with_email, remove_from_group,
     },
-    jmap::{mailbox::destroy_all_mailboxes, test_account_login},
+    jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, test_account_login},
 };

 pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {

@@ -777,10 +777,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
         admin_client.set_default_account_id(&id.to_string());
         destroy_all_mailboxes(admin_client).await;
     }
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 pub fn assert_forbidden<T: Debug>(result: Result<T, jmap_client::Error>) {
@@ -33,7 +33,7 @@ use jmap_proto::types::id::Id;

 use crate::{
     directory::sql::{create_test_user_with_email, link_test_address},
-    jmap::mailbox::destroy_all_mailboxes,
+    jmap::{assert_is_empty, mailbox::destroy_all_mailboxes},
 };

 pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {

@@ -202,8 +202,5 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
     // Destroy test accounts
     admin_client.set_default_account_id(&account_id);
     destroy_all_mailboxes(admin_client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }
@@ -40,7 +40,10 @@ use reqwest::{header, redirect::Policy};
 use serde::de::DeserializeOwned;
 use store::ahash::AHashMap;

-use crate::{directory::sql::create_test_user_with_email, jmap::mailbox::destroy_all_mailboxes};
+use crate::{
+    directory::sql::create_test_user_with_email,
+    jmap::{assert_is_empty, mailbox::destroy_all_mailboxes},
+};

 pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
     println!("Running OAuth tests...");

@@ -307,10 +310,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
     // Destroy test accounts
     admin_client.set_default_account_id(john_id);
     destroy_all_mailboxes(admin_client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 async fn post_bytes(url: &str, params: &AHashMap<String, String>) -> Bytes {
@ -30,7 +30,7 @@ use serde_json::Value;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
directory::sql::create_test_user_with_email,
|
directory::sql::create_test_user_with_email,
|
||||||
jmap::{jmap_json_request, mailbox::destroy_all_mailboxes},
|
jmap::{assert_is_empty, jmap_json_request, mailbox::destroy_all_mailboxes},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
|
pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
|
||||||
|
@ -489,8 +489,5 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
|
||||||
// Remove test data
|
// Remove test data
|
||||||
admin_client.set_default_account_id(account_id.to_string());
|
admin_client.set_default_account_id(account_id.to_string());
|
||||||
destroy_all_mailboxes(admin_client).await;
|
destroy_all_mailboxes(admin_client).await;
|
||||||
server
|
assert_is_empty(server).await;
|
||||||
.store
|
|
||||||
.assert_is_empty(server.blob_store.clone())
|
|
||||||
.await;
|
|
||||||
}
|
}
|
||||||
|
|
|
@@ -34,7 +34,7 @@ use tokio::{

 use crate::{
     directory::sql::{create_test_user_with_email, link_test_address, remove_test_alias},
-    jmap::mailbox::destroy_all_mailboxes,
+    jmap::{assert_is_empty, mailbox::destroy_all_mailboxes},
 };

 pub async fn test(server: Arc<JMAP>, client: &mut Client) {

@@ -248,10 +248,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
         client.set_default_account_id(account_id);
         destroy_all_mailboxes(client).await;
     }
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 pub struct SmtpConnection {
@@ -34,6 +34,8 @@ use store::{
     write::{log::ChangeLogBuilder, BatchBuilder},
 };

+use crate::jmap::assert_is_empty;
+
 pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     println!("Running Email Changes tests...");

@@ -315,10 +317,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     assert_eq!(created, vec![2, 3, 11, 12]);
     assert_eq!(changes.updated(), Vec::<String>::new());
     assert_eq!(changes.destroyed(), Vec::<String>::new());
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 #[derive(Debug, Clone, Copy)]
@@ -27,7 +27,7 @@ use jmap::JMAP;
 use jmap_client::{client::Client, mailbox::Role};
 use jmap_proto::types::id::Id;

-use crate::jmap::mailbox::destroy_all_mailboxes;
+use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};

 pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     println!("Running Email Copy tests...");
@@ -116,8 +116,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     destroy_all_mailboxes(client).await;
     client.set_default_account_id(Id::new(2).to_string());
     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }
@@ -31,7 +31,7 @@ use jmap_client::{
 use jmap_proto::types::id::Id;
 use mail_parser::HeaderName;

-use crate::jmap::{mailbox::destroy_all_mailboxes, replace_blob_ids};
+use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, replace_blob_ids};

 pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     println!("Running Email Get tests...");
@@ -177,11 +177,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     }

     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 pub fn all_headers() -> Vec<email::Property> {
@@ -31,7 +31,9 @@ use jmap_client::{
 };
 use jmap_proto::types::id::Id;

-use crate::jmap::{email_get::all_headers, mailbox::destroy_all_mailboxes, replace_blob_ids};
+use crate::jmap::{
+    assert_is_empty, email_get::all_headers, mailbox::destroy_all_mailboxes, replace_blob_ids,
+};

 pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     println!("Running Email Parse tests...");
@@ -243,9 +245,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     }

     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }
@@ -24,7 +24,7 @@
 use std::{collections::hash_map::Entry, sync::Arc, time::Instant};

 use crate::{
-    jmap::mailbox::destroy_all_mailboxes,
+    jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, wait_for_index},
     store::{deflate_artwork_data, query::FIELDS},
 };
 use jmap::JMAP;
@@ -94,6 +94,9 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client, insert: bool) {
             "thread {} found",
             MAX_THREADS
         );
+
+        // Wait for indexing to complete
+        wait_for_index(&server).await;
     }

     println!("Running JMAP Mail query tests...");
@@ -115,10 +118,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client, insert: bool) {
         .unwrap();

     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 pub async fn query(client: &mut Client) {
@@ -37,6 +37,7 @@ use store::{
 };

 use crate::jmap::{
+    assert_is_empty,
     email_changes::{LogAction, ParseState},
     mailbox::destroy_all_mailboxes,
 };
@@ -287,10 +288,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     }
     server.store.write(batch.build_batch()).await.unwrap();

-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 #[derive(Debug, Clone)]
@@ -23,7 +23,7 @@

 use std::{fs, path::PathBuf, sync::Arc};

-use crate::jmap::mailbox::destroy_all_mailboxes;
+use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, wait_for_index};
 use jmap::{mailbox::INBOX_ID, JMAP};
 use jmap_client::{client::Client, core::query, email::query::Filter};
 use jmap_proto::types::id::Id;
@@ -64,6 +64,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
             .take_id();
         email_ids.insert(email_name, email_id);
     }
+    wait_for_index(&server).await;

     // Run tests
     for (filter, email_name, snippet_subject, snippet_preview) in [
@@ -179,8 +180,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {

     // Destroy test data
     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }
@@ -23,7 +23,7 @@

 use std::{fs, path::PathBuf, sync::Arc};

-use crate::jmap::mailbox::destroy_all_mailboxes;
+use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
 use jmap::{mailbox::INBOX_ID, JMAP};
 use jmap_client::{
     client::Client,
@@ -46,11 +46,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     update(client, &mailbox_id).await;

     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 async fn create(client: &mut Client, mailbox_id: &str) {
@@ -46,7 +46,7 @@ use tokio::{

 use crate::{
     directory::sql::create_test_user_with_email,
-    jmap::{email_set::assert_email_properties, mailbox::destroy_all_mailboxes},
+    jmap::{assert_is_empty, email_set::assert_email_properties, mailbox::destroy_all_mailboxes},
 };

 #[derive(Default, Debug, PartialEq, Eq)]
@@ -471,10 +471,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
         client.email_submission_destroy(&id).await.unwrap();
     }
     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 pub fn spawn_mock_smtp_server() -> (mpsc::Receiver<MockMessage>, Arc<Mutex<MockSMTPSettings>>) {
@@ -25,7 +25,10 @@ use std::{sync::Arc, time::Duration};

 use crate::{
     directory::sql::create_test_user_with_email,
-    jmap::{delivery::SmtpConnection, mailbox::destroy_all_mailboxes, test_account_login},
+    jmap::{
+        assert_is_empty, delivery::SmtpConnection, mailbox::destroy_all_mailboxes,
+        test_account_login,
+    },
 };
 use futures::StreamExt;
 use jmap::{mailbox::INBOX_ID, JMAP};
@@ -130,10 +133,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
     assert_ping(&mut event_rx).await;

     destroy_all_mailboxes(admin_client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 async fn assert_state(
@@ -37,6 +37,8 @@ use jmap_proto::types::{id::Id, state::State};
 use serde::{Deserialize, Serialize};
 use store::ahash::AHashMap;

+use crate::jmap::assert_is_empty;
+
 pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     println!("Running Mailbox tests...");

@@ -606,10 +608,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {

     destroy_all_mailboxes(client).await;
     client.set_default_account_id(Id::from(1u64));
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 async fn create_test_mailboxes(client: &mut Client) -> AHashMap<String, String> {
@@ -25,7 +25,11 @@ use std::{sync::Arc, time::Duration};

 use base64::{engine::general_purpose, Engine};
 use directory::config::ConfigDirectory;
-use jmap::{api::JmapSessionManager, services::IPC_CHANNEL_BUFFER, JMAP};
+use jmap::{
+    api::JmapSessionManager,
+    services::{housekeeper::Event, IPC_CHANNEL_BUFFER},
+    JMAP,
+};
 use jmap_client::client::{Client, Credentials};
 use jmap_proto::types::id::Id;
 use reqwest::header;
@@ -222,17 +226,23 @@ refresh-token-renew = "2s"

 #[tokio::test]
 pub async fn jmap_tests() {
-    let coco = 1;
-    tracing::subscriber::set_global_default(
+    /*let level = "warn";
+    tracing::subscriber::set_global_default(
         tracing_subscriber::FmtSubscriber::builder()
-            .with_max_level(tracing::Level::WARN)
+            .with_env_filter(
+                tracing_subscriber::EnvFilter::builder()
+                    .parse(
+                        format!("smtp={level},imap={level},jmap={level},store={level},utils={level},directory={level}"),
+                    )
+                    .unwrap(),
+            )
             .finish(),
     )
-    .unwrap();
+    .unwrap();*/

     let delete = true;
     let mut params = init_jmap_tests(delete).await;
-    /*email_query::test(params.server.clone(), &mut params.client, delete).await;
+    email_query::test(params.server.clone(), &mut params.client, delete).await;
     email_get::test(params.server.clone(), &mut params.client).await;
     email_set::test(params.server.clone(), &mut params.client).await;
     email_parse::test(params.server.clone(), &mut params.client).await;
@@ -254,7 +264,7 @@ pub async fn jmap_tests() {
     email_submission::test(params.server.clone(), &mut params.client).await;
     websocket::test(params.server.clone(), &mut params.client).await;
     quota::test(params.server.clone(), &mut params.client).await;
-    crypto::test(params.server.clone(), &mut params.client).await;*/
+    crypto::test(params.server.clone(), &mut params.client).await;
     blob::test(params.server.clone(), &mut params.client).await;

     if delete {
@@ -285,6 +295,33 @@ struct JMAPTest {
     shutdown_tx: watch::Sender<bool>,
 }

+pub async fn wait_for_index(server: &JMAP) {
+    loop {
+        let (tx, rx) = tokio::sync::oneshot::channel();
+        server
+            .housekeeper_tx
+            .send(Event::IndexIsActive(tx))
+            .await
+            .unwrap();
+        if rx.await.unwrap() {
+            tokio::time::sleep(Duration::from_millis(100)).await;
+        } else {
+            break;
+        }
+    }
+}
+
+pub async fn assert_is_empty(server: Arc<JMAP>) {
+    // Wait for pending FTS index tasks
+    wait_for_index(&server).await;
+
+    // Assert is empty
+    server
+        .store
+        .assert_is_empty(server.blob_store.clone())
+        .await;
+}
+
 async fn init_jmap_tests(delete_if_exists: bool) -> JMAPTest {
     // Load and parse config
     let temp_dir = TempDir::new("jmap_tests", delete_if_exists);
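The two helpers above give every test the same epilogue: `wait_for_index` polls the housekeeper until no FTS index job is active, and `assert_is_empty` runs that wait before checking that the data and blob stores were fully cleaned up. The polling shape, reduced to a self-contained sketch (this assumes only the `tokio` crate; the atomic counter is a stand-in for the housekeeper's `Event::IndexIsActive` oneshot round-trip):

    use std::{
        sync::{
            atomic::{AtomicU32, Ordering},
            Arc,
        },
        time::Duration,
    };

    async fn wait_until_idle(pending: Arc<AtomicU32>) {
        // Poll, sleeping briefly between checks, until the worker reports
        // idle. In the real helper the check is a oneshot round-trip through
        // Event::IndexIsActive rather than an atomic load.
        while pending.load(Ordering::SeqCst) > 0 {
            tokio::time::sleep(Duration::from_millis(100)).await;
        }
    }

    #[tokio::main]
    async fn main() {
        let pending = Arc::new(AtomicU32::new(3));
        let worker = {
            let pending = pending.clone();
            tokio::spawn(async move {
                // Drain the "index queue", one job every 50 ms.
                for _ in 0..3 {
                    tokio::time::sleep(Duration::from_millis(50)).await;
                    pending.fetch_sub(1, Ordering::SeqCst);
                }
            })
        };
        wait_until_idle(pending).await; // returns only once the queue is empty
        worker.await.unwrap();
    }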
@@ -53,7 +53,7 @@ use utils::listener::SessionData;
 use crate::{
     add_test_certs,
     directory::sql::create_test_user_with_email,
-    jmap::{mailbox::destroy_all_mailboxes, test_account_login},
+    jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, test_account_login},
 };

 const SERVER: &str = "
@@ -218,11 +218,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
     expect_nothing(&mut event_rx).await;

     destroy_all_mailboxes(admin_client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 #[derive(Clone)]
@@ -26,8 +26,8 @@ use std::sync::Arc;
 use crate::{
     directory::sql::{add_to_group, create_test_user_with_email, set_test_quota},
     jmap::{
-        delivery::SmtpConnection, jmap_raw_request, mailbox::destroy_all_mailboxes,
-        test_account_login,
+        assert_is_empty, delivery::SmtpConnection, jmap_raw_request,
+        mailbox::destroy_all_mailboxes, test_account_login,
     },
 };
 use jmap::{blob::upload::DISABLE_UPLOAD_QUOTA, mailbox::INBOX_ID, JMAP};
@@ -320,10 +320,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
         admin_client.set_default_account_id(account_id.to_string());
         destroy_all_mailboxes(admin_client).await;
     }
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 fn assert_over_quota<T: std::fmt::Debug>(result: Result<T, jmap_client::Error>) {
@@ -40,6 +40,7 @@ use std::{
 use crate::{
     directory::sql::create_test_user_with_email,
     jmap::{
+        assert_is_empty,
         delivery::SmtpConnection,
         email_submission::{assert_message_delivery, spawn_mock_smtp_server, MockMessage},
         mailbox::destroy_all_mailboxes,
@@ -486,10 +487,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
         client.sieve_script_destroy(&id).await.unwrap();
     }
     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 fn get_script(name: &str) -> Vec<u8> {
@@ -34,6 +34,8 @@ use jmap_client::{
 use jmap_proto::types::{collection::Collection, id::Id, property::Property};
 use store::rand::{self, Rng};

+use super::assert_is_empty;
+
 const TEST_USER_ID: u32 = 1;
 const NUM_PASSES: usize = 1;

@@ -254,11 +256,7 @@ async fn email_tests(server: Arc<JMAP>, client: Arc<Client>) {
         }

         destroy_all_mailboxes(&client).await;
-        server
-            .store
-            .assert_is_empty(server.blob_store.clone())
-            .await;
+        assert_is_empty(server.clone()).await;
     }
 }

@@ -331,10 +329,7 @@ async fn mailbox_tests(server: Arc<JMAP>, client: Arc<Client>) {
     join_all(futures).await;

     destroy_all_mailboxes(&client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 async fn create_mailbox(client: &Client, mailbox: &str) -> Vec<String> {
@@ -23,7 +23,7 @@

 use std::sync::Arc;

-use crate::jmap::mailbox::destroy_all_mailboxes;
+use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
 use jmap::JMAP;
 use jmap_client::{client::Client, mailbox::Role};
 use jmap_proto::types::id::Id;
@@ -66,8 +66,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     );

     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }
@@ -23,7 +23,7 @@

 use std::sync::Arc;

-use crate::jmap::mailbox::destroy_all_mailboxes;
+use crate::jmap::{assert_is_empty, mailbox::destroy_all_mailboxes};
 use jmap::JMAP;
 use jmap_client::{client::Client, email, mailbox::Role};
 use jmap_proto::types::id::Id;
@@ -203,10 +203,7 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
         }
     }

-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 fn build_message(message: usize, in_reply_to: Option<usize>, thread_num: usize) -> String {
@@ -30,6 +30,7 @@ use std::{sync::Arc, time::Instant};
 use crate::{
     directory::sql::create_test_user_with_email,
     jmap::{
+        assert_is_empty,
         delivery::SmtpConnection,
         email_submission::{
             assert_message_delivery, expect_nothing, spawn_mock_smtp_server, MockMessage,
@@ -173,8 +174,5 @@ pub async fn test(server: Arc<JMAP>, client: &mut Client) {
     // Remove test data
     client.vacation_response_destroy().await.unwrap();
     destroy_all_mailboxes(client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }
@@ -40,7 +40,7 @@ use tokio::sync::mpsc;

 use crate::{
     directory::sql::create_test_user_with_email,
-    jmap::{mailbox::destroy_all_mailboxes, test_account_login},
+    jmap::{assert_is_empty, mailbox::destroy_all_mailboxes, test_account_login},
 };

 pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {
@@ -125,11 +125,7 @@ pub async fn test(server: Arc<JMAP>, admin_client: &mut Client) {

     admin_client.set_default_account_id(account_id);
     destroy_all_mailboxes(admin_client).await;
-    server
-        .store
-        .assert_is_empty(server.blob_store.clone())
-        .await;
+    assert_is_empty(server).await;
 }

 async fn expect_response(
@@ -35,37 +35,12 @@ pub async fn test(db: Store) {

     test_1(db.clone()).await;
     test_2(db.clone()).await;
-    test_3(db.clone()).await;
-    test_4(db).await;
+    test_3(db).await;

     ID_ASSIGNMENT_EXPIRY.store(60 * 60, std::sync::atomic::Ordering::Relaxed);
 }

 async fn test_1(db: Store) {
-    // Test change id assignment
-    let mut handles = Vec::new();
-    let mut expected_ids = HashSet::new();
-
-    // Create 100 change ids concurrently
-    for id in 0..100 {
-        handles.push({
-            let db = db.clone();
-            tokio::spawn(async move { db.assign_change_id(0).await })
-        });
-        expected_ids.insert(id);
-    }
-
-    for handle in handles {
-        let assigned_id = handle.await.unwrap().unwrap();
-        assert!(
-            expected_ids.remove(&assigned_id),
-            "already assigned or invalid: {assigned_id} "
-        );
-    }
-    db.destroy().await;
-}
-
-async fn test_2(db: Store) {
     // Test document id assignment
     for wait_for_expiry in [true, false] {
         let mut handles = Vec::new();
@@ -101,7 +76,7 @@ async fn test_2(db: Store) {
     db.destroy().await;
 }

-async fn test_3(db: Store) {
+async fn test_2(db: Store) {
     // Create document ids and try reassigning
     let mut expected_ids = AHashSet::new();
     let mut batch = BatchBuilder::new();
@@ -132,7 +107,7 @@ async fn test_3(db: Store) {
     db.destroy().await;
 }

-async fn test_4(db: Store) {
+async fn test_3(db: Store) {
     // Try reassigning deleted ids
     let mut expected_ids = AHashSet::new();
     let mut batch = BatchBuilder::new();
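After the renumbering, the remaining assign_id tests all share one shape: claim ids from many concurrent tasks and assert that no id is handed out twice. A self-contained sketch of that shape (assumes only `tokio`; the atomic counter is a stand-in for the store's id allocator):

    use std::{
        collections::HashSet,
        sync::{
            atomic::{AtomicU32, Ordering},
            Arc,
        },
    };

    #[tokio::main]
    async fn main() {
        // Stands in for the store's document id allocator.
        let next_id = Arc::new(AtomicU32::new(0));
        let mut handles = Vec::new();
        for _ in 0..100 {
            let next_id = next_id.clone();
            handles.push(tokio::spawn(async move {
                next_id.fetch_add(1, Ordering::SeqCst)
            }));
        }

        // Every id from 0..100 must be assigned exactly once.
        let mut expected_ids: HashSet<u32> = (0..100).collect();
        for handle in handles {
            let assigned_id = handle.await.unwrap();
            assert!(
                expected_ids.remove(&assigned_id),
                "already assigned or invalid: {assigned_id}"
            );
        }
    }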
@@ -25,7 +25,7 @@ pub mod assign_id;
 pub mod blob;
 pub mod query;

-use std::{io::Read, sync::Arc};
+use std::io::Read;

 use ::store::Store;

@@ -56,8 +56,8 @@ pub async fn store_tests() {
     if insert {
         db.destroy().await;
     }
-    assign_id::test(db.clone()).await;
-    query::test(db, insert).await;
+    query::test(db.clone(), insert).await;
+    assign_id::test(db).await;
     temp_dir.delete();
 }
@@ -22,13 +22,20 @@
 */

 use std::{
+    fmt::Display,
     sync::{Arc, Mutex},
     time::Instant,
 };

 use jmap_proto::types::keyword::Keyword;
 use nlp::language::Language;
-use store::{ahash::AHashMap, query::sort::Pagination, write::ValueClass};
+use store::{
+    ahash::AHashMap,
+    fts::{index::FtsDocument, Field, FtsFilter},
+    query::sort::Pagination,
+    write::ValueClass,
+    FtsStore,
+};

 use store::{
     query::{Comparator, Filter},
@@ -93,9 +100,34 @@ const FIELDS_OPTIONS: [FieldType; 20] = [
     FieldType::Text, // "url",
 ];

+#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
+pub struct FieldId(u8);
+
+impl From<FieldId> for u8 {
+    fn from(field_id: FieldId) -> Self {
+        field_id.0
+    }
+}
+
+impl Display for FieldId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{} ({})", FIELDS[self.0 as usize], self.0)
+    }
+}
+
+impl FieldId {
+    pub fn new(field_id: u8) -> Field<FieldId> {
+        Field::Header(Self(field_id))
+    }
+
+    pub fn inner(&self) -> u8 {
+        self.0
+    }
+}
+
 #[allow(clippy::mutex_atomic)]
 pub async fn test(db: Store, do_insert: bool) {
     println!("Running Store query tests...");
+    let fts_store = FtsStore::from(db.clone());

     let pool = rayon::ThreadPoolBuilder::new()
         .num_threads(8)
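`FieldId` is a thin newtype over the numeric field index: the tests keep the raw `u8` for ordinary store filters, while `FieldId::new` wraps it into the `Field<T>` key type expected by the FTS layer, and the `Display` impl prints the field name next to its id when an assertion fails. The same pattern in isolation (the `Field` enum here is a stub standing in for `store::fts::Field`, and the three field names are illustrative):

    use std::fmt::Display;

    // Stub of store::fts::Field, for illustration only.
    #[allow(dead_code)]
    #[derive(Clone, Debug)]
    pub enum Field<T> {
        Header(T),
        Body,
    }

    const FIELDS: [&str; 3] = ["title", "medium", "year"];

    #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
    pub struct FieldId(u8);

    impl FieldId {
        pub fn new(field_id: u8) -> Field<FieldId> {
            Field::Header(Self(field_id))
        }
    }

    impl Display for FieldId {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            // Prints the human-readable field name next to its numeric id.
            write!(f, "{} ({})", FIELDS[self.0 as usize], self.0)
        }
    }

    fn main() {
        let title = FieldId::new(0);
        println!("{title:?}"); // Header(FieldId(0))
        if let Field::Header(id) = title {
            println!("{id}"); // title (0)
        }
    }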
@@ -116,7 +148,10 @@ pub async fn test(db: Store, do_insert: bool) {
             let documents = documents.clone();

             s.spawn_fifo(move |_| {
-                /*let mut fts_builder = FtsIndexBuilder::with_default_language(Language::English);
+                let mut fts_builder = FtsDocument::with_default_language(Language::English)
+                    .with_account_id(0)
+                    .with_collection(COLLECTION_ID)
+                    .with_document_id(document_id as u32);
                 let mut builder = BatchBuilder::new();
                 builder
                     .with_account_id(0)
@@ -137,7 +172,7 @@ pub async fn test(db: Store, do_insert: bool) {
                         FieldType::FullText => {
                             if !field.is_empty() {
                                 fts_builder.index(
-                                    field_id,
+                                    FieldId::new(field_id),
                                     field.to_lowercase(),
                                     Language::English,
                                 );
@@ -165,8 +200,10 @@ pub async fn test(db: Store, do_insert: bool) {
                     }
                 }

-                builder.custom(fts_builder);
-                documents.lock().unwrap().push(builder.build());*/
+                documents
+                    .lock()
+                    .unwrap()
+                    .push((builder.build(), fts_builder));
             });
         }
     });
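With the FTS store split out, full-text content no longer travels inside the key-value batch: it is collected into an `FtsDocument` keyed by account, collection and document id, and written through `FtsStore::index`. A condensed restatement of the insert path above; this is a sketch rather than standalone code, since it borrows `FieldId` and `COLLECTION_ID` from the test and the `store`/`nlp` crates from this repository:

    use nlp::language::Language;
    use store::{fts::index::FtsDocument, FtsStore};

    // Sketch only: FieldId and COLLECTION_ID come from the test module
    // above, and the title text is an illustrative placeholder.
    async fn index_one(fts_store: FtsStore, document_id: u32, title: &str) {
        // Full-text content is collected into an FtsDocument keyed by
        // account, collection and document id...
        let mut fts_builder = FtsDocument::with_default_language(Language::English)
            .with_account_id(0)
            .with_collection(COLLECTION_ID)
            .with_document_id(document_id);
        fts_builder.index(FieldId::new(0), title.to_lowercase(), Language::English);

        // ...and written to the FTS store independently of the key-value batch.
        fts_store.index(fts_builder).await.unwrap();
    }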
@@ -180,22 +217,31 @@ pub async fn test(db: Store, do_insert: bool) {
     let now = Instant::now();
     let batches = documents.lock().unwrap().drain(..).collect::<Vec<_>>();
     let mut chunk = Vec::new();
+    let mut fts_chunk = Vec::new();

-    for batch in batches {
+    for (batch, fts_batch) in batches {
         let chunk_instance = Instant::now();
         chunk.push({
             let db = db.clone();
             tokio::spawn(async move { db.write(batch).await })
         });
+        fts_chunk.push({
+            let fts_store = fts_store.clone();
+            tokio::spawn(async move { fts_store.index(fts_batch).await })
+        });
         if chunk.len() == 1000 {
             for handle in chunk {
                 handle.await.unwrap().unwrap();
             }
+            for handle in fts_chunk {
+                handle.await.unwrap().unwrap();
+            }
             println!(
-                "Chunk insert took {} ms.",
+                "Store insert took {} ms.",
                 chunk_instance.elapsed().as_millis()
             );
             chunk = Vec::new();
+            fts_chunk = Vec::new();
         }
     }
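The insert loop now drives two sets of write futures, one for store batches and one for FTS batches, draining both every 1000 documents so that neither queue grows without bound. The chunking pattern itself, reduced to plain `tokio` tasks (the doubling task is a stand-in for `db.write` and `fts_store.index`):

    #[tokio::main]
    async fn main() {
        let mut chunk = Vec::new();
        for i in 0u32..2500 {
            // Stand-in for db.write(batch) / fts_store.index(fts_batch).
            chunk.push(tokio::spawn(async move { i * 2 }));

            // Drain the in-flight tasks in bounded chunks.
            if chunk.len() == 1000 {
                for handle in chunk {
                    handle.await.unwrap();
                }
                chunk = Vec::new();
            }
        }
        // Await whatever is left over after the loop.
        for handle in chunk {
            handle.await.unwrap();
        }
    }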
|
@ -209,156 +255,232 @@ pub async fn test(db: Store, do_insert: bool) {
|
||||||
}
|
}
|
||||||
|
|
||||||
println!("Running filter tests...");
|
println!("Running filter tests...");
|
||||||
test_filter(db.clone()).await;
|
test_filter(db.clone(), fts_store).await;
|
||||||
|
|
||||||
println!("Running sort tests...");
|
println!("Running sort tests...");
|
||||||
test_sort(db).await;
|
test_sort(db).await;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn test_filter(db: Store) {
|
pub async fn test_filter(db: Store, fts: FtsStore) {
|
||||||
/*
|
let mut fields = AHashMap::default();
|
||||||
let mut fields = AHashMap::default();
|
let mut fields_u8 = AHashMap::default();
|
||||||
for (field_num, field) in FIELDS.iter().enumerate() {
|
for (field_num, field) in FIELDS.iter().enumerate() {
|
||||||
fields.insert(field.to_string(), field_num as u8);
|
fields.insert(field.to_string(), FieldId::new(field_num as u8));
|
||||||
}
|
fields_u8.insert(field.to_string(), field_num as u8);
|
||||||
|
}
|
||||||
|
|
||||||
let tests = [
|
let tests = [
|
||||||
(
|
(
|
||||||
vec![
|
vec![
|
||||||
Filter::has_english_text(fields["title"], "water"),
|
Filter::is_in_set(
|
||||||
Filter::eq(fields["year"], 1979u32),
|
fts.query(
|
||||||
],
|
0,
|
||||||
vec!["p11293"],
|
COLLECTION_ID,
|
||||||
),
|
vec![FtsFilter::has_english_text(
|
||||||
(
|
fields["title"].clone(),
|
||||||
vec![
|
"water",
|
||||||
Filter::has_english_text(fields["medium"], "gelatin"),
|
)],
|
||||||
Filter::gt(fields["year"], 2000u32),
|
)
|
||||||
Filter::lt(fields["width"], 180u32),
|
.await
|
||||||
Filter::gt(fields["width"], 0u32),
|
.unwrap(),
|
||||||
],
|
),
|
||||||
vec!["p79426", "p79427", "p79428", "p79429", "p79430"],
|
Filter::eq(fields_u8["year"], 1979u32),
|
||||||
),
|
],
|
||||||
(
|
vec!["p11293"],
|
||||||
vec![Filter::has_english_text(fields["title"], "'rustic bridge'")],
|
),
|
||||||
vec!["d05503"],
|
(
|
||||||
),
|
vec![
|
||||||
(
|
Filter::is_in_set(
|
||||||
vec![
|
fts.query(
|
||||||
Filter::has_english_text(fields["title"], "'rustic'"),
|
0,
|
||||||
Filter::has_english_text(fields["title"], "study"),
|
COLLECTION_ID,
|
||||||
],
|
vec![FtsFilter::has_english_text(
|
||||||
vec!["d00399", "d05352"],
|
fields["medium"].clone(),
|
||||||
),
|
"gelatin",
|
||||||
(
|
)],
|
||||||
vec![
|
)
|
||||||
Filter::has_text(fields["artist"], "mauro kunst", Language::None),
|
.await
|
||||||
Filter::is_in_bitmap(fields["artistRole"], Keyword::Other("artist".to_string())),
|
.unwrap(),
|
||||||
Filter::Or,
|
),
|
||||||
Filter::eq(fields["year"], 1969u32),
|
Filter::gt(fields_u8["year"], 2000u32),
|
||||||
Filter::eq(fields["year"], 1971u32),
|
Filter::lt(fields_u8["width"], 180u32),
|
||||||
Filter::End,
|
Filter::gt(fields_u8["width"], 0u32),
|
||||||
],
|
],
|
||||||
vec!["p01764", "t05843"],
|
vec!["p79426", "p79427", "p79428", "p79429", "p79430"],
|
||||||
),
|
),
|
||||||
(
|
(
|
||||||
vec![
|
vec![Filter::is_in_set(
|
||||||
Filter::Not,
|
fts.query(
|
||||||
Filter::has_english_text(fields["medium"], "oil"),
|
0,
|
||||||
Filter::End,
|
COLLECTION_ID,
|
||||||
Filter::has_english_text(fields["creditLine"], "bequeath"),
|
vec![FtsFilter::has_english_text(
|
||||||
Filter::Or,
|
fields["title"].clone(),
|
||||||
Filter::And,
|
"'rustic bridge'",
|
||||||
Filter::ge(fields["year"], 1900u32),
|
)],
|
||||||
Filter::lt(fields["year"], 1910u32),
|
|
||||||
Filter::End,
|
|
||||||
Filter::And,
|
|
||||||
Filter::ge(fields["year"], 2000u32),
|
|
||||||
Filter::lt(fields["year"], 2010u32),
|
|
||||||
Filter::End,
|
|
||||||
Filter::End,
|
|
||||||
],
|
|
||||||
vec![
|
|
||||||
"n02478", "n02479", "n03568", "n03658", "n04327", "n04328", "n04721", "n04739",
|
|
||||||
"n05095", "n05096", "n05145", "n05157", "n05158", "n05159", "n05298", "n05303",
|
|
||||||
"n06070", "t01181", "t03571", "t05805", "t05806", "t12147", "t12154", "t12155",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
vec![
|
|
||||||
Filter::And,
|
|
||||||
Filter::has_text(fields["artist"], "warhol", Language::None),
|
|
||||||
Filter::Not,
|
|
||||||
Filter::has_english_text(fields["title"], "'campbell'"),
|
|
||||||
Filter::End,
|
|
||||||
Filter::Not,
|
|
||||||
Filter::Or,
|
|
||||||
Filter::gt(fields["year"], 1980u32),
|
|
||||||
Filter::And,
|
|
||||||
Filter::gt(fields["width"], 500u32),
|
|
||||||
Filter::gt(fields["height"], 500u32),
|
|
||||||
Filter::End,
|
|
||||||
Filter::End,
|
|
||||||
Filter::End,
|
|
||||||
Filter::eq(fields["acquisitionYear"], 2008u32),
|
|
||||||
Filter::End,
|
|
||||||
],
|
|
||||||
vec!["ar00039", "t12600"],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
vec![
|
|
||||||
Filter::has_english_text(fields["title"], "study"),
|
|
||||||
Filter::has_english_text(fields["medium"], "paper"),
|
|
||||||
Filter::has_english_text(fields["creditLine"], "'purchased'"),
|
|
||||||
Filter::Not,
|
|
||||||
Filter::has_english_text(fields["title"], "'anatomical'"),
|
|
||||||
Filter::has_english_text(fields["title"], "'for'"),
|
|
||||||
Filter::End,
|
|
||||||
Filter::gt(fields["year"], 1900u32),
|
|
||||||
Filter::gt(fields["acquisitionYear"], 2000u32),
|
|
||||||
],
|
|
||||||
vec![
|
|
||||||
"p80042", "p80043", "p80044", "p80045", "p80203", "t11937", "t12172",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
];
|
|
||||||
|
|
||||||
for (filter, expected_results) in tests {
|
|
||||||
//println!("Running test: {:?}", filter);
|
|
||||||
let docset = db.filter(0, COLLECTION_ID, filter).await.unwrap();
|
|
||||||
let sorted_docset = db
|
|
||||||
.sort(
|
|
||||||
docset,
|
|
||||||
vec![Comparator::ascending(fields["accession_number"])],
|
|
||||||
Pagination::new(0, 0, None, 0),
|
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.unwrap();
|
.unwrap(),
|
||||||
|
)],
|
||||||
assert_eq!(
|
vec!["d05503"],
|
||||||
db.get_values::<String>(
|
),
|
||||||
sorted_docset
|
(
|
||||||
.ids
|
vec![Filter::is_in_set(
|
||||||
.into_iter()
|
fts.query(
|
||||||
.map(|document_id| ValueKey {
|
0,
|
||||||
account_id: 0,
|
COLLECTION_ID,
|
||||||
collection: COLLECTION_ID,
|
vec![
|
||||||
document_id: document_id as u32,
|
FtsFilter::has_english_text(fields["title"].clone(), "'rustic'"),
|
||||||
family: 0,
|
FtsFilter::has_english_text(fields["title"].clone(), "study"),
|
||||||
field: fields["accession_number"],
|
],
|
||||||
})
|
|
||||||
.collect()
|
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
.unwrap()
|
.unwrap(),
|
||||||
.into_iter()
|
)],
|
||||||
.flatten()
|
vec!["d00399", "d05352"],
|
||||||
.collect::<Vec<_>>(),
|
),
|
||||||
expected_results
|
(
|
||||||
);
|
vec![
|
||||||
}
|
Filter::has_text(fields_u8["artist"], "mauro kunst"),
|
||||||
|
Filter::is_in_bitmap(
|
||||||
|
fields_u8["artistRole"],
|
||||||
|
Keyword::Other("artist".to_string()),
|
||||||
|
),
|
||||||
|
Filter::Or,
|
||||||
|
Filter::eq(fields_u8["year"], 1969u32),
|
||||||
|
Filter::eq(fields_u8["year"], 1971u32),
|
||||||
|
Filter::End,
|
||||||
|
],
|
||||||
|
vec!["p01764", "t05843"],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
vec![
|
||||||
|
Filter::is_in_set(
|
||||||
|
fts.query(
|
||||||
|
0,
|
||||||
|
COLLECTION_ID,
|
||||||
|
vec![
|
||||||
|
FtsFilter::Not,
|
||||||
|
FtsFilter::has_english_text(fields["medium"].clone(), "oil"),
|
||||||
|
FtsFilter::End,
|
||||||
|
FtsFilter::has_english_text(fields["creditLine"].clone(), "bequeath"),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
),
|
||||||
|
Filter::Or,
|
||||||
|
Filter::And,
|
||||||
|
Filter::ge(fields_u8["year"], 1900u32),
|
||||||
|
Filter::lt(fields_u8["year"], 1910u32),
|
||||||
|
Filter::End,
|
||||||
|
Filter::And,
|
||||||
|
Filter::ge(fields_u8["year"], 2000u32),
|
||||||
|
Filter::lt(fields_u8["year"], 2010u32),
|
||||||
|
Filter::End,
|
||||||
|
Filter::End,
|
||||||
|
],
|
||||||
|
vec![
|
||||||
|
"n02478", "n02479", "n03568", "n03658", "n04327", "n04328", "n04721", "n04739",
|
||||||
|
"n05095", "n05096", "n05145", "n05157", "n05158", "n05159", "n05298", "n05303",
|
||||||
|
"n06070", "t01181", "t03571", "t05805", "t05806", "t12147", "t12154", "t12155",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
vec![
|
||||||
|
Filter::And,
|
||||||
|
Filter::has_text(fields_u8["artist"], "warhol"),
|
||||||
|
Filter::Not,
|
||||||
|
Filter::is_in_set(
|
||||||
|
fts.query(
|
||||||
|
0,
|
||||||
|
COLLECTION_ID,
|
||||||
|
vec![FtsFilter::has_english_text(
|
||||||
|
fields["title"].clone(),
|
||||||
|
"'campbell'",
|
||||||
|
)],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
),
|
||||||
|
Filter::End,
|
||||||
|
Filter::Not,
|
||||||
|
Filter::Or,
|
||||||
|
Filter::gt(fields_u8["year"], 1980u32),
|
||||||
|
Filter::And,
|
||||||
|
Filter::gt(fields_u8["width"], 500u32),
|
||||||
|
Filter::gt(fields_u8["height"], 500u32),
|
||||||
|
Filter::End,
|
||||||
|
Filter::End,
|
||||||
|
Filter::End,
|
||||||
|
Filter::eq(fields_u8["acquisitionYear"], 2008u32),
|
||||||
|
Filter::End,
|
||||||
|
],
|
||||||
|
vec!["ar00039", "t12600"],
|
||||||
|
),
|
||||||
|
(
|
||||||
|
vec![
|
||||||
|
Filter::is_in_set(
|
||||||
|
fts.query(
|
||||||
|
0,
|
||||||
|
COLLECTION_ID,
|
||||||
|
vec![
|
||||||
|
FtsFilter::has_english_text(fields["title"].clone(), "study"),
|
||||||
|
FtsFilter::has_english_text(fields["medium"].clone(), "paper"),
|
||||||
|
FtsFilter::has_english_text(
|
||||||
|
fields["creditLine"].clone(),
|
||||||
|
"'purchased'",
|
||||||
|
),
|
||||||
|
FtsFilter::Not,
|
||||||
|
FtsFilter::has_english_text(fields["title"].clone(), "'anatomical'"),
|
||||||
|
FtsFilter::has_english_text(fields["title"].clone(), "'for'"),
|
||||||
|
FtsFilter::End,
|
||||||
|
],
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap(),
|
||||||
|
),
|
||||||
|
Filter::gt(fields_u8["year"], 1900u32),
|
||||||
|
Filter::gt(fields_u8["acquisitionYear"], 2000u32),
|
||||||
|
],
|
||||||
|
vec![
|
||||||
|
"p80042", "p80043", "p80044", "p80045", "p80203", "t11937", "t12172",
|
||||||
|
],
|
||||||
|
),
|
||||||
|
];
|
||||||
|
|
||||||
*/
|
for (filter, expected_results) in tests {
|
||||||
|
//println!("Running test: {:?}", filter);
|
||||||
|
let docset = db.filter(0, COLLECTION_ID, filter).await.unwrap();
|
||||||
|
let sorted_docset = db
|
||||||
|
.sort(
|
||||||
|
docset,
|
||||||
|
vec![Comparator::ascending(fields_u8["accession_number"])],
|
||||||
|
Pagination::new(0, 0, None, 0),
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
db.get_values::<String>(
|
||||||
|
sorted_docset
|
||||||
|
.ids
|
||||||
|
.into_iter()
|
||||||
|
.map(|document_id| ValueKey {
|
||||||
|
account_id: 0,
|
||||||
|
collection: COLLECTION_ID,
|
||||||
|
document_id: document_id as u32,
|
||||||
|
class: ValueClass::Property(fields_u8["accession_number"])
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.unwrap()
|
||||||
|
.into_iter()
|
||||||
|
.flatten()
|
||||||
|
.collect::<Vec<_>>(),
|
||||||
|
expected_results
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn test_sort(db: Store) {
|
pub async fn test_sort(db: Store) {
|
||||||
|
|
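The reworked filter tests show the new query shape: full-text terms are resolved first against the FTS store, and the resulting document set is injected into the regular store query via `Filter::is_in_set`, where it composes with range and bitmap filters. A condensed restatement of the first test case; the function name `water_from_1979` is hypothetical, and the sketch assumes this repository's `store` crate plus the test's `FieldId` and `COLLECTION_ID`:

    use store::{
        fts::{Field, FtsFilter},
        query::Filter,
        FtsStore, Store,
    };

    // Sketch only: `title` is the FTS key built with FieldId::new, and
    // `year` is the plain u8 field index used by the key-value store.
    async fn water_from_1979(db: &Store, fts: &FtsStore, title: Field<FieldId>, year: u8) {
        // 1. Resolve the full-text term against the FTS store.
        let matching = fts
            .query(
                0,
                COLLECTION_ID,
                vec![FtsFilter::has_english_text(title, "water")],
            )
            .await
            .unwrap();

        // 2. Feed the matching document set into a regular store query,
        //    where it combines with ordinary indexed filters.
        let docset = db
            .filter(
                0,
                COLLECTION_ID,
                vec![
                    Filter::is_in_set(matching),
                    Filter::eq(year, 1979u32),
                ],
            )
            .await
            .unwrap();

        // The test then sorts the result by accession number and compares
        // the stored values against the expected ids (here, "p11293").
        let _ = docset;
    }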