diff --git a/crates/project/src/project_tests.rs b/crates/project/src/project_tests.rs index 16e706a77e..259c10ca05 100644 --- a/crates/project/src/project_tests.rs +++ b/crates/project/src/project_tests.rs @@ -1,7 +1,6 @@ -use crate::{worktree::WorktreeHandle, Event, *}; +use crate::{search::PathMatcher, worktree::WorktreeHandle, Event, *}; use fs::{FakeFs, LineEnding, RealFs}; use futures::{future, StreamExt}; -use globset::Glob; use gpui::{executor::Deterministic, test::subscribe, AppContext}; use language::{ language_settings::{AllLanguageSettings, LanguageSettingsContent}, @@ -3641,7 +3640,7 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) { search_query, false, true, - vec![Glob::new("*.odd").unwrap().compile_matcher()], + vec![PathMatcher::new("*.odd").unwrap()], Vec::new() ), cx @@ -3659,7 +3658,7 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) { search_query, false, true, - vec![Glob::new("*.rs").unwrap().compile_matcher()], + vec![PathMatcher::new("*.rs").unwrap()], Vec::new() ), cx @@ -3681,8 +3680,8 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) { false, true, vec![ - Glob::new("*.ts").unwrap().compile_matcher(), - Glob::new("*.odd").unwrap().compile_matcher(), + PathMatcher::new("*.ts").unwrap(), + PathMatcher::new("*.odd").unwrap(), ], Vec::new() ), @@ -3705,9 +3704,9 @@ async fn test_search_with_inclusions(cx: &mut gpui::TestAppContext) { false, true, vec![ - Glob::new("*.rs").unwrap().compile_matcher(), - Glob::new("*.ts").unwrap().compile_matcher(), - Glob::new("*.odd").unwrap().compile_matcher(), + PathMatcher::new("*.rs").unwrap(), + PathMatcher::new("*.ts").unwrap(), + PathMatcher::new("*.odd").unwrap(), ], Vec::new() ), @@ -3752,7 +3751,7 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) { false, true, Vec::new(), - vec![Glob::new("*.odd").unwrap().compile_matcher()], + vec![PathMatcher::new("*.odd").unwrap()], ), cx ) @@ -3775,7 +3774,7 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) { false, true, Vec::new(), - vec![Glob::new("*.rs").unwrap().compile_matcher()], + vec![PathMatcher::new("*.rs").unwrap()], ), cx ) @@ -3797,8 +3796,8 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) { true, Vec::new(), vec![ - Glob::new("*.ts").unwrap().compile_matcher(), - Glob::new("*.odd").unwrap().compile_matcher(), + PathMatcher::new("*.ts").unwrap(), + PathMatcher::new("*.odd").unwrap(), ], ), cx @@ -3821,9 +3820,9 @@ async fn test_search_with_exclusions(cx: &mut gpui::TestAppContext) { true, Vec::new(), vec![ - Glob::new("*.rs").unwrap().compile_matcher(), - Glob::new("*.ts").unwrap().compile_matcher(), - Glob::new("*.odd").unwrap().compile_matcher(), + PathMatcher::new("*.rs").unwrap(), + PathMatcher::new("*.ts").unwrap(), + PathMatcher::new("*.odd").unwrap(), ], ), cx @@ -3860,8 +3859,8 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex search_query, false, true, - vec![Glob::new("*.odd").unwrap().compile_matcher()], - vec![Glob::new("*.odd").unwrap().compile_matcher()], + vec![PathMatcher::new("*.odd").unwrap()], + vec![PathMatcher::new("*.odd").unwrap()], ), cx ) @@ -3878,8 +3877,8 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex search_query, false, true, - vec![Glob::new("*.ts").unwrap().compile_matcher()], - vec![Glob::new("*.ts").unwrap().compile_matcher()], + vec![PathMatcher::new("*.ts").unwrap()], + vec![PathMatcher::new("*.ts").unwrap()], ), cx ) @@ -3897,12 +3896,12 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex false, true, vec![ - Glob::new("*.ts").unwrap().compile_matcher(), - Glob::new("*.odd").unwrap().compile_matcher() + PathMatcher::new("*.ts").unwrap(), + PathMatcher::new("*.odd").unwrap() ], vec![ - Glob::new("*.ts").unwrap().compile_matcher(), - Glob::new("*.odd").unwrap().compile_matcher() + PathMatcher::new("*.ts").unwrap(), + PathMatcher::new("*.odd").unwrap() ], ), cx @@ -3921,12 +3920,12 @@ async fn test_search_with_exclusions_and_inclusions(cx: &mut gpui::TestAppContex false, true, vec![ - Glob::new("*.ts").unwrap().compile_matcher(), - Glob::new("*.odd").unwrap().compile_matcher() + PathMatcher::new("*.ts").unwrap(), + PathMatcher::new("*.odd").unwrap() ], vec![ - Glob::new("*.rs").unwrap().compile_matcher(), - Glob::new("*.odd").unwrap().compile_matcher() + PathMatcher::new("*.rs").unwrap(), + PathMatcher::new("*.odd").unwrap() ], ), cx diff --git a/crates/project/src/search.rs b/crates/project/src/search.rs index 4b4126fef2..71a0b70b81 100644 --- a/crates/project/src/search.rs +++ b/crates/project/src/search.rs @@ -1,5 +1,5 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder}; -use anyhow::Result; +use anyhow::{Context, Result}; use client::proto; use globset::{Glob, GlobMatcher}; use itertools::Itertools; @@ -9,7 +9,7 @@ use smol::future::yield_now; use std::{ io::{BufRead, BufReader, Read}, ops::Range, - path::Path, + path::{Path, PathBuf}, sync::Arc, }; @@ -20,8 +20,8 @@ pub enum SearchQuery { query: Arc, whole_word: bool, case_sensitive: bool, - files_to_include: Vec, - files_to_exclude: Vec, + files_to_include: Vec, + files_to_exclude: Vec, }, Regex { regex: Regex, @@ -29,18 +29,43 @@ pub enum SearchQuery { multiline: bool, whole_word: bool, case_sensitive: bool, - files_to_include: Vec, - files_to_exclude: Vec, + files_to_include: Vec, + files_to_exclude: Vec, }, } +#[derive(Clone, Debug)] +pub struct PathMatcher { + maybe_path: PathBuf, + glob: GlobMatcher, +} + +impl std::fmt::Display for PathMatcher { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.maybe_path.to_string_lossy().fmt(f) + } +} + +impl PathMatcher { + pub fn new(maybe_glob: &str) -> Result { + Ok(PathMatcher { + glob: Glob::new(&maybe_glob)?.compile_matcher(), + maybe_path: PathBuf::from(maybe_glob), + }) + } + + pub fn is_match>(&self, other: P) -> bool { + other.as_ref().starts_with(&self.maybe_path) || self.glob.is_match(other) + } +} + impl SearchQuery { pub fn text( query: impl ToString, whole_word: bool, case_sensitive: bool, - files_to_include: Vec, - files_to_exclude: Vec, + files_to_include: Vec, + files_to_exclude: Vec, ) -> Self { let query = query.to_string(); let search = AhoCorasickBuilder::new() @@ -61,8 +86,8 @@ impl SearchQuery { query: impl ToString, whole_word: bool, case_sensitive: bool, - files_to_include: Vec, - files_to_exclude: Vec, + files_to_include: Vec, + files_to_exclude: Vec, ) -> Result { let mut query = query.to_string(); let initial_query = Arc::from(query.as_str()); @@ -96,16 +121,16 @@ impl SearchQuery { message.query, message.whole_word, message.case_sensitive, - deserialize_globs(&message.files_to_include)?, - deserialize_globs(&message.files_to_exclude)?, + deserialize_path_matches(&message.files_to_include)?, + deserialize_path_matches(&message.files_to_exclude)?, ) } else { Ok(Self::text( message.query, message.whole_word, message.case_sensitive, - deserialize_globs(&message.files_to_include)?, - deserialize_globs(&message.files_to_exclude)?, + deserialize_path_matches(&message.files_to_include)?, + deserialize_path_matches(&message.files_to_exclude)?, )) } } @@ -120,12 +145,12 @@ impl SearchQuery { files_to_include: self .files_to_include() .iter() - .map(|g| g.glob().to_string()) + .map(|matcher| matcher.to_string()) .join(","), files_to_exclude: self .files_to_exclude() .iter() - .map(|g| g.glob().to_string()) + .map(|matcher| matcher.to_string()) .join(","), } } @@ -266,7 +291,7 @@ impl SearchQuery { matches!(self, Self::Regex { .. }) } - pub fn files_to_include(&self) -> &[GlobMatcher] { + pub fn files_to_include(&self) -> &[PathMatcher] { match self { Self::Text { files_to_include, .. @@ -277,7 +302,7 @@ impl SearchQuery { } } - pub fn files_to_exclude(&self) -> &[GlobMatcher] { + pub fn files_to_exclude(&self) -> &[PathMatcher] { match self { Self::Text { files_to_exclude, .. @@ -306,11 +331,63 @@ impl SearchQuery { } } -fn deserialize_globs(glob_set: &str) -> Result> { +fn deserialize_path_matches(glob_set: &str) -> anyhow::Result> { glob_set .split(',') .map(str::trim) .filter(|glob_str| !glob_str.is_empty()) - .map(|glob_str| Ok(Glob::new(glob_str)?.compile_matcher())) + .map(|glob_str| { + PathMatcher::new(glob_str) + .with_context(|| format!("deserializing path match glob {glob_str}")) + }) .collect() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn path_matcher_creation_for_valid_paths() { + for valid_path in [ + "file", + "Cargo.toml", + ".DS_Store", + "~/dir/another_dir/", + "./dir/file", + "dir/[a-z].txt", + "../dir/filé", + ] { + let path_matcher = PathMatcher::new(valid_path).unwrap_or_else(|e| { + panic!("Valid path {valid_path} should be accepted, but got: {e}") + }); + assert!( + path_matcher.is_match(valid_path), + "Path matcher for valid path {valid_path} should match itself" + ) + } + } + + #[test] + fn path_matcher_creation_for_globs() { + for invalid_glob in ["dir/[].txt", "dir/[a-z.txt", "dir/{file"] { + match PathMatcher::new(invalid_glob) { + Ok(_) => panic!("Invalid glob {invalid_glob} should not be accepted"), + Err(_expected) => {} + } + } + + for valid_glob in [ + "dir/?ile", + "dir/*.txt", + "dir/**/file", + "dir/[a-z].txt", + "{dir,file}", + ] { + match PathMatcher::new(valid_glob) { + Ok(_expected) => {} + Err(e) => panic!("Valid glob {valid_glob} should be accepted, but got: {e}"), + } + } + } +} diff --git a/crates/search/src/project_search.rs b/crates/search/src/project_search.rs index 52ee12c26d..d6d69e575f 100644 --- a/crates/search/src/project_search.rs +++ b/crates/search/src/project_search.rs @@ -2,14 +2,13 @@ use crate::{ SearchOptions, SelectNextMatch, SelectPrevMatch, ToggleCaseSensitive, ToggleRegex, ToggleWholeWord, }; -use anyhow::Result; +use anyhow::Context; use collections::HashMap; use editor::{ items::active_match_index, scroll::autoscroll::Autoscroll, Anchor, Editor, MultiBuffer, SelectAll, MAX_TAB_TITLE_LEN, }; use futures::StreamExt; -use globset::{Glob, GlobMatcher}; use gpui::{ actions, elements::*, @@ -19,7 +18,10 @@ use gpui::{ }; use menu::Confirm; use postage::stream::Stream; -use project::{search::SearchQuery, Entry, Project}; +use project::{ + search::{PathMatcher, SearchQuery}, + Entry, Project, +}; use semantic_index::SemanticIndex; use smallvec::SmallVec; use std::{ @@ -185,21 +187,15 @@ impl ProjectSearch { cx.notify(); } - fn semantic_search( - &mut self, - query: String, - include_files: Vec, - exclude_files: Vec, - cx: &mut ModelContext, - ) { + fn semantic_search(&mut self, query: SearchQuery, cx: &mut ModelContext) { let search = SemanticIndex::global(cx).map(|index| { index.update(cx, |semantic_index, cx| { semantic_index.search_project( self.project.clone(), - query.clone(), + query.as_str().to_owned(), 10, - include_files, - exclude_files, + query.files_to_include().to_vec(), + query.files_to_exclude().to_vec(), cx, ) }) @@ -590,8 +586,7 @@ impl ProjectSearchView { if !dir_entry.is_dir() { return; } - let filter_path = dir_entry.path.join("**"); - let Some(filter_str) = filter_path.to_str() else { return; }; + let Some(filter_str) = dir_entry.path.to_str() else { return; }; let model = cx.add_model(|cx| ProjectSearch::new(workspace.project().clone(), cx)); let search = cx.add_view(|cx| ProjectSearchView::new(model, cx)); @@ -662,16 +657,10 @@ impl ProjectSearchView { if semantic.outstanding_file_count > 0 { return; } - - let query = self.query_editor.read(cx).text(cx); - if let Some((included_files, exclude_files)) = - self.get_included_and_excluded_globsets(cx) - { - self.model.update(cx, |model, cx| { - model.semantic_search(query, included_files, exclude_files, cx) - }); + if let Some(query) = self.build_search_query(cx) { + self.model + .update(cx, |model, cx| model.semantic_search(query, cx)); } - return; } if let Some(query) = self.build_search_query(cx) { @@ -679,42 +668,10 @@ impl ProjectSearchView { } } - fn get_included_and_excluded_globsets( - &mut self, - cx: &mut ViewContext, - ) -> Option<(Vec, Vec)> { - let included_files = - match Self::load_glob_set(&self.included_files_editor.read(cx).text(cx)) { - Ok(included_files) => { - self.panels_with_errors.remove(&InputPanel::Include); - included_files - } - Err(_e) => { - self.panels_with_errors.insert(InputPanel::Include); - cx.notify(); - return None; - } - }; - let excluded_files = - match Self::load_glob_set(&self.excluded_files_editor.read(cx).text(cx)) { - Ok(excluded_files) => { - self.panels_with_errors.remove(&InputPanel::Exclude); - excluded_files - } - Err(_e) => { - self.panels_with_errors.insert(InputPanel::Exclude); - cx.notify(); - return None; - } - }; - - Some((included_files, excluded_files)) - } - fn build_search_query(&mut self, cx: &mut ViewContext) -> Option { let text = self.query_editor.read(cx).text(cx); let included_files = - match Self::load_glob_set(&self.included_files_editor.read(cx).text(cx)) { + match Self::parse_path_matches(&self.included_files_editor.read(cx).text(cx)) { Ok(included_files) => { self.panels_with_errors.remove(&InputPanel::Include); included_files @@ -726,7 +683,7 @@ impl ProjectSearchView { } }; let excluded_files = - match Self::load_glob_set(&self.excluded_files_editor.read(cx).text(cx)) { + match Self::parse_path_matches(&self.excluded_files_editor.read(cx).text(cx)) { Ok(excluded_files) => { self.panels_with_errors.remove(&InputPanel::Exclude); excluded_files @@ -766,11 +723,14 @@ impl ProjectSearchView { } } - fn load_glob_set(text: &str) -> Result> { + fn parse_path_matches(text: &str) -> anyhow::Result> { text.split(',') .map(str::trim) - .filter(|glob_str| !glob_str.is_empty()) - .map(|glob_str| anyhow::Ok(Glob::new(glob_str)?.compile_matcher())) + .filter(|maybe_glob_str| !maybe_glob_str.is_empty()) + .map(|maybe_glob_str| { + PathMatcher::new(maybe_glob_str) + .with_context(|| format!("parsing {maybe_glob_str} as path matcher")) + }) .collect() } @@ -1769,7 +1729,7 @@ pub mod tests { search_view.included_files_editor.update(cx, |editor, cx| { assert_eq!( editor.display_text(cx), - a_dir_entry.path.join("**").display().to_string(), + a_dir_entry.path.to_str().unwrap(), "New search in directory should have included dir entry path" ); }); diff --git a/crates/semantic_index/src/db.rs b/crates/semantic_index/src/db.rs index d180f5e831..e8c929c995 100644 --- a/crates/semantic_index/src/db.rs +++ b/crates/semantic_index/src/db.rs @@ -1,7 +1,6 @@ use crate::{parsing::Document, SEMANTIC_INDEX_VERSION}; use anyhow::{anyhow, Context, Result}; -use globset::GlobMatcher; -use project::Fs; +use project::{search::PathMatcher, Fs}; use rpc::proto::Timestamp; use rusqlite::{ params, @@ -290,8 +289,8 @@ impl VectorDatabase { pub fn retrieve_included_file_ids( &self, worktree_ids: &[i64], - include_globs: Vec, - exclude_globs: Vec, + includes: &[PathMatcher], + excludes: &[PathMatcher], ) -> Result> { let mut file_query = self.db.prepare( " @@ -310,13 +309,9 @@ impl VectorDatabase { while let Some(row) = rows.next()? { let file_id = row.get(0)?; let relative_path = row.get_ref(1)?.as_str()?; - let included = include_globs.is_empty() - || include_globs - .iter() - .any(|glob| glob.is_match(relative_path)); - let excluded = exclude_globs - .iter() - .any(|glob| glob.is_match(relative_path)); + let included = + includes.is_empty() || includes.iter().any(|glob| glob.is_match(relative_path)); + let excluded = excludes.iter().any(|glob| glob.is_match(relative_path)); if included && !excluded { file_ids.push(file_id); } diff --git a/crates/semantic_index/src/semantic_index.rs b/crates/semantic_index/src/semantic_index.rs index bd114de216..f1450eb7b0 100644 --- a/crates/semantic_index/src/semantic_index.rs +++ b/crates/semantic_index/src/semantic_index.rs @@ -11,13 +11,12 @@ use anyhow::{anyhow, Result}; use db::VectorDatabase; use embedding::{EmbeddingProvider, OpenAIEmbeddings}; use futures::{channel::oneshot, Future}; -use globset::GlobMatcher; use gpui::{AppContext, AsyncAppContext, Entity, ModelContext, ModelHandle, Task, WeakModelHandle}; use language::{Anchor, Buffer, Language, LanguageRegistry}; use parking_lot::Mutex; use parsing::{CodeContextRetriever, Document, PARSEABLE_ENTIRE_FILE_TYPES}; use postage::watch; -use project::{Fs, Project, WorktreeId}; +use project::{search::PathMatcher, Fs, Project, WorktreeId}; use smol::channel; use std::{ cmp::Ordering, @@ -682,8 +681,8 @@ impl SemanticIndex { project: ModelHandle, phrase: String, limit: usize, - include_globs: Vec, - exclude_globs: Vec, + includes: Vec, + excludes: Vec, cx: &mut ModelContext, ) -> Task>> { let project_state = if let Some(state) = self.projects.get(&project.downgrade()) { @@ -714,11 +713,8 @@ impl SemanticIndex { .next() .unwrap(); - let file_ids = database.retrieve_included_file_ids( - &worktree_db_ids, - include_globs, - exclude_globs, - )?; + let file_ids = + database.retrieve_included_file_ids(&worktree_db_ids, &includes, &excludes)?; let batch_n = cx.background().num_cpus(); let ids_len = file_ids.clone().len(); diff --git a/crates/semantic_index/src/semantic_index_tests.rs b/crates/semantic_index/src/semantic_index_tests.rs index acf5a9d72b..6acb25d98a 100644 --- a/crates/semantic_index/src/semantic_index_tests.rs +++ b/crates/semantic_index/src/semantic_index_tests.rs @@ -7,11 +7,10 @@ use crate::{ }; use anyhow::Result; use async_trait::async_trait; -use globset::Glob; use gpui::{Task, TestAppContext}; use language::{Language, LanguageConfig, LanguageRegistry, ToOffset}; use pretty_assertions::assert_eq; -use project::{project_settings::ProjectSettings, FakeFs, Fs, Project}; +use project::{project_settings::ProjectSettings, search::PathMatcher, FakeFs, Fs, Project}; use rand::{rngs::StdRng, Rng}; use serde_json::json; use settings::SettingsStore; @@ -121,8 +120,8 @@ async fn test_semantic_index(cx: &mut TestAppContext) { ); // Test Include Files Functonality - let include_files = vec![Glob::new("*.rs").unwrap().compile_matcher()]; - let exclude_files = vec![Glob::new("*.rs").unwrap().compile_matcher()]; + let include_files = vec![PathMatcher::new("*.rs").unwrap()]; + let exclude_files = vec![PathMatcher::new("*.rs").unwrap()]; let rust_only_search_results = store .update(cx, |store, cx| { store.search_project(