Promote package suggestions to a first-class concept on IndexedDocsProviders (#16177)

This PR promotes package suggestions to a first-class concept on the
`IndexedDocsProvider` trait.

This will allow any implementer of `IndexedDocsProvider` to provide a
list of package names to suggest for use with `/docs`.

For the docs.rs provider we use the 250 most popular Rust crates (as
identified [here](https://lib.rs/std)), and for the rustdoc provider we
use the packages in the Cargo workspace.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-08-13 16:01:58 -04:00 committed by GitHub
parent bd71e9192c
commit a81e355dc5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 328 additions and 93 deletions

View file

@ -72,9 +72,6 @@ impl DocsSlashCommand {
});
if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() {
// List the workspace crates once to prime the cache.
LocalRustdocProvider::list_workspace_crates().ok();
indexed_docs_registry.register_provider(Box::new(LocalRustdocProvider::new(
fs,
cargo_workspace_root,
@ -232,50 +229,26 @@ impl SlashCommand for DocsSlashCommand {
drop(store.clone().index(package.as_str().into()));
}
let items = store.search(package).await;
let suggested_packages = store.clone().suggest_packages().await?;
let search_results = store.search(package).await;
if provider == LocalRustdocProvider::id() {
let items = build_completions(provider.clone(), items);
let workspace_crates = LocalRustdocProvider::list_workspace_crates()?;
let mut all_items = items;
let workspace_crate_completions = workspace_crates
let mut items = build_completions(provider.clone(), search_results);
let workspace_crate_completions = suggested_packages
.into_iter()
.filter(|crate_name| {
!all_items
.filter(|package_name| {
!items
.iter()
.any(|item| item.label.as_str() == crate_name.as_ref())
.any(|item| item.label.as_str() == package_name.as_ref())
})
.map(|crate_name| ArgumentCompletion {
label: format!("{crate_name} (unindexed)"),
new_text: format!("{provider} {crate_name}"),
.map(|package_name| ArgumentCompletion {
label: format!("{package_name} (unindexed)"),
new_text: format!("{provider} {package_name}"),
run_command: true,
})
.collect::<Vec<_>>();
all_items.extend(workspace_crate_completions);
return Ok(all_items);
}
items.extend(workspace_crate_completions);
if items.is_empty() {
if provider == DocsDotRsProvider::id() {
return Ok(std::iter::once(ArgumentCompletion {
label: format!(
"Enter a {package_term} name or try one of these:",
package_term = package_term(&provider)
),
new_text: provider.to_string(),
run_command: false,
})
.chain(DocsDotRsProvider::AUTO_SUGGESTED_CRATES.into_iter().map(
|crate_name| ArgumentCompletion {
label: crate_name.to_string(),
new_text: format!("{provider} {crate_name}"),
run_command: true,
},
))
.collect());
}
return Ok(vec![ArgumentCompletion {
label: format!(
"Enter a {package_term} name.",
@ -286,7 +259,7 @@ impl SlashCommand for DocsSlashCommand {
}]);
}
Ok(build_completions(provider, items))
Ok(items)
}
DocsSlashCommandArgs::SearchItemDocs {
provider,

View file

@ -30,6 +30,10 @@ impl IndexedDocsProvider for ExtensionIndexedDocsProvider {
database_path
}
/// Returns the packages this provider suggests for completion.
///
/// This provider offers no suggestions, so the list is always empty.
async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
    let no_suggestions = Vec::new();
    Ok(no_suggestions)
}
async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
self.extension
.call({

View file

@ -44,34 +44,6 @@ impl LocalRustdocProvider {
cargo_workspace_root,
}
}
/// Lists the name of every package reported by `cargo metadata`.
///
/// That covers the workspace's own crates as well as all dependency crates.
/// The result is kept in a process-wide cache and reused for 300 seconds
/// before `cargo metadata` is run again.
pub fn list_workspace_crates() -> Result<Vec<Arc<str>>> {
    static WORKSPACE_CRATES: LazyLock<RwLock<Option<(BTreeSet<Arc<str>>, Instant)>>> =
        LazyLock::new(|| RwLock::new(None));

    // Serve from the cache while it is fresh. The read guard is confined to
    // this scope so it is released before the write below.
    {
        let cached = WORKSPACE_CRATES.read();
        if let Some((names, fetched_at)) = &*cached {
            if fetched_at.elapsed() < Duration::from_secs(300) {
                return Ok(names.iter().cloned().collect());
            }
        }
    }

    let metadata = MetadataCommand::new()
        .exec()
        .context("failed to load cargo metadata")?;

    let names: BTreeSet<Arc<str>> = metadata
        .packages
        .into_iter()
        .map(|package| package.name.into())
        .collect();

    // Build the return value first so the set itself can move into the cache.
    let listing = names.iter().cloned().collect();
    *WORKSPACE_CRATES.write() = Some((names, Instant::now()));

    Ok(listing)
}
}
#[async_trait]
@ -84,6 +56,32 @@ impl IndexedDocsProvider for LocalRustdocProvider {
paths::support_dir().join("docs/rust/rustdoc-db.1.mdb")
}
/// Suggests every package reported by `cargo metadata` for the workspace
/// rooted at `self.cargo_workspace_root`.
///
/// The result is cached process-wide for 300 seconds. The cache entry records
/// the workspace root it was computed for, so a provider pointing at a
/// different workspace never receives another workspace's crates.
///
/// Returns an error if `cargo metadata` fails for
/// `<cargo_workspace_root>/Cargo.toml`.
async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
    // The cache is shared by every `LocalRustdocProvider`, while the metadata
    // query depends on the provider's own workspace root. Remember the root
    // alongside the crates so an entry is only reused for the same workspace.
    static WORKSPACE_CRATES: LazyLock<
        RwLock<Option<(std::path::PathBuf, BTreeSet<PackageName>, Instant)>>,
    > = LazyLock::new(|| RwLock::new(None));

    let workspace_root = self.cargo_workspace_root.to_path_buf();

    if let Some((cached_root, crates, fetched_at)) = &*WORKSPACE_CRATES.read() {
        if *cached_root == workspace_root && fetched_at.elapsed() < Duration::from_secs(300) {
            return Ok(crates.iter().cloned().collect());
        }
    }

    let workspace = MetadataCommand::new()
        .manifest_path(workspace_root.join("Cargo.toml"))
        .exec()
        .context("failed to load cargo metadata")?;

    let workspace_crates = workspace
        .packages
        .into_iter()
        .map(|package| PackageName::from(package.name.as_str()))
        .collect::<BTreeSet<_>>();

    // Build the return value before moving the set into the cache.
    let suggestions = workspace_crates.iter().cloned().collect();
    *WORKSPACE_CRATES.write() = Some((workspace_root, workspace_crates, Instant::now()));

    Ok(suggestions)
}
async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
index_rustdoc(package, database, {
move |crate_name, item| {
@ -130,26 +128,6 @@ pub struct DocsDotRsProvider {
}
impl DocsDotRsProvider {
/// The list of crates to auto-suggest for the docs.rs provider when
/// the index is empty.
///
/// List has been chosen loosely based on [this list](https://lib.rs/std) of
/// popular Rust libraries.
///
/// Keep this alphabetized.
pub const AUTO_SUGGESTED_CRATES: &'static [&'static str] = &[
"anyhow",
"axum",
"chrono",
"itertools",
"rand",
"regex",
"serde",
"strum",
"thiserror",
"tokio",
];
pub fn id() -> ProviderId {
ProviderId("docs-rs".into())
}
@ -169,6 +147,18 @@ impl IndexedDocsProvider for DocsDotRsProvider {
paths::support_dir().join("docs/rust/docs-rs-db.1.mdb")
}
/// Suggests the crates listed in the bundled `popular_crates.txt`.
///
/// The file is embedded at compile time and parsed once on first use. Each
/// line is trimmed; blank lines and lines starting with `#` (comments) are
/// skipped, and every remaining line becomes one package name.
async fn suggest_packages(&self) -> Result<Vec<PackageName>> {
    static POPULAR_CRATES: LazyLock<Vec<PackageName>> = LazyLock::new(|| {
        include_str!("./rustdoc/popular_crates.txt")
            .lines()
            // Trim before filtering so indented comments and whitespace-only
            // lines are recognized and never turned into package names.
            .map(str::trim)
            .filter(|line| !line.is_empty() && !line.starts_with('#'))
            .map(|line| PackageName::from(line))
            .collect()
    });

    Ok(POPULAR_CRATES.clone())
}
async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()> {
index_rustdoc(package, database, {
move |crate_name, item| {

View file

@ -0,0 +1,252 @@
# A list of the most popular Rust crates.
# Sourced from https://lib.rs/std.
serde
serde_json
syn
clap
thiserror
rand
log
tokio
anyhow
regex
quote
proc-macro2
base64
itertools
chrono
lazy_static
once_cell
libc
reqwest
futures
bitflags
tracing
url
bytes
toml
tempfile
uuid
indexmap
env_logger
num-traits
async-trait
sha2
hex
tracing-subscriber
http
parking_lot
cfg-if
futures-util
cc
hashbrown
rayon
hyper
getrandom
semver
strum
flate2
tokio-util
smallvec
criterion
paste
heck
rand_core
nom
rustls
nix
glob
time
byteorder
strum_macros
serde_yaml
wasm-bindgen
ahash
either
num_cpus
rand_chacha
prost
percent-encoding
pin-project-lite
tokio-stream
bincode
walkdir
bindgen
axum
windows-sys
futures-core
ring
digest
num-bigint
rustls-pemfile
serde_with
crossbeam-channel
tokio-rustls
hmac
fastrand
dirs
zeroize
socket2
pin-project
tower
derive_more
memchr
toml_edit
static_assertions
pretty_assertions
js-sys
convert_case
unicode-width
pkg-config
itoa
colored
rustc-hash
darling
mime
web-sys
image
bytemuck
which
sha1
dashmap
arrayvec
fnv
tonic
humantime
libloading
winapi
rustc_version
http-body
indoc
num
home
serde_urlencoded
http-body-util
unicode-segmentation
num-integer
webpki-roots
phf
futures-channel
indicatif
petgraph
ordered-float
strsim
zstd
console
encoding_rs
wasm-bindgen-futures
urlencoding
subtle
crc32fast
slab
rustix
predicates
spin
hyper-rustls
backtrace
rustversion
mio
scopeguard
proc-macro-error
hyper-util
ryu
prost-types
textwrap
memmap2
zip
zerocopy
generic-array
tar
pyo3
async-stream
quick-xml
memoffset
csv
crossterm
windows
num_enum
tokio-tungstenite
crossbeam-utils
async-channel
lru
aes
futures-lite
tracing-core
prettyplease
httparse
serde_bytes
tracing-log
tower-service
cargo_metadata
pest
mime_guess
tower-http
data-encoding
native-tls
prost-build
proptest
derivative
serial_test
libm
half
futures-io
bitvec
rustls-native-certs
ureq
object
anstyle
tonic-build
form_urlencoded
num-derive
pest_derive
schemars
proc-macro-crate
rstest
futures-executor
assert_cmd
termcolor
serde_repr
ctrlc
sha3
clap_complete
flume
mockall
ipnet
aho-corasick
atty
signal-hook
async-std
filetime
num-complex
opentelemetry
cmake
arc-swap
derive_builder
async-recursion
dyn-clone
bumpalo
fs_extra
git2
sysinfo
shlex
instant
approx
rmp-serde
rand_distr
rustls-pki-types
maplit
sqlx
blake3
hyper-tls
dotenvy
jsonwebtoken
openssl-sys
crossbeam
camino
winreg
config
rsa
bit-vec
chrono-tz
async-lock
bstr

View file

@ -39,6 +39,13 @@ pub trait IndexedDocsProvider {
/// Returns the path to the database for this provider.
fn database_path(&self) -> PathBuf;
/// Returns a list of packages as suggestions to be included in the search
/// results.
///
/// This can be used to provide completions for known packages (e.g., from the
/// local project or a registry) before a package has been indexed.
///
/// A provider with nothing to suggest can return an empty list.
async fn suggest_packages(&self) -> Result<Vec<PackageName>>;
/// Indexes the package with the given name.
async fn index(&self, package: PackageName, database: Arc<IndexedDocsDatabase>) -> Result<()>;
}
@ -122,6 +129,12 @@ impl IndexedDocsStore {
.await
}
pub fn suggest_packages(self: Arc<Self>) -> Task<Result<Vec<PackageName>>> {
let this = self.clone();
self.executor
.spawn(async move { this.provider.suggest_packages().await })
}
pub fn index(
self: Arc<Self>,
package: PackageName,

View file

@ -12,6 +12,9 @@ extend-exclude = [
"crates/google_ai/src/supported_countries.rs",
"crates/open_ai/src/supported_countries.rs",
# Some crate names are flagged as typos.
"crates/indexed_docs/src/providers/rustdoc/popular_crates.txt",
# Stripe IDs are flagged as typos.
"crates/collab/src/db/tests/processed_stripe_event_tests.rs",
# Not our typos