Start work on genericizing /rustdoc (#13745)

This PR begins making the backing infrastructure for the `/rustdoc` command more generic so that it can support additional documentation providers.

In this PR we:

- Rename the `rustdoc` crate to `indexed_docs`, a more general-purpose name
- Start moving rustdoc-specific functionality into
`indexed_docs::providers::rustdoc`
- Add an `IndexedDocsRegistry` that holds one `IndexedDocsStore` per registered provider (sketched below)

We haven't yet removed the rustdoc-specific bits from the `DocsIndexer`. That will follow soon.
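
For orientation, here is a minimal, self-contained sketch of the registry/store relationship described above. The type and method names mirror the new `indexed_docs` crate (`IndexedDocsRegistry`, `IndexedDocsStore`, `Provider`, `ProviderId::rustdoc`), but the gpui global, background executor, and heed-backed database are simplified away, so treat this as an illustration rather than the code in this diff:

```rust
use std::collections::HashMap;
use std::sync::{Arc, RwLock};

/// Identifies a documentation provider (e.g. rustdoc).
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct ProviderId(Arc<str>);

impl ProviderId {
    fn rustdoc() -> Self {
        Self("rustdoc".into())
    }
}

/// A documentation provider. The real `Provider` also carries the path
/// to the provider's heed database.
struct Provider {
    id: ProviderId,
}

/// A per-provider store. The real `IndexedDocsStore` owns the database
/// future and the per-package indexing tasks.
struct IndexedDocsStore {
    provider_id: ProviderId,
}

/// Holds one store per registered provider.
#[derive(Default)]
struct IndexedDocsRegistry {
    stores_by_provider: RwLock<HashMap<ProviderId, Arc<IndexedDocsStore>>>,
}

impl IndexedDocsRegistry {
    fn register_provider(&self, provider: Provider) {
        self.stores_by_provider.write().unwrap().insert(
            provider.id.clone(),
            Arc::new(IndexedDocsStore {
                provider_id: provider.id,
            }),
        );
    }

    fn get_provider_store(&self, provider_id: ProviderId) -> Option<Arc<IndexedDocsStore>> {
        self.stores_by_provider
            .read()
            .unwrap()
            .get(&provider_id)
            .cloned()
    }
}

fn main() {
    let registry = IndexedDocsRegistry::default();

    // The assistant registers one provider per documentation source; today
    // that is just rustdoc, but additional providers register the same way.
    registry.register_provider(Provider {
        id: ProviderId::rustdoc(),
    });

    let store = registry
        .get_provider_store(ProviderId::rustdoc())
        .expect("rustdoc store should be registered");
    println!("found store for provider {:?}", store.provider_id);
}
```

In the actual crate, consumers such as the `/rustdoc` slash command resolve their store via `IndexedDocsStore::try_global(ProviderId::rustdoc(), cx)`, which looks it up in the global `IndexedDocsRegistry` rather than a rustdoc-specific global store.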

Release Notes:

- N/A
Marshall Bowers 2024-07-02 13:14:56 -04:00 committed by GitHub
parent eab98eb9c9
commit 7460381285
18 changed files with 425 additions and 355 deletions

Cargo.lock generated

@@ -391,6 +391,7 @@ dependencies = [
"heed",
"html_to_markdown 0.1.0",
"http 0.1.0",
"indexed_docs",
"indoc",
"language",
"log",
@@ -406,7 +407,6 @@ dependencies = [
"rand 0.8.5",
"regex",
"rope",
"rustdoc",
"schemars",
"search",
"semantic_index",
@@ -5493,6 +5493,31 @@ version = "1.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44feda355f4159a7c757171a77de25daf6411e217b4cabd03bd6650690468126"
[[package]]
name = "indexed_docs"
version = "0.1.0"
dependencies = [
"anyhow",
"async-trait",
"collections",
"derive_more",
"fs",
"futures 0.3.28",
"fuzzy",
"gpui",
"heed",
"html_to_markdown 0.1.0",
"http 0.1.0",
"indexmap 1.9.3",
"indoc",
"parking_lot",
"paths",
"pretty_assertions",
"serde",
"strum",
"util",
]
[[package]]
name = "indexmap"
version = "1.9.3"
@@ -9019,31 +9044,6 @@ dependencies = [
"semver",
]
[[package]]
name = "rustdoc"
version = "0.1.0"
dependencies = [
"anyhow",
"async-trait",
"collections",
"derive_more",
"fs",
"futures 0.3.28",
"fuzzy",
"gpui",
"heed",
"html_to_markdown 0.1.0",
"http 0.1.0",
"indexmap 1.9.3",
"indoc",
"parking_lot",
"paths",
"pretty_assertions",
"serde",
"strum",
"util",
]
[[package]]
name = "rustix"
version = "0.37.23"


@@ -45,6 +45,7 @@ members = [
"crates/html_to_markdown",
"crates/http",
"crates/image_viewer",
"crates/indexed_docs",
"crates/inline_completion_button",
"crates/install_cli",
"crates/journal",
@@ -82,7 +83,6 @@ members = [
"crates/rich_text",
"crates/rope",
"crates/rpc",
"crates/rustdoc",
"crates/search",
"crates/semantic_index",
"crates/semantic_version",
@@ -198,6 +198,7 @@ headless = { path = "crates/headless" }
html_to_markdown = { path = "crates/html_to_markdown" }
http = { path = "crates/http" }
image_viewer = { path = "crates/image_viewer" }
indexed_docs = { path = "crates/indexed_docs" }
inline_completion_button = { path = "crates/inline_completion_button" }
install_cli = { path = "crates/install_cli" }
journal = { path = "crates/journal" }
@@ -235,7 +236,6 @@ repl = { path = "crates/repl" }
rich_text = { path = "crates/rich_text" }
rope = { path = "crates/rope" }
rpc = { path = "crates/rpc" }
rustdoc = { path = "crates/rustdoc" }
search = { path = "crates/search" }
semantic_index = { path = "crates/semantic_index" }
semantic_version = { path = "crates/semantic_version" }


@@ -13,8 +13,8 @@ path = "src/assistant.rs"
doctest = false
[dependencies]
anyhow.workspace = true
anthropic = { workspace = true, features = ["schemars"] }
anyhow.workspace = true
assistant_slash_command.workspace = true
async-watch.workspace = true
cargo_toml.workspace = true
@@ -32,6 +32,7 @@ gpui.workspace = true
heed.workspace = true
html_to_markdown.workspace = true
http.workspace = true
indexed_docs.workspace = true
indoc.workspace = true
language.workspace = true
log.workspace = true
@@ -45,7 +46,6 @@ paths.workspace = true
project.workspace = true
regex.workspace = true
rope.workspace = true
rustdoc.workspace = true
schemars.workspace = true
search.workspace = true
semantic_index.workspace = true


@@ -20,9 +20,9 @@ pub(crate) use completion_provider::*;
pub(crate) use context_store::*;
use fs::Fs;
use gpui::{actions, AppContext, Global, SharedString, UpdateGlobal};
use indexed_docs::{IndexedDocsRegistry, Provider};
pub(crate) use inline_assistant::*;
pub(crate) use model_selector::*;
use rustdoc::RustdocStore;
use semantic_index::{CloudEmbeddingProvider, SemanticIndex};
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
@@ -292,7 +292,8 @@ pub fn init(fs: Arc<dyn Fs>, client: Arc<Client>, cx: &mut AppContext) {
assistant_panel::init(cx);
inline_assistant::init(fs.clone(), client.telemetry().clone(), cx);
terminal_inline_assistant::init(fs.clone(), client.telemetry().clone(), cx);
RustdocStore::init_global(cx);
IndexedDocsRegistry::init_global(cx);
register_indexed_docs_providers(cx);
CommandPaletteFilter::update_global(cx, |filter, _cx| {
filter.hide_namespace(Assistant::NAMESPACE);
@@ -327,6 +328,12 @@ fn register_slash_commands(cx: &mut AppContext) {
slash_command_registry.register_command(fetch_command::FetchSlashCommand, false);
}
fn register_indexed_docs_providers(cx: &mut AppContext) {
let indexed_docs_registry = IndexedDocsRegistry::global(cx);
indexed_docs_registry.register_provider(Provider::rustdoc());
}
pub fn humanize_token_count(count: usize) -> String {
match count {
0..=999 => count.to_string(),


@@ -39,6 +39,7 @@ use gpui::{
Subscription, Task, Transformation, UpdateGlobal, View, ViewContext, VisualContext, WeakView,
WindowContext,
};
use indexed_docs::{IndexedDocsStore, PackageName, ProviderId};
use language::{
language_settings::SoftWrap, AnchorRangeExt as _, AutoindentMode, Buffer, LanguageRegistry,
LspAdapterDelegate, OffsetRangeExt as _, Point, ToOffset as _,
@@ -47,7 +48,6 @@ use multi_buffer::MultiBufferRow;
use paths::contexts_dir;
use picker::{Picker, PickerDelegate};
use project::{Project, ProjectLspAdapterDelegate, ProjectTransaction};
use rustdoc::{CrateName, RustdocStore};
use search::{buffer_search::DivRegistrar, BufferSearchBar};
use settings::Settings;
use std::{
@@ -3410,7 +3410,9 @@ fn render_rustdoc_slash_command_trailer(
command: PendingSlashCommand,
cx: &mut WindowContext,
) -> AnyElement {
let rustdoc_store = RustdocStore::global(cx);
let Some(rustdoc_store) = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx).ok() else {
return Empty.into_any();
};
let Some((crate_name, _)) = command
.argument
@@ -3420,7 +3422,7 @@
return Empty.into_any();
};
let crate_name = CrateName::from(crate_name);
let crate_name = PackageName::from(crate_name);
if !rustdoc_store.is_indexing(&crate_name) {
return Empty.into_any();
}


@@ -8,9 +8,12 @@ use fs::Fs;
use futures::AsyncReadExt;
use gpui::{AppContext, Model, Task, WeakView};
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use indexed_docs::{
convert_rustdoc_to_markdown, IndexedDocsStore, LocalProvider, PackageName, ProviderId,
RustdocSource,
};
use language::LspAdapterDelegate;
use project::{Project, ProjectPath};
use rustdoc::{convert_rustdoc_to_markdown, CrateName, LocalProvider, RustdocSource, RustdocStore};
use ui::prelude::*;
use util::{maybe, ResultExt};
use workspace::Workspace;
@@ -21,7 +24,7 @@ impl RustdocSlashCommand {
async fn build_message(
fs: Arc<dyn Fs>,
http_client: Arc<HttpClientWithUrl>,
crate_name: CrateName,
crate_name: PackageName,
module_path: Vec<String>,
path_to_cargo_toml: Option<&Path>,
) -> Result<(RustdocSource, String)> {
@@ -127,8 +130,10 @@ impl SlashCommand for RustdocSlashCommand {
anyhow::Ok((fs, cargo_workspace_root))
});
let store = RustdocStore::global(cx);
let store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx);
cx.background_executor().spawn(async move {
let store = store?;
if let Some((crate_name, rest)) = query.split_once(':') {
if rest.is_empty() {
if let Some((fs, cargo_workspace_root)) = index_provider_deps.log_err() {
@@ -169,16 +174,17 @@
.next()
.ok_or_else(|| anyhow!("missing crate name"))
{
Ok(crate_name) => CrateName::from(crate_name),
Ok(crate_name) => PackageName::from(crate_name),
Err(err) => return Task::ready(Err(err)),
};
let item_path = path_components.map(ToString::to_string).collect::<Vec<_>>();
let text = cx.background_executor().spawn({
let rustdoc_store = RustdocStore::global(cx);
let rustdoc_store = IndexedDocsStore::try_global(ProviderId::rustdoc(), cx);
let crate_name = crate_name.clone();
let item_path = item_path.clone();
async move {
let rustdoc_store = rustdoc_store?;
let item_docs = rustdoc_store
.load(
crate_name.clone(),
@@ -191,7 +197,7 @@ impl SlashCommand for RustdocSlashCommand {
.await;
if let Ok(item_docs) = item_docs {
anyhow::Ok((RustdocSource::Index, item_docs.docs().to_owned()))
anyhow::Ok((RustdocSource::Index, item_docs.to_string()))
} else {
Self::build_message(
fs,


@@ -1,5 +1,5 @@
[package]
name = "rustdoc"
name = "indexed_docs"
version = "0.1.0"
edition = "2021"
publish = false
@@ -9,7 +9,7 @@ license = "GPL-3.0-or-later"
workspace = true
[lib]
path = "src/rustdoc.rs"
path = "src/indexed_docs.rs"
[dependencies]
anyhow.workspace = true


@@ -0,0 +1,8 @@
mod indexer;
mod providers;
mod registry;
mod store;
pub use crate::providers::rustdoc::*;
pub use crate::registry::*;
pub use crate::store::*;


@@ -0,0 +1,122 @@
use std::sync::Arc;
use anyhow::{Context, Result};
use async_trait::async_trait;
use collections::{HashSet, VecDeque};
use crate::{
convert_rustdoc_to_markdown, IndexedDocsDatabase, PackageName, RustdocItem, RustdocItemKind,
};
#[async_trait]
pub trait IndexedDocsProvider {
async fn fetch_page(
&self,
package: &PackageName,
item: Option<&RustdocItem>,
) -> Result<Option<String>>;
}
#[derive(Debug)]
struct RustdocItemWithHistory {
pub item: RustdocItem,
#[cfg(debug_assertions)]
pub history: Vec<String>,
}
pub(crate) struct DocsIndexer {
database: Arc<IndexedDocsDatabase>,
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
}
impl DocsIndexer {
pub fn new(
database: Arc<IndexedDocsDatabase>,
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
) -> Self {
Self { database, provider }
}
/// Indexes the package with the given name.
pub async fn index(&self, package: PackageName) -> Result<()> {
let Some(package_root_content) = self.provider.fetch_page(&package, None).await? else {
return Ok(());
};
let (crate_root_markdown, items) =
convert_rustdoc_to_markdown(package_root_content.as_bytes())?;
self.database
.insert(package.clone(), None, crate_root_markdown)
.await?;
let mut seen_items = HashSet::from_iter(items.clone());
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
item,
#[cfg(debug_assertions)]
history: Vec::new(),
}));
while let Some(item_with_history) = items_to_visit.pop_front() {
let item = &item_with_history.item;
let Some(result) = self
.provider
.fetch_page(&package, Some(&item))
.await
.with_context(|| {
#[cfg(debug_assertions)]
{
format!(
"failed to fetch {item:?}: {history:?}",
history = item_with_history.history
)
}
#[cfg(not(debug_assertions))]
{
format!("failed to fetch {item:?}")
}
})?
else {
continue;
};
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
self.database
.insert(package.clone(), Some(item), markdown)
.await?;
let parent_item = item;
for mut item in referenced_items {
if seen_items.contains(&item) {
continue;
}
seen_items.insert(item.clone());
item.path.extend(parent_item.path.clone());
match parent_item.kind {
RustdocItemKind::Mod => {
item.path.push(parent_item.name.clone());
}
_ => {}
}
items_to_visit.push_back(RustdocItemWithHistory {
#[cfg(debug_assertions)]
history: {
let mut history = item_with_history.history.clone();
history.push(item.url_path());
history
},
item,
});
}
}
Ok(())
}
}


@@ -0,0 +1 @@
pub mod rustdoc;


@@ -0,0 +1,117 @@
mod item;
mod to_markdown;
pub use item::*;
pub use to_markdown::convert_rustdoc_to_markdown;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{bail, Context, Result};
use async_trait::async_trait;
use fs::Fs;
use futures::AsyncReadExt;
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use crate::indexer::IndexedDocsProvider;
use crate::PackageName;
#[derive(Debug, Clone, Copy)]
pub enum RustdocSource {
/// The docs were sourced from Zed's rustdoc index.
Index,
/// The docs were sourced from local `cargo doc` output.
Local,
/// The docs were sourced from `docs.rs`.
DocsDotRs,
}
pub struct LocalProvider {
fs: Arc<dyn Fs>,
cargo_workspace_root: PathBuf,
}
impl LocalProvider {
pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
Self {
fs,
cargo_workspace_root,
}
}
}
#[async_trait]
impl IndexedDocsProvider for LocalProvider {
async fn fetch_page(
&self,
crate_name: &PackageName,
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
local_cargo_doc_path.push(crate_name.as_ref());
if let Some(item) = item {
local_cargo_doc_path.push(item.url_path());
} else {
local_cargo_doc_path.push("index.html");
}
let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
return Ok(None);
};
Ok(Some(contents))
}
}
pub struct DocsDotRsProvider {
http_client: Arc<HttpClientWithUrl>,
}
impl DocsDotRsProvider {
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
Self { http_client }
}
}
#[async_trait]
impl IndexedDocsProvider for DocsDotRsProvider {
async fn fetch_page(
&self,
crate_name: &PackageName,
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let version = "latest";
let path = format!(
"{crate_name}/{version}/{crate_name}{item_path}",
item_path = item
.map(|item| format!("/{}", item.url_path()))
.unwrap_or_default()
);
let mut response = self
.http_client
.get(
&format!("https://docs.rs/{path}"),
AsyncBody::default(),
true,
)
.await?;
let mut body = Vec::new();
response
.body_mut()
.read_to_end(&mut body)
.await
.context("error reading docs.rs response body")?;
if response.status().is_client_error() {
let text = String::from_utf8_lossy(body.as_slice());
bail!(
"status error {}, response: {text:?}",
response.status().as_u16()
);
}
Ok(Some(String::from_utf8(body)?))
}
}


@@ -0,0 +1,47 @@
use std::sync::Arc;
use collections::HashMap;
use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, UpdateGlobal};
use parking_lot::RwLock;
use crate::{IndexedDocsStore, Provider, ProviderId};
struct GlobalIndexedDocsRegistry(Arc<IndexedDocsRegistry>);
impl Global for GlobalIndexedDocsRegistry {}
pub struct IndexedDocsRegistry {
executor: BackgroundExecutor,
stores_by_provider: RwLock<HashMap<ProviderId, Arc<IndexedDocsStore>>>,
}
impl IndexedDocsRegistry {
pub fn global(cx: &AppContext) -> Arc<Self> {
GlobalIndexedDocsRegistry::global(cx).0.clone()
}
pub fn init_global(cx: &mut AppContext) {
GlobalIndexedDocsRegistry::set_global(
cx,
GlobalIndexedDocsRegistry(Arc::new(Self::new(cx.background_executor().clone()))),
);
}
pub fn new(executor: BackgroundExecutor) -> Self {
Self {
executor,
stores_by_provider: RwLock::new(HashMap::default()),
}
}
pub fn register_provider(&self, provider: Provider) {
self.stores_by_provider.write().insert(
provider.id.clone(),
Arc::new(IndexedDocsStore::new(provider, self.executor.clone())),
);
}
pub fn get_provider_store(&self, provider_id: ProviderId) -> Option<Arc<IndexedDocsStore>> {
self.stores_by_provider.read().get(&provider_id).cloned()
}
}


@@ -8,59 +8,71 @@ use derive_more::{Deref, Display};
use futures::future::{self, BoxFuture, Shared};
use futures::FutureExt;
use fuzzy::StringMatchCandidate;
use gpui::{AppContext, BackgroundExecutor, Global, ReadGlobal, Task, UpdateGlobal};
use gpui::{AppContext, BackgroundExecutor, Task};
use heed::types::SerdeBincode;
use heed::Database;
use parking_lot::RwLock;
use serde::{Deserialize, Serialize};
use util::ResultExt;
use crate::indexer::{RustdocIndexer, RustdocProvider};
use crate::{RustdocItem, RustdocItemKind};
use crate::indexer::{DocsIndexer, IndexedDocsProvider};
use crate::{IndexedDocsRegistry, RustdocItem};
/// The name of a crate.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
pub struct CrateName(Arc<str>);
pub struct ProviderId(Arc<str>);
impl From<&str> for CrateName {
impl ProviderId {
pub fn rustdoc() -> Self {
Self("rustdoc".into())
}
}
pub struct Provider {
pub id: ProviderId,
pub database_path: PathBuf,
}
impl Provider {
pub fn rustdoc() -> Self {
Self {
id: ProviderId("rustdoc".into()),
database_path: paths::support_dir().join("docs/rust/rustdoc-db.1.mdb"),
}
}
}
/// The name of a package.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Deref, Display)]
pub struct PackageName(Arc<str>);
impl From<&str> for PackageName {
fn from(value: &str) -> Self {
Self(value.into())
}
}
struct GlobalRustdocStore(Arc<RustdocStore>);
impl Global for GlobalRustdocStore {}
pub struct RustdocStore {
/// A store for indexed docs.
pub struct IndexedDocsStore {
executor: BackgroundExecutor,
database_future: Shared<BoxFuture<'static, Result<Arc<RustdocDatabase>, Arc<anyhow::Error>>>>,
indexing_tasks_by_crate:
RwLock<HashMap<CrateName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
database_future:
Shared<BoxFuture<'static, Result<Arc<IndexedDocsDatabase>, Arc<anyhow::Error>>>>,
indexing_tasks_by_package:
RwLock<HashMap<PackageName, Shared<Task<Result<(), Arc<anyhow::Error>>>>>>,
}
impl RustdocStore {
pub fn global(cx: &AppContext) -> Arc<Self> {
GlobalRustdocStore::global(cx).0.clone()
impl IndexedDocsStore {
pub fn try_global(provider: ProviderId, cx: &AppContext) -> Result<Arc<Self>> {
let registry = IndexedDocsRegistry::global(cx);
registry
.get_provider_store(provider.clone())
.ok_or_else(|| anyhow!("no indexed docs store found for {provider}"))
}
pub fn init_global(cx: &mut AppContext) {
GlobalRustdocStore::set_global(
cx,
GlobalRustdocStore(Arc::new(Self::new(cx.background_executor().clone()))),
);
}
pub fn new(executor: BackgroundExecutor) -> Self {
pub fn new(provider: Provider, executor: BackgroundExecutor) -> Self {
let database_future = executor
.spawn({
let executor = executor.clone();
async move {
RustdocDatabase::new(
paths::support_dir().join("docs/rust/rustdoc-db.0.mdb"),
executor,
)
}
async move { IndexedDocsDatabase::new(provider.database_path, executor) }
})
.then(|result| future::ready(result.map(Arc::new).map_err(Arc::new)))
.boxed()
@@ -69,34 +81,34 @@ impl RustdocStore {
Self {
executor,
database_future,
indexing_tasks_by_crate: RwLock::new(HashMap::default()),
indexing_tasks_by_package: RwLock::new(HashMap::default()),
}
}
/// Returns whether the crate with the given name is currently being indexed.
pub fn is_indexing(&self, crate_name: &CrateName) -> bool {
self.indexing_tasks_by_crate.read().contains_key(crate_name)
/// Returns whether the package with the given name is currently being indexed.
pub fn is_indexing(&self, package: &PackageName) -> bool {
self.indexing_tasks_by_package.read().contains_key(package)
}
pub async fn load(
&self,
crate_name: CrateName,
package: PackageName,
item_path: Option<String>,
) -> Result<RustdocDatabaseEntry> {
) -> Result<MarkdownDocs> {
self.database_future
.clone()
.await
.map_err(|err| anyhow!(err))?
.load(crate_name, item_path)
.load(package, item_path)
.await
}
pub fn index(
self: Arc<Self>,
crate_name: CrateName,
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
package: PackageName,
provider: Box<dyn IndexedDocsProvider + Send + Sync + 'static>,
) -> Shared<Task<Result<(), Arc<anyhow::Error>>>> {
if let Some(existing_task) = self.indexing_tasks_by_crate.read().get(&crate_name) {
if let Some(existing_task) = self.indexing_tasks_by_package.read().get(&package) {
return existing_task.clone();
}
@@ -104,13 +116,13 @@ impl RustdocStore {
.executor
.spawn({
let this = self.clone();
let crate_name = crate_name.clone();
let package = package.clone();
async move {
let _finally = util::defer({
let this = this.clone();
let crate_name = crate_name.clone();
let package = package.clone();
move || {
this.indexing_tasks_by_crate.write().remove(&crate_name);
this.indexing_tasks_by_package.write().remove(&package);
}
});
@@ -120,9 +132,9 @@
.clone()
.await
.map_err(|err| anyhow!(err))?;
let indexer = RustdocIndexer::new(database, provider);
let indexer = DocsIndexer::new(database, provider);
indexer.index(crate_name.clone()).await
indexer.index(package.clone()).await
};
index_task.await.map_err(Arc::new)
@@ -130,9 +142,9 @@
})
.shared();
self.indexing_tasks_by_crate
self.indexing_tasks_by_package
.write()
.insert(crate_name, indexing_task.clone());
.insert(package, indexing_task.clone());
indexing_task
}
@@ -177,27 +189,16 @@
}
}
#[derive(Serialize, Deserialize)]
pub enum RustdocDatabaseEntry {
Crate { docs: String },
Item { kind: RustdocItemKind, docs: String },
}
#[derive(Debug, PartialEq, Eq, Clone, Display, Serialize, Deserialize)]
pub struct MarkdownDocs(pub String);
impl RustdocDatabaseEntry {
pub fn docs(&self) -> &str {
match self {
Self::Crate { docs } | Self::Item { docs, .. } => &docs,
}
}
}
pub(crate) struct RustdocDatabase {
pub(crate) struct IndexedDocsDatabase {
executor: BackgroundExecutor,
env: heed::Env,
entries: Database<SerdeBincode<String>, SerdeBincode<RustdocDatabaseEntry>>,
entries: Database<SerdeBincode<String>, SerdeBincode<MarkdownDocs>>,
}
impl RustdocDatabase {
impl IndexedDocsDatabase {
pub fn new(path: PathBuf, executor: BackgroundExecutor) -> Result<Self> {
std::fs::create_dir_all(&path)?;
@@ -238,15 +239,15 @@ impl RustdocDatabase {
pub fn load(
&self,
crate_name: CrateName,
package: PackageName,
item_path: Option<String>,
) -> Task<Result<RustdocDatabaseEntry>> {
) -> Task<Result<MarkdownDocs>> {
let env = self.env.clone();
let entries = self.entries;
let item_path = if let Some(item_path) = item_path {
format!("{crate_name}::{item_path}")
format!("{package}::{item_path}")
} else {
crate_name.to_string()
package.to_string()
};
self.executor.spawn(async move {
@@ -259,22 +260,16 @@
pub fn insert(
&self,
crate_name: CrateName,
package: PackageName,
item: Option<&RustdocItem>,
docs: String,
) -> Task<Result<()>> {
let env = self.env.clone();
let entries = self.entries;
let (item_path, entry) = if let Some(item) = item {
(
format!("{crate_name}::{}", item.display()),
RustdocDatabaseEntry::Item {
kind: item.kind,
docs,
},
)
(format!("{package}::{}", item.display()), MarkdownDocs(docs))
} else {
(crate_name.to_string(), RustdocDatabaseEntry::Crate { docs })
(package.to_string(), MarkdownDocs(docs))
};
self.executor.spawn(async move {


@@ -1,226 +0,0 @@
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::{bail, Context, Result};
use async_trait::async_trait;
use collections::{HashSet, VecDeque};
use fs::Fs;
use futures::AsyncReadExt;
use http::{AsyncBody, HttpClient, HttpClientWithUrl};
use crate::{
convert_rustdoc_to_markdown, CrateName, RustdocDatabase, RustdocItem, RustdocItemKind,
};
#[derive(Debug, Clone, Copy)]
pub enum RustdocSource {
/// The docs were sourced from Zed's rustdoc index.
Index,
/// The docs were sourced from local `cargo doc` output.
Local,
/// The docs were sourced from `docs.rs`.
DocsDotRs,
}
#[async_trait]
pub trait RustdocProvider {
async fn fetch_page(
&self,
crate_name: &CrateName,
item: Option<&RustdocItem>,
) -> Result<Option<String>>;
}
pub struct LocalProvider {
fs: Arc<dyn Fs>,
cargo_workspace_root: PathBuf,
}
impl LocalProvider {
pub fn new(fs: Arc<dyn Fs>, cargo_workspace_root: PathBuf) -> Self {
Self {
fs,
cargo_workspace_root,
}
}
}
#[async_trait]
impl RustdocProvider for LocalProvider {
async fn fetch_page(
&self,
crate_name: &CrateName,
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let mut local_cargo_doc_path = self.cargo_workspace_root.join("target/doc");
local_cargo_doc_path.push(crate_name.as_ref());
if let Some(item) = item {
local_cargo_doc_path.push(item.url_path());
} else {
local_cargo_doc_path.push("index.html");
}
let Ok(contents) = self.fs.load(&local_cargo_doc_path).await else {
return Ok(None);
};
Ok(Some(contents))
}
}
pub struct DocsDotRsProvider {
http_client: Arc<HttpClientWithUrl>,
}
impl DocsDotRsProvider {
pub fn new(http_client: Arc<HttpClientWithUrl>) -> Self {
Self { http_client }
}
}
#[async_trait]
impl RustdocProvider for DocsDotRsProvider {
async fn fetch_page(
&self,
crate_name: &CrateName,
item: Option<&RustdocItem>,
) -> Result<Option<String>> {
let version = "latest";
let path = format!(
"{crate_name}/{version}/{crate_name}{item_path}",
item_path = item
.map(|item| format!("/{}", item.url_path()))
.unwrap_or_default()
);
let mut response = self
.http_client
.get(
&format!("https://docs.rs/{path}"),
AsyncBody::default(),
true,
)
.await?;
let mut body = Vec::new();
response
.body_mut()
.read_to_end(&mut body)
.await
.context("error reading docs.rs response body")?;
if response.status().is_client_error() {
let text = String::from_utf8_lossy(body.as_slice());
bail!(
"status error {}, response: {text:?}",
response.status().as_u16()
);
}
Ok(Some(String::from_utf8(body)?))
}
}
#[derive(Debug)]
struct RustdocItemWithHistory {
pub item: RustdocItem,
#[cfg(debug_assertions)]
pub history: Vec<String>,
}
pub(crate) struct RustdocIndexer {
database: Arc<RustdocDatabase>,
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
}
impl RustdocIndexer {
pub fn new(
database: Arc<RustdocDatabase>,
provider: Box<dyn RustdocProvider + Send + Sync + 'static>,
) -> Self {
Self { database, provider }
}
/// Indexes the crate with the given name.
pub async fn index(&self, crate_name: CrateName) -> Result<()> {
let Some(crate_root_content) = self.provider.fetch_page(&crate_name, None).await? else {
return Ok(());
};
let (crate_root_markdown, items) =
convert_rustdoc_to_markdown(crate_root_content.as_bytes())?;
self.database
.insert(crate_name.clone(), None, crate_root_markdown)
.await?;
let mut seen_items = HashSet::from_iter(items.clone());
let mut items_to_visit: VecDeque<RustdocItemWithHistory> =
VecDeque::from_iter(items.into_iter().map(|item| RustdocItemWithHistory {
item,
#[cfg(debug_assertions)]
history: Vec::new(),
}));
while let Some(item_with_history) = items_to_visit.pop_front() {
let item = &item_with_history.item;
let Some(result) = self
.provider
.fetch_page(&crate_name, Some(&item))
.await
.with_context(|| {
#[cfg(debug_assertions)]
{
format!(
"failed to fetch {item:?}: {history:?}",
history = item_with_history.history
)
}
#[cfg(not(debug_assertions))]
{
format!("failed to fetch {item:?}")
}
})?
else {
continue;
};
let (markdown, referenced_items) = convert_rustdoc_to_markdown(result.as_bytes())?;
self.database
.insert(crate_name.clone(), Some(item), markdown)
.await?;
let parent_item = item;
for mut item in referenced_items {
if seen_items.contains(&item) {
continue;
}
seen_items.insert(item.clone());
item.path.extend(parent_item.path.clone());
match parent_item.kind {
RustdocItemKind::Mod => {
item.path.push(parent_item.name.clone());
}
_ => {}
}
items_to_visit.push_back(RustdocItemWithHistory {
#[cfg(debug_assertions)]
history: {
let mut history = item_with_history.history.clone();
history.push(item.url_path());
history
},
item,
});
}
}
Ok(())
}
}


@@ -1,9 +0,0 @@
mod indexer;
mod item;
mod store;
mod to_markdown;
pub use crate::indexer::{DocsDotRsProvider, LocalProvider, RustdocSource};
pub use crate::item::*;
pub use crate::store::*;
pub use crate::to_markdown::convert_rustdoc_to_markdown;