Update on-demand input docs

This commit is contained in:
Jack Rickard 2022-09-11 01:46:49 +01:00
parent 28987ed733
commit 0403696c4e
No known key found for this signature in database
GPG key ID: 88084D7D08A72C8A
2 changed files with 69 additions and 59 deletions

View file

@ -1,51 +1,37 @@
# On-Demand (Lazy) Inputs # On-Demand (Lazy) Inputs
Salsa input queries work best if you can easily provide all of the inputs upfront. {{#include ../caveat.md}}
Salsa inputs work best if you can easily provide all of the inputs upfront.
However sometimes the set of inputs is not known beforehand. However sometimes the set of inputs is not known beforehand.
A typical example is reading files from disk. A typical example is reading files from disk.
While it is possible to eagerly scan a particular directory and create an in-memory file tree in a salsa input query, a more straight-forward approach is to read the files lazily. While it is possible to eagerly scan a particular directory and create an in-memory file tree as salsa input structs, a more straight-forward approach is to read the files lazily.
That is, when someone requests the text of a file for the first time: That is, when a query requests the text of a file for the first time:
1. Read the file from disk and cache it. 1. Read the file from disk and cache it.
2. Set up a file-system watcher for this path. 2. Set up a file-system watcher for this path.
3. Invalidate the cached file once the watcher sends a change notification. 3. Update the cached file when the watcher sends a change notification.
This can be achieved in Salsa by caching the inputs in your database structs and adding a method to the database trait that retrieves them from this cache.
A complete, runnable file-watching example can be found in [the lazy-input example](https://github.com/salsa-rs/salsa/tree/master/examples-2022/lazy-input).
This is possible to achieve in salsa, using a derived query and `report_synthetic_read` and `invalidate` queries.
The setup looks roughly like this: The setup looks roughly like this:
```rust,ignore ```rust,ignore
#[salsa::query_group(VfsDatabaseStorage)] {{#include ../../../examples-2022/lazy-input/src/main.rs:db}}
trait VfsDatabase: salsa::Database + FileWatcher {
fn read(&self, path: PathBuf) -> String;
}
trait FileWatcher {
fn watch(&self, path: &Path);
fn did_change_file(&mut self, path: &Path);
}
fn read(db: &dyn VfsDatabase, path: PathBuf) -> String {
db.salsa_runtime()
.report_synthetic_read(salsa::Durability::LOW);
db.watch(&path);
std::fs::read_to_string(&path).unwrap_or_default()
}
#[salsa::database(VfsDatabaseStorage)]
struct MyDatabase { ... }
impl FileWatcher for MyDatabase {
fn watch(&self, path: &Path) { ... }
fn did_change_file(&mut self, path: &Path) {
ReadQuery.in_db_mut(self).invalidate(path);
}
}
``` ```
- We declare the query as a derived query (which is the default). - We declare a method on the `Db` trait that gives us a `File` input on-demand (it only requires a `&dyn Db` not a `&mut dyn Db`).
- In the query implementation, we don't call any other query and just directly read file from disk. - There should only be one input struct per file, so we implement that method using a cache (`DashMap` is like a `RwLock<HashMap>`).
- Because the query doesn't read any inputs, it will be assigned a `HIGH` durability by default, which we override with `report_synthetic_read`.
- The result of the query is cached, and we must call `invalidate` to clear this cache.
A complete, runnable file-watching example can be found in [this git repo](https://github.com/ChristopherBiscardi/salsa-file-watch-example/blob/f968dc8ea13a90373f91d962f173de3fe6ae24cd/main.rs) along with [a write-up](https://www.christopherbiscardi.com/on-demand-lazy-inputs-for-incremental-computation-in-salsa-with-file-watching-powered-by-notify-in-rust) that explains more about the code and what it is doing. The driving code that's doing the top-level queries is then in charge of updating the file contents when a file-change notification arrives.
It does this by updating the Salsa input in the same way that you would update any other input.
Here we implement a simple driving loop that recompiles the code whenever a file changes.
You can use the logs to check that only the queries that could have changed are re-evaluated.
```rust,ignore
{{#include ../../../examples-2022/lazy-input/src/main.rs:main}}
```

View file

@ -10,14 +10,22 @@ use notify_debouncer_mini::{
}; };
use salsa::DebugWithDb; use salsa::DebugWithDb;
// ANCHOR: main
fn main() -> Result<()> { fn main() -> Result<()> {
// Create the channel to receive file change events.
let (tx, rx) = unbounded(); let (tx, rx) = unbounded();
let mut db = Database::new(tx); let mut db = Database::new(tx);
let initial_file_path = std::env::args_os() let initial_file_path = std::env::args_os()
.nth(1) .nth(1)
.ok_or_else(|| eyre!("Usage: ./lazy-input <input-file>"))?; .ok_or_else(|| eyre!("Usage: ./lazy-input <input-file>"))?;
// Create the initial input using the input method so that changes to it
// will be watched like the other files.
let initial = db.input(initial_file_path.into())?; let initial = db.input(initial_file_path.into())?;
loop { loop {
// Compile the code starting at the provided input; this will read other
// needed files using the on-demand mechanism.
let sum = compile(&db, initial); let sum = compile(&db, initial);
let diagnostics = compile::accumulated::<Diagnostic>(&db, initial); let diagnostics = compile::accumulated::<Diagnostic>(&db, initial);
if diagnostics.is_empty() { if diagnostics.is_empty() {
@ -32,21 +40,38 @@ fn main() -> Result<()> {
eprintln!("{}", log); eprintln!("{}", log);
} }
for event in rx.recv().unwrap().unwrap() { // Wait for file change events, the output can't change unless the
let path = event.path.canonicalize().unwrap(); // inputs change.
for event in rx.recv()?.unwrap() {
let path = event.path.canonicalize().wrap_err_with(|| {
format!("Failed to canonicalize path {}", event.path.display())
})?;
let file = match db.files.get(&path) { let file = match db.files.get(&path) {
Some(file) => *file, Some(file) => *file,
None => continue, None => continue,
}; };
file.set_contents(&mut db) // `path` has changed, so read it and update the contents to match.
.to(std::fs::read_to_string(path).unwrap()); // This creates a new revision and causes the incremental algorithm
// to kick in, just like any other update to a salsa input.
let contents = std::fs::read_to_string(path)
.wrap_err_with(|| format!("Failed to read file {}", event.path.display()))?;
file.set_contents(&mut db).to(contents);
} }
} }
} }
// ANCHOR_END: main
#[salsa::jar(db = Db)] #[salsa::jar(db = Db)]
struct Jar(Diagnostic, File, ParsedFile, compile, parse, sum); struct Jar(Diagnostic, File, ParsedFile, compile, parse, sum);
// ANCHOR: db
// Salsa input representing one file: the path it was loaded from and its
// current text. The driving loop in `main` overwrites `contents` through
// `set_contents` when a change notification for the file arrives.
#[salsa::input]
struct File {
// Path of the file this input stands for.
path: PathBuf,
// Text of the file as last read from disk.
// NOTE(review): `#[return_ref]` presumably makes the generated getter hand
// back a reference rather than a clone of the `String` — confirm against the
// salsa macro documentation.
#[return_ref]
contents: String,
}
trait Db: salsa::DbWithJar<Jar> { trait Db: salsa::DbWithJar<Jar> {
fn input(&self, path: PathBuf) -> Result<File>; fn input(&self, path: PathBuf) -> Result<File>;
} }
@ -71,26 +96,19 @@ impl Database {
} }
} }
impl salsa::Database for Database {
fn salsa_event(&self, event: salsa::Event) {
// don't log boring events
if let salsa::EventKind::WillExecute { .. } = event.kind {
self.logs
.lock()
.unwrap()
.push(format!("{:?}", event.debug(self)));
}
}
}
impl Db for Database { impl Db for Database {
fn input(&self, path: PathBuf) -> Result<File> { fn input(&self, path: PathBuf) -> Result<File> {
let path = path let path = path
.canonicalize() .canonicalize()
.wrap_err_with(|| format!("Failed to read {}", path.display()))?; .wrap_err_with(|| format!("Failed to read {}", path.display()))?;
Ok(match self.files.entry(path.clone()) { Ok(match self.files.entry(path.clone()) {
// If the file already exists in our cache then just return it.
Entry::Occupied(entry) => *entry.get(), Entry::Occupied(entry) => *entry.get(),
// If we haven't read this file yet, set up the watch, read the
// contents, store it in the cache, and return it.
Entry::Vacant(entry) => { Entry::Vacant(entry) => {
// Set up the watch before reading the contents to try to avoid
// race conditions.
let watcher = &mut *self.file_watcher.lock().unwrap(); let watcher = &mut *self.file_watcher.lock().unwrap();
watcher watcher
.watcher() .watcher()
@ -103,6 +121,19 @@ impl Db for Database {
}) })
} }
} }
// ANCHOR_END: db
// Implements the salsa database trait for the example `Database`, using the
// event hook to record which queries get executed.
impl salsa::Database for Database {
// Called with each salsa event. Only `WillExecute` events are kept: their
// debug rendering (via `event.debug(self)`) is pushed onto `self.logs`.
// All other event kinds are ignored.
// NOTE(review): `.lock().unwrap()` will panic if the lock is poisoned —
// presumably `logs` is a `Mutex<Vec<String>>`; confirm against the struct
// definition, which is not visible here.
fn salsa_event(&self, event: salsa::Event) {
// don't log boring events
if let salsa::EventKind::WillExecute { .. } = event.kind {
self.logs
.lock()
.unwrap()
.push(format!("{:?}", event.debug(self)));
}
}
}
#[salsa::accumulator] #[salsa::accumulator]
struct Diagnostic(String); struct Diagnostic(String);
@ -123,13 +154,6 @@ impl Diagnostic {
} }
} }
#[salsa::input]
struct File {
path: PathBuf,
#[return_ref]
contents: String,
}
#[salsa::tracked] #[salsa::tracked]
struct ParsedFile { struct ParsedFile {
value: u32, value: u32,